Blame - fs/bio.c - SHIFTPHONES/kernel/shift/mainline

blob: 40aa96eae99ffd7ddce088ccb875b16e0b0cfcdb [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Jens Axboe	0fe2347	2006-09-04 15:41:16 +0200	[diff] [blame]	2	* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	* GNU General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public License
				14	* along with this program; if not, write to the Free Software
				15	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				16	*
				17	*/
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
				20	#include <linux/bio.h>
				21	#include <linux/blkdev.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	22	#include <linux/iocontext.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	23	#include <linux/slab.h>
				24	#include <linux/init.h>
				25	#include <linux/kernel.h>
Paul Gortmaker	630d9c4	2011-11-16 23:57:37 -0500	[diff] [blame]	26	#include <linux/export.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	27	#include <linux/mempool.h>
				28	#include <linux/workqueue.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	29	#include <linux/cgroup.h>
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	30	#include <scsi/sg.h> /* for struct sg_iovec */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31
Li Zefan	5578213	2009-06-09 13:43:05 +0800	[diff] [blame]	32	#include <trace/events/block.h>
Ingo Molnar	0bfc245	2008-11-26 11:59:56 +0100	[diff] [blame]	33
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	34	/*
				35	* Number of bio_vecs embedded in the bio itself (bi_inline_vecs), so that a
				36	* small bio needs one mempool allocation for its data instead of two
				37	*/
				38	#define BIO_INLINE_VECS 4
				39
Denis ChengRq	6feef53	2008-10-09 08:57:05 +0200	[diff] [blame]	40	static mempool_t *bio_split_pool __read_mostly;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	42	/*
				43	* If you change this list, also change the size-class switch in
				44	* bvec_alloc() or things will break badly! Entries cannot be bigger than
				45	* what you can fit into an unsigned short
				46	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	47	#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
Martin K. Petersen	df67714	2011-03-08 08:28:01 +0100	[diff] [blame]	48	static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	49	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
				50	};
				51	#undef BV
				52
				53	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	54	* fs_bio_set is the bio_set containing the bio and iovec memory pools used
				55	* by IO code that does not need private memory pools.
				56	*/
Martin K. Petersen	51d654e	2008-06-17 18:59:56 +0200	[diff] [blame]	57	struct bio_set *fs_bio_set;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	58	EXPORT_SYMBOL(fs_bio_set);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	59
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	60	/*
				61	* Our slab pool management
				62	*/
				63	struct bio_slab {
				64	struct kmem_cache *slab;
				65	unsigned int slab_ref;
				66	unsigned int slab_size;
				67	char name[8];
				68	};
				69	static DEFINE_MUTEX(bio_slab_lock);
				70	static struct bio_slab *bio_slabs;
				71	static unsigned int bio_slab_nr, bio_slab_max;
				72
				73	static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
				74	{
				75	unsigned int sz = sizeof(struct bio) + extra_size;
				76	struct kmem_cache *slab = NULL;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	77	struct bio_slab bslab, new_bio_slabs;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	78	unsigned int new_bio_slab_max;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	79	unsigned int i, entry = -1;
				80
				81	mutex_lock(&bio_slab_lock);
				82
				83	i = 0;
				84	while (i < bio_slab_nr) {
Thiago Farina	f06f135	2010-01-19 14:07:09 +0100	[diff] [blame]	85	bslab = &bio_slabs[i];
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	86
				87	if (!bslab->slab && entry == -1)
				88	entry = i;
				89	else if (bslab->slab_size == sz) {
				90	slab = bslab->slab;
				91	bslab->slab_ref++;
				92	break;
				93	}
				94	i++;
				95	}
				96
				97	if (slab)
				98	goto out_unlock;
				99
				100	if (bio_slab_nr == bio_slab_max && entry == -1) {
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	101	new_bio_slab_max = bio_slab_max << 1;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	102	new_bio_slabs = krealloc(bio_slabs,
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	103	new_bio_slab_max * sizeof(struct bio_slab),
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	104	GFP_KERNEL);
				105	if (!new_bio_slabs)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	106	goto out_unlock;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	107	bio_slab_max = new_bio_slab_max;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	108	bio_slabs = new_bio_slabs;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	109	}
				110	if (entry == -1)
				111	entry = bio_slab_nr++;
				112
				113	bslab = &bio_slabs[entry];
				114
				115	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
				116	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
				117	if (!slab)
				118	goto out_unlock;
				119
Mandeep Singh Baines	80cdc6d	2011-03-22 16:33:54 -0700	[diff] [blame]	120	printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	121	bslab->slab = slab;
				122	bslab->slab_ref = 1;
				123	bslab->slab_size = sz;
				124	out_unlock:
				125	mutex_unlock(&bio_slab_lock);
				126	return slab;
				127	}
				128
				129	static void bio_put_slab(struct bio_set *bs)
				130	{
				131	struct bio_slab *bslab = NULL;
				132	unsigned int i;
				133
				134	mutex_lock(&bio_slab_lock);
				135
				136	for (i = 0; i < bio_slab_nr; i++) {
				137	if (bs->bio_slab == bio_slabs[i].slab) {
				138	bslab = &bio_slabs[i];
				139	break;
				140	}
				141	}
				142
				143	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
				144	goto out;
				145
				146	WARN_ON(!bslab->slab_ref);
				147
				148	if (--bslab->slab_ref)
				149	goto out;
				150
				151	kmem_cache_destroy(bslab->slab);
				152	bslab->slab = NULL;
				153
				154	out:
				155	mutex_unlock(&bio_slab_lock);
				156	}
				157
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	158	unsigned int bvec_nr_vecs(unsigned short idx)
				159	{
				160	return bvec_slabs[idx].nr_vecs;
				161	}
				162
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	163	void bvec_free(mempool_t pool, struct bio_vec bv, unsigned int idx)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	164	{
				165	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
				166
				167	if (idx == BIOVEC_MAX_IDX)
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	168	mempool_free(bv, pool);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	169	else {
				170	struct biovec_slab *bvs = bvec_slabs + idx;
				171
				172	kmem_cache_free(bvs->slab, bv);
				173	}
				174	}
				175
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	176	struct bio_vec bvec_alloc(gfp_t gfp_mask, int nr, unsigned long idx,
				177	mempool_t *pool)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	178	{
				179	struct bio_vec *bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	180
				181	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	182	* see comment near bvec_array define!
				183	*/
				184	switch (nr) {
				185	case 1:
				186	*idx = 0;
				187	break;
				188	case 2 ... 4:
				189	*idx = 1;
				190	break;
				191	case 5 ... 16:
				192	*idx = 2;
				193	break;
				194	case 17 ... 64:
				195	*idx = 3;
				196	break;
				197	case 65 ... 128:
				198	*idx = 4;
				199	break;
				200	case 129 ... BIO_MAX_PAGES:
				201	*idx = 5;
				202	break;
				203	default:
				204	return NULL;
				205	}
				206
				207	/*
				208	* idx now points to the pool we want to allocate from. only the
				209	* 1-vec entry pool is mempool backed.
				210	*/
				211	if (*idx == BIOVEC_MAX_IDX) {
				212	fallback:
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	213	bvl = mempool_alloc(pool, gfp_mask);
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	214	} else {
				215	struct biovec_slab bvs = bvec_slabs + idx;
				216	gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT \| __GFP_IO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	217
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	218	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	219	* Make this allocation restricted and don't dump info on
				220	* allocation failures, since we'll fallback to the mempool
				221	* in case of failure.
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	222	*/
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	223	__gfp_mask \|= __GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN;
				224
				225	/*
				226	* Try a slab allocation. If this fails and __GFP_WAIT
				227	* is set, retry with the 1-entry mempool
				228	*/
				229	bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
				230	if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
				231	*idx = BIOVEC_MAX_IDX;
				232	goto fallback;
				233	}
				234	}
				235
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	236	return bvl;
				237	}
				238
/*
 * Tear down the state attached to @bio (task association and, if present,
 * the integrity payload) without releasing the bio's own memory.
 */
static void __bio_free(struct bio *bio)
{
	bio_disassociate_task(bio);

	if (!bio_integrity(bio))
		return;

	bio_integrity_free(bio);
}
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	246
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	247	static void bio_free(struct bio *bio)
				248	{
				249	struct bio_set *bs = bio->bi_pool;
				250	void *p;
				251
				252	__bio_free(bio);
				253
				254	if (bs) {
				255	if (bio_has_allocated_vec(bio))
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	256	bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	257
				258	/*
				259	* If we have front padding, adjust the bio pointer before freeing
				260	*/
				261	p = bio;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	262	p -= bs->front_pad;
				263
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	264	mempool_free(p, bs->bio_pool);
				265	} else {
				266	/* Bio was allocated by bio_kmalloc() */
				267	kfree(bio);
				268	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	269	}
				270
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	271	void bio_init(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	272	{
Jens Axboe	2b94de5	2007-07-18 13:14:03 +0200	[diff] [blame]	273	memset(bio, 0, sizeof(*bio));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	274	bio->bi_flags = 1 << BIO_UPTODATE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	275	atomic_set(&bio->bi_cnt, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	276	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	277	EXPORT_SYMBOL(bio_init);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	278
				279	/**
Kent Overstreet	f44b48c7	2012-09-06 15:34:58 -0700	[diff] [blame]	280	* bio_reset - reinitialize a bio
				281	* @bio: bio to reset
				282	*
				283	* Description:
				284	* After calling bio_reset(), @bio will be in the same state as a freshly
				285	* allocated bio returned bio bio_alloc_bioset() - the only fields that are
				286	* preserved are the ones that are initialized by bio_alloc_bioset(). See
				287	* comment in struct bio.
				288	*/
				289	void bio_reset(struct bio *bio)
				290	{
				291	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
				292
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	293	__bio_free(bio);
Kent Overstreet	f44b48c7	2012-09-06 15:34:58 -0700	[diff] [blame]	294
				295	memset(bio, 0, BIO_RESET_BYTES);
				296	bio->bi_flags = flags\|(1 << BIO_UPTODATE);
				297	}
				298	EXPORT_SYMBOL(bio_reset);
				299
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	300	static void bio_alloc_rescue(struct work_struct *work)
				301	{
				302	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
				303	struct bio *bio;
				304
				305	while (1) {
				306	spin_lock(&bs->rescue_lock);
				307	bio = bio_list_pop(&bs->rescue_list);
				308	spin_unlock(&bs->rescue_lock);
				309
				310	if (!bio)
				311	break;
				312
				313	generic_make_request(bio);
				314	}
				315	}
				316
				317	static void punt_bios_to_rescuer(struct bio_set *bs)
				318	{
				319	struct bio_list punt, nopunt;
				320	struct bio *bio;
				321
				322	/*
				323	* In order to guarantee forward progress we must punt only bios that
				324	* were allocated from this bio_set; otherwise, if there was a bio on
				325	* there for a stacking driver higher up in the stack, processing it
				326	* could require allocating bios from this bio_set, and doing that from
				327	* our own rescuer would be bad.
				328	*
				329	* Since bio lists are singly linked, pop them all instead of trying to
				330	* remove from the middle of the list:
				331	*/
				332
				333	bio_list_init(&punt);
				334	bio_list_init(&nopunt);
				335
				336	while ((bio = bio_list_pop(current->bio_list)))
				337	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				338
				339	*current->bio_list = nopunt;
				340
				341	spin_lock(&bs->rescue_lock);
				342	bio_list_merge(&bs->rescue_list, &punt);
				343	spin_unlock(&bs->rescue_lock);
				344
				345	queue_work(bs->rescue_workqueue, &bs->rescue_work);
				346	}
				347
Kent Overstreet	f44b48c7	2012-09-06 15:34:58 -0700	[diff] [blame]	348	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	349	* bio_alloc_bioset - allocate a bio for I/O
				350	* @gfp_mask: the GFP_ mask given to the slab allocator
				351	* @nr_iovecs: number of iovecs to pre-allocate
Jaak Ristioja	db18efa	2010-01-15 12:05:07 +0200	[diff] [blame]	352	* @bs: the bio_set to allocate from.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	353	*
				354	* Description:
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	355	* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
				356	* backed by the @bs's mempool.
				357	*
				358	* When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
				359	* able to allocate a bio. This is due to the mempool guarantees. To make this
				360	* work, callers must never allocate more than 1 bio at a time from this pool.
				361	* Callers that need to allocate more than 1 bio must always submit the
				362	* previously allocated bio for IO before attempting to allocate a new one.
				363	* Failure to do so can cause deadlocks under memory pressure.
				364	*
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	365	* Note that when running under generic_make_request() (i.e. any block
				366	* driver), bios are not submitted until after you return - see the code in
				367	* generic_make_request() that converts recursion into iteration, to prevent
				368	* stack overflows.
				369	*
				370	* This would normally mean allocating multiple bios under
				371	* generic_make_request() would be susceptible to deadlocks, but we have
				372	* deadlock avoidance code that resubmits any blocked bios from a rescuer
				373	* thread.
				374	*
				375	* However, we do not guarantee forward progress for allocations from other
				376	* mempools. Doing multiple allocations from the same mempool under
				377	* generic_make_request() should be avoided - instead, use bio_set's front_pad
				378	* for per bio allocations.
				379	*
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	380	* RETURNS:
				381	* Pointer to new bio on success, NULL on failure.
				382	*/
Al Viro	dd0fc66	2005-10-07 07:46:04 +0100	[diff] [blame]	383	struct bio bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	384	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	385	gfp_t saved_gfp = gfp_mask;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	386	unsigned front_pad;
				387	unsigned inline_vecs;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	388	unsigned long idx = BIO_POOL_NONE;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	389	struct bio_vec *bvl = NULL;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	390	struct bio *bio;
				391	void *p;
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	392
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	393	if (!bs) {
				394	if (nr_iovecs > UIO_MAXIOV)
				395	return NULL;
				396
				397	p = kmalloc(sizeof(struct bio) +
				398	nr_iovecs * sizeof(struct bio_vec),
				399	gfp_mask);
				400	front_pad = 0;
				401	inline_vecs = nr_iovecs;
				402	} else {
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	403	/*
				404	* generic_make_request() converts recursion to iteration; this
				405	* means if we're running beneath it, any bios we allocate and
				406	* submit will not be submitted (and thus freed) until after we
				407	* return.
				408	*
				409	* This exposes us to a potential deadlock if we allocate
				410	* multiple bios from the same bio_set() while running
				411	* underneath generic_make_request(). If we were to allocate
				412	* multiple bios (say a stacking block driver that was splitting
				413	* bios), we would deadlock if we exhausted the mempool's
				414	* reserve.
				415	*
				416	* We solve this, and guarantee forward progress, with a rescuer
				417	* workqueue per bio_set. If we go to allocate and there are
				418	* bios on current->bio_list, we first try the allocation
				419	* without __GFP_WAIT; if that fails, we punt those bios we
				420	* would be blocking to the rescuer workqueue before we retry
				421	* with the original gfp_flags.
				422	*/
				423
				424	if (current->bio_list && !bio_list_empty(current->bio_list))
				425	gfp_mask &= ~__GFP_WAIT;
				426
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	427	p = mempool_alloc(bs->bio_pool, gfp_mask);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	428	if (!p && gfp_mask != saved_gfp) {
				429	punt_bios_to_rescuer(bs);
				430	gfp_mask = saved_gfp;
				431	p = mempool_alloc(bs->bio_pool, gfp_mask);
				432	}
				433
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	434	front_pad = bs->front_pad;
				435	inline_vecs = BIO_INLINE_VECS;
				436	}
				437
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	438	if (unlikely(!p))
				439	return NULL;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	440
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	441	bio = p + front_pad;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	442	bio_init(bio);
				443
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	444	if (nr_iovecs > inline_vecs) {
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	445	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	446	if (!bvl && gfp_mask != saved_gfp) {
				447	punt_bios_to_rescuer(bs);
				448	gfp_mask = saved_gfp;
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	449	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	450	}
				451
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	452	if (unlikely(!bvl))
				453	goto err_free;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	454	} else if (nr_iovecs) {
				455	bvl = bio->bi_inline_vecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	456	}
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	457
				458	bio->bi_pool = bs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	459	bio->bi_flags \|= idx << BIO_POOL_OFFSET;
				460	bio->bi_max_vecs = nr_iovecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	461	bio->bi_io_vec = bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	462	return bio;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	463
				464	err_free:
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	465	mempool_free(p, bs->bio_pool);
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	466	return NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	467	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	468	EXPORT_SYMBOL(bio_alloc_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	469
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	470	void zero_fill_bio(struct bio *bio)
				471	{
				472	unsigned long flags;
				473	struct bio_vec *bv;
				474	int i;
				475
				476	bio_for_each_segment(bv, bio, i) {
				477	char *data = bvec_kmap_irq(bv, &flags);
				478	memset(data, 0, bv->bv_len);
				479	flush_dcache_page(bv->bv_page);
				480	bvec_kunmap_irq(data, &flags);
				481	}
				482	}
				483	EXPORT_SYMBOL(zero_fill_bio);
				484
				485	/**
				486	* bio_put - release a reference to a bio
				487	* @bio: bio to release reference to
				488	*
				489	* Description:
				490	* Put a reference to a &struct bio, either one you have gotten with
Alberto Bertogli	ad0bf11	2009-11-02 11:39:22 +0100	[diff] [blame]	491	* bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	492	**/
				493	void bio_put(struct bio *bio)
				494	{
				495	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
				496
				497	/*
				498	* last put frees it
				499	*/
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	500	if (atomic_dec_and_test(&bio->bi_cnt))
				501	bio_free(bio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	502	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	503	EXPORT_SYMBOL(bio_put);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	504
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	505	inline int bio_phys_segments(struct request_queue q, struct bio bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	506	{
				507	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
				508	blk_recount_segments(q, bio);
				509
				510	return bio->bi_phys_segments;
				511	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	512	EXPORT_SYMBOL(bio_phys_segments);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	513
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	514	/**
				515	* __bio_clone - clone a bio
				516	* @bio: destination bio
				517	* @bio_src: bio to clone
				518	*
				519	* Clone a &bio. Caller will own the returned bio, but not
				520	* the actual data it points to. Reference count of returned
				521	* bio will be one.
				522	*/
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	523	void __bio_clone(struct bio bio, struct bio bio_src)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	524	{
Andrew Morton	e525e15	2005-08-07 09:42:12 -0700	[diff] [blame]	525	memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
				526	bio_src->bi_max_vecs * sizeof(struct bio_vec));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	527
Jens Axboe	5d84070	2008-01-25 12:44:44 +0100	[diff] [blame]	528	/*
				529	* most users will be overriding ->bi_bdev with a new target,
				530	* so we don't set nor calculate new physical/hw segment counts here
				531	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	532	bio->bi_sector = bio_src->bi_sector;
				533	bio->bi_bdev = bio_src->bi_bdev;
				534	bio->bi_flags \|= 1 << BIO_CLONED;
				535	bio->bi_rw = bio_src->bi_rw;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	536	bio->bi_vcnt = bio_src->bi_vcnt;
				537	bio->bi_size = bio_src->bi_size;
Andrew Morton	a5453be	2005-07-28 01:07:18 -0700	[diff] [blame]	538	bio->bi_idx = bio_src->bi_idx;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	539	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	540	EXPORT_SYMBOL(__bio_clone);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	541
				542	/**
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	543	* bio_clone_bioset - clone a bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	544	* @bio: bio to clone
				545	* @gfp_mask: allocation priority
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	546	* @bs: bio_set to allocate from
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	547	*
				548	* Like __bio_clone, only also allocates the returned bio
				549	*/
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	550	struct bio bio_clone_bioset(struct bio bio, gfp_t gfp_mask,
				551	struct bio_set *bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	552	{
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	553	struct bio *b;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	554
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	555	b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs);
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	556	if (!b)
				557	return NULL;
				558
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	559	__bio_clone(b, bio);
				560
				561	if (bio_integrity(bio)) {
				562	int ret;
				563
Kent Overstreet	1e2a410f	2012-09-06 15:34:56 -0700	[diff] [blame]	564	ret = bio_integrity_clone(b, bio, gfp_mask);
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	565
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	566	if (ret < 0) {
				567	bio_put(b);
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	568	return NULL;
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	569	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	570	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	571
				572	return b;
				573	}
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	574	EXPORT_SYMBOL(bio_clone_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	575
				576	/**
				577	* bio_get_nr_vecs - return approx number of vecs
				578	* @bdev: I/O target
				579	*
				580	* Return the approximate number of pages we can send to this target.
				581	* There's no guarantee that you will be able to fit this number of pages
				582	* into a bio, it does not account for dynamic restrictions that vary
				583	* on offset.
				584	*/
				585	int bio_get_nr_vecs(struct block_device *bdev)
				586	{
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	587	struct request_queue *q = bdev_get_queue(bdev);
Bernd Schubert	f908ee9	2012-05-11 16:36:44 +0200	[diff] [blame]	588	int nr_pages;
				589
				590	nr_pages = min_t(unsigned,
Kent Overstreet	5abebfd	2012-02-08 22:07:18 +0100	[diff] [blame]	591	queue_max_segments(q),
				592	queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
Bernd Schubert	f908ee9	2012-05-11 16:36:44 +0200	[diff] [blame]	593
				594	return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
				595
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	596	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	597	EXPORT_SYMBOL(bio_get_nr_vecs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	598
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	599	static int __bio_add_page(struct request_queue q, struct bio bio, struct page
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	600	*page, unsigned int len, unsigned int offset,
				601	unsigned short max_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	602	{
				603	int retried_segments = 0;
				604	struct bio_vec *bvec;
				605
				606	/*
				607	* cloned bio must not modify vec list
				608	*/
				609	if (unlikely(bio_flagged(bio, BIO_CLONED)))
				610	return 0;
				611
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	612	if (((bio->bi_size + len) >> 9) > max_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	613	return 0;
				614
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	615	/*
				616	* For filesystems with a blocksize smaller than the pagesize
				617	* we will often be called with the same page as last time and
				618	* a consecutive offset. Optimize this special case.
				619	*/
				620	if (bio->bi_vcnt > 0) {
				621	struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
				622
				623	if (page == prev->bv_page &&
				624	offset == prev->bv_offset + prev->bv_len) {
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	625	unsigned int prev_bv_len = prev->bv_len;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	626	prev->bv_len += len;
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	627
				628	if (q->merge_bvec_fn) {
				629	struct bvec_merge_data bvm = {
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	630	/* prev_bvec is already charged in
				631	bi_size, discharge it in order to
				632	simulate merging updated prev_bvec
				633	as new bvec. */
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	634	.bi_bdev = bio->bi_bdev,
				635	.bi_sector = bio->bi_sector,
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	636	.bi_size = bio->bi_size - prev_bv_len,
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	637	.bi_rw = bio->bi_rw,
				638	};
				639
Dmitry Monakhov	8bf8c37	2010-03-03 06:28:06 +0300	[diff] [blame]	640	if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	641	prev->bv_len -= len;
				642	return 0;
				643	}
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	644	}
				645
				646	goto done;
				647	}
				648	}
				649
				650	if (bio->bi_vcnt >= bio->bi_max_vecs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	651	return 0;
				652
				653	/*
				654	* we might lose a segment or two here, but rather that than
				655	* make this too complex.
				656	*/
				657
Martin K. Petersen	8a78362	2010-02-26 00:20:39 -0500	[diff] [blame]	658	while (bio->bi_phys_segments >= queue_max_segments(q)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	659
				660	if (retried_segments)
				661	return 0;
				662
				663	retried_segments = 1;
				664	blk_recount_segments(q, bio);
				665	}
				666
				667	/*
				668	* setup the new entry, we might clear it again later if we
				669	* cannot add the page
				670	*/
				671	bvec = &bio->bi_io_vec[bio->bi_vcnt];
				672	bvec->bv_page = page;
				673	bvec->bv_len = len;
				674	bvec->bv_offset = offset;
				675
				676	/*
				677	* if queue has other restrictions (eg varying max sector size
				678	* depending on offset), it can specify a merge_bvec_fn in the
				679	* queue to get further control
				680	*/
				681	if (q->merge_bvec_fn) {
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	682	struct bvec_merge_data bvm = {
				683	.bi_bdev = bio->bi_bdev,
				684	.bi_sector = bio->bi_sector,
				685	.bi_size = bio->bi_size,
				686	.bi_rw = bio->bi_rw,
				687	};
				688
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	689	/*
				690	* merge_bvec_fn() returns number of bytes it can accept
				691	* at this offset
				692	*/
Dmitry Monakhov	8bf8c37	2010-03-03 06:28:06 +0300	[diff] [blame]	693	if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	694	bvec->bv_page = NULL;
				695	bvec->bv_len = 0;
				696	bvec->bv_offset = 0;
				697	return 0;
				698	}
				699	}
				700
				701	/* If we may be able to merge these biovecs, force a recount */
Mikulas Patocka	b8b3e16	2008-08-15 10:15:19 +0200	[diff] [blame]	702	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	703	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
				704
				705	bio->bi_vcnt++;
				706	bio->bi_phys_segments++;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	707	done:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	708	bio->bi_size += len;
				709	return len;
				710	}
				711
				712	/**
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	713	* bio_add_pc_page - attempt to add page to bio
Jens Axboe	fddfdea	2006-01-31 15:24:34 +0100	[diff] [blame]	714	* @q: the target queue
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	715	* @bio: destination bio
				716	* @page: page to add
				717	* @len: vec entry length
				718	* @offset: vec entry offset
				719	*
				720	* Attempt to add a page to the bio_vec maplist. This can fail for a
Andreas Gruenbacher	c642808	2011-05-27 14:52:09 +0200	[diff] [blame]	721	* number of reasons, such as the bio being full or target block device
				722	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				723	* so it is always possible to add a single page to an empty bio.
				724	*
				725	* This should only be used by REQ_PC bios.
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	726	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	727	int bio_add_pc_page(struct request_queue q, struct bio bio, struct page *page,
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	728	unsigned int len, unsigned int offset)
				729	{
Martin K. Petersen	ae03bf6	2009-05-22 17:17:50 -0400	[diff] [blame]	730	return __bio_add_page(q, bio, page, len, offset,
				731	queue_max_hw_sectors(q));
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	732	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	733	EXPORT_SYMBOL(bio_add_pc_page);
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	734
				735	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	736	* bio_add_page - attempt to add page to bio
				737	* @bio: destination bio
				738	* @page: page to add
				739	* @len: vec entry length
				740	* @offset: vec entry offset
				741	*
				742	* Attempt to add a page to the bio_vec maplist. This can fail for a
Andreas Gruenbacher	c642808	2011-05-27 14:52:09 +0200	[diff] [blame]	743	* number of reasons, such as the bio being full or target block device
				744	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				745	* so it is always possible to add a single page to an empty bio.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	746	*/
				747	int bio_add_page(struct bio bio, struct page page, unsigned int len,
				748	unsigned int offset)
				749	{
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	750	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
Martin K. Petersen	ae03bf6	2009-05-22 17:17:50 -0400	[diff] [blame]	751	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	752	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	753	EXPORT_SYMBOL(bio_add_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	754
/*
 * Per-bio bookkeeping for the copy (bounce) path. It is filled in by
 * bio_set_map_data(), stored in bio->bi_private and consumed by
 * bio_uncopy_user().
 */
struct bio_map_data {
	/* copy of the bio's bi_io_vec table taken in bio_set_map_data() */
	struct bio_vec *iovecs;
	/* copy of the caller's sg_iovec array */
	struct sg_iovec *sgvecs;
	/* number of entries in sgvecs */
	int nr_sgvecs;
	/*
	 * non-zero when bio_copy_user_iov() allocated the pages itself (no
	 * rq_map_data was supplied); passed as do_free_page to
	 * __bio_copy_iov() so the pages are freed on uncopy
	 */
	int is_our_pages;
};
				761
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	762	static void bio_set_map_data(struct bio_map_data bmd, struct bio bio,
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	763	struct sg_iovec *iov, int iov_count,
				764	int is_our_pages)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	765	{
				766	memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	767	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
				768	bmd->nr_sgvecs = iov_count;
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	769	bmd->is_our_pages = is_our_pages;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	770	bio->bi_private = bmd;
				771	}
				772
				773	static void bio_free_map_data(struct bio_map_data *bmd)
				774	{
				775	kfree(bmd->iovecs);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	776	kfree(bmd->sgvecs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	777	kfree(bmd);
				778	}
				779
Dan Carpenter	121f099	2011-11-16 09:21:50 +0100	[diff] [blame]	780	static struct bio_map_data *bio_alloc_map_data(int nr_segs,
				781	unsigned int iov_count,
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	782	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	783	{
Jens Axboe	f3f63c1	2010-10-29 11:46:56 -0600	[diff] [blame]	784	struct bio_map_data *bmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	785
Jens Axboe	f3f63c1	2010-10-29 11:46:56 -0600	[diff] [blame]	786	if (iov_count > UIO_MAXIOV)
				787	return NULL;
				788
				789	bmd = kmalloc(sizeof(*bmd), gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	790	if (!bmd)
				791	return NULL;
				792
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	793	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	794	if (!bmd->iovecs) {
				795	kfree(bmd);
				796	return NULL;
				797	}
				798
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	799	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	800	if (bmd->sgvecs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	801	return bmd;
				802
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	803	kfree(bmd->iovecs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	804	kfree(bmd);
				805	return NULL;
				806	}
				807
FUJITA Tomonori	aefcc28	2008-08-25 20:36:08 +0200	[diff] [blame]	808	static int __bio_copy_iov(struct bio bio, struct bio_vec iovecs,
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	809	struct sg_iovec *iov, int iov_count,
				810	int to_user, int from_user, int do_free_page)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	811	{
				812	int ret = 0, i;
				813	struct bio_vec *bvec;
				814	int iov_idx = 0;
				815	unsigned int iov_off = 0;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	816
				817	__bio_for_each_segment(bvec, bio, i, 0) {
				818	char *bv_addr = page_address(bvec->bv_page);
FUJITA Tomonori	aefcc28	2008-08-25 20:36:08 +0200	[diff] [blame]	819	unsigned int bv_len = iovecs[i].bv_len;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	820
				821	while (bv_len && iov_idx < iov_count) {
				822	unsigned int bytes;
Michal Simek	0e0c621	2009-06-10 12:57:07 -0700	[diff] [blame]	823	char __user *iov_addr;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	824
				825	bytes = min_t(unsigned int,
				826	iov[iov_idx].iov_len - iov_off, bv_len);
				827	iov_addr = iov[iov_idx].iov_base + iov_off;
				828
				829	if (!ret) {
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	830	if (to_user)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	831	ret = copy_to_user(iov_addr, bv_addr,
				832	bytes);
				833
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	834	if (from_user)
				835	ret = copy_from_user(bv_addr, iov_addr,
				836	bytes);
				837
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	838	if (ret)
				839	ret = -EFAULT;
				840	}
				841
				842	bv_len -= bytes;
				843	bv_addr += bytes;
				844	iov_addr += bytes;
				845	iov_off += bytes;
				846
				847	if (iov[iov_idx].iov_len == iov_off) {
				848	iov_idx++;
				849	iov_off = 0;
				850	}
				851	}
				852
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	853	if (do_free_page)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	854	__free_page(bvec->bv_page);
				855	}
				856
				857	return ret;
				858	}
				859
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	860	/**
				861	* bio_uncopy_user - finish previously mapped bio
				862	* @bio: bio being terminated
				863	*
				864	* Free pages allocated from bio_copy_user() and write back data
				865	* to user space in case of a read.
				866	*/
				867	int bio_uncopy_user(struct bio *bio)
				868	{
				869	struct bio_map_data *bmd = bio->bi_private;
FUJITA Tomonori	8188276	2008-09-02 16:20:19 +0900	[diff] [blame]	870	int ret = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	871
FUJITA Tomonori	8188276	2008-09-02 16:20:19 +0900	[diff] [blame]	872	if (!bio_flagged(bio, BIO_NULL_MAPPED))
				873	ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	874	bmd->nr_sgvecs, bio_data_dir(bio) == READ,
				875	0, bmd->is_our_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	876	bio_free_map_data(bmd);
				877	bio_put(bio);
				878	return ret;
				879	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	880	EXPORT_SYMBOL(bio_uncopy_user);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	881
				882	/**
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	883	* bio_copy_user_iov - copy user data to bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	884	* @q: destination block queue
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	885	* @map_data: pointer to the rq_map_data holding pages (if necessary)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	886	* @iov: the iovec.
				887	* @iov_count: number of elements in the iovec
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	888	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	889	* @gfp_mask: memory allocation flags
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	890	*
				891	* Prepares and returns a bio for indirect user io, bouncing data
				892	* to/from kernel pages as necessary. Must be paired with
				893	* call bio_uncopy_user() on io completion.
				894	*/
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	895	struct bio bio_copy_user_iov(struct request_queue q,
				896	struct rq_map_data *map_data,
				897	struct sg_iovec *iov, int iov_count,
				898	int write_to_vm, gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	899	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	900	struct bio_map_data *bmd;
				901	struct bio_vec *bvec;
				902	struct page *page;
				903	struct bio *bio;
				904	int i, ret;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	905	int nr_pages = 0;
				906	unsigned int len = 0;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	907	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	908
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	909	for (i = 0; i < iov_count; i++) {
				910	unsigned long uaddr;
				911	unsigned long end;
				912	unsigned long start;
				913
				914	uaddr = (unsigned long)iov[i].iov_base;
				915	end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				916	start = uaddr >> PAGE_SHIFT;
				917
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	918	/*
				919	* Overflow, abort
				920	*/
				921	if (end < start)
				922	return ERR_PTR(-EINVAL);
				923
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	924	nr_pages += end - start;
				925	len += iov[i].iov_len;
				926	}
				927
FUJITA Tomonori	6983872	2009-04-28 20:24:29 +0200	[diff] [blame]	928	if (offset)
				929	nr_pages++;
				930
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	931	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	932	if (!bmd)
				933	return ERR_PTR(-ENOMEM);
				934
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	935	ret = -ENOMEM;
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	936	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	937	if (!bio)
				938	goto out_bmd;
				939
Christoph Hellwig	7b6d91d	2010-08-07 18:20:39 +0200	[diff] [blame]	940	if (!write_to_vm)
				941	bio->bi_rw \|= REQ_WRITE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	942
				943	ret = 0;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	944
				945	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	946	nr_pages = 1 << map_data->page_order;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	947	i = map_data->offset / PAGE_SIZE;
				948	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	949	while (len) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	950	unsigned int bytes = PAGE_SIZE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	951
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	952	bytes -= offset;
				953
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	954	if (bytes > len)
				955	bytes = len;
				956
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	957	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	958	if (i == map_data->nr_entries * nr_pages) {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	959	ret = -ENOMEM;
				960	break;
				961	}
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	962
				963	page = map_data->pages[i / nr_pages];
				964	page += (i % nr_pages);
				965
				966	i++;
				967	} else {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	968	page = alloc_page(q->bounce_gfp \| gfp_mask);
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	969	if (!page) {
				970	ret = -ENOMEM;
				971	break;
				972	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	973	}
				974
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	975	if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	976	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	977
				978	len -= bytes;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	979	offset = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	980	}
				981
				982	if (ret)
				983	goto cleanup;
				984
				985	/*
				986	* success
				987	*/
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	988	if ((!write_to_vm && (!map_data \|\| !map_data->null_mapped)) \|\|
				989	(map_data && map_data->from_user)) {
				990	ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	991	if (ret)
				992	goto cleanup;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	993	}
				994
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	995	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	996	return bio;
				997	cleanup:
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	998	if (!map_data)
				999	bio_for_each_segment(bvec, bio, i)
				1000	__free_page(bvec->bv_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1001
				1002	bio_put(bio);
				1003	out_bmd:
				1004	bio_free_map_data(bmd);
				1005	return ERR_PTR(ret);
				1006	}
				1007
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1008	/**
				1009	* bio_copy_user - copy user data to bio
				1010	* @q: destination block queue
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1011	* @map_data: pointer to the rq_map_data holding pages (if necessary)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1012	* @uaddr: start of user address
				1013	* @len: length in bytes
				1014	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1015	* @gfp_mask: memory allocation flags
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1016	*
				1017	* Prepares and returns a bio for indirect user io, bouncing data
				1018	* to/from kernel pages as necessary. Must be paired with
				1019	* call bio_uncopy_user() on io completion.
				1020	*/
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1021	struct bio bio_copy_user(struct request_queue q, struct rq_map_data *map_data,
				1022	unsigned long uaddr, unsigned int len,
				1023	int write_to_vm, gfp_t gfp_mask)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1024	{
				1025	struct sg_iovec iov;
				1026
				1027	iov.iov_base = (void __user *)uaddr;
				1028	iov.iov_len = len;
				1029
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1030	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1031	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1032	EXPORT_SYMBOL(bio_copy_user);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1033
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1034	static struct bio __bio_map_user_iov(struct request_queue q,
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1035	struct block_device *bdev,
				1036	struct sg_iovec *iov, int iov_count,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1037	int write_to_vm, gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1038	{
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1039	int i, j;
				1040	int nr_pages = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1041	struct page **pages;
				1042	struct bio *bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1043	int cur_page = 0;
				1044	int ret, offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1045
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1046	for (i = 0; i < iov_count; i++) {
				1047	unsigned long uaddr = (unsigned long)iov[i].iov_base;
				1048	unsigned long len = iov[i].iov_len;
				1049	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1050	unsigned long start = uaddr >> PAGE_SHIFT;
				1051
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1052	/*
				1053	* Overflow, abort
				1054	*/
				1055	if (end < start)
				1056	return ERR_PTR(-EINVAL);
				1057
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1058	nr_pages += end - start;
				1059	/*
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1060	* buffer must be aligned to at least hardsector size for now
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1061	*/
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1062	if (uaddr & queue_dma_alignment(q))
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1063	return ERR_PTR(-EINVAL);
				1064	}
				1065
				1066	if (!nr_pages)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1067	return ERR_PTR(-EINVAL);
				1068
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1069	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1070	if (!bio)
				1071	return ERR_PTR(-ENOMEM);
				1072
				1073	ret = -ENOMEM;
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1074	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1075	if (!pages)
				1076	goto out;
				1077
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1078	for (i = 0; i < iov_count; i++) {
				1079	unsigned long uaddr = (unsigned long)iov[i].iov_base;
				1080	unsigned long len = iov[i].iov_len;
				1081	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1082	unsigned long start = uaddr >> PAGE_SHIFT;
				1083	const int local_nr_pages = end - start;
				1084	const int page_limit = cur_page + local_nr_pages;
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1085
Nick Piggin	f5dd33c	2008-07-25 19:45:25 -0700	[diff] [blame]	1086	ret = get_user_pages_fast(uaddr, local_nr_pages,
				1087	write_to_vm, &pages[cur_page]);
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1088	if (ret < local_nr_pages) {
				1089	ret = -EFAULT;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1090	goto out_unmap;
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1091	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1092
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1093	offset = uaddr & ~PAGE_MASK;
				1094	for (j = cur_page; j < page_limit; j++) {
				1095	unsigned int bytes = PAGE_SIZE - offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1096
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1097	if (len <= 0)
				1098	break;
				1099
				1100	if (bytes > len)
				1101	bytes = len;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1102
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1103	/*
				1104	* sorry...
				1105	*/
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1106	if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
				1107	bytes)
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1108	break;
				1109
				1110	len -= bytes;
				1111	offset = 0;
				1112	}
				1113
				1114	cur_page = j;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1115	/*
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1116	* release the pages we didn't map into the bio, if any
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1117	*/
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1118	while (j < page_limit)
				1119	page_cache_release(pages[j++]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1120	}
				1121
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1122	kfree(pages);
				1123
				1124	/*
				1125	* set data direction, and check if mapped pages need bouncing
				1126	*/
				1127	if (!write_to_vm)
Christoph Hellwig	7b6d91d	2010-08-07 18:20:39 +0200	[diff] [blame]	1128	bio->bi_rw \|= REQ_WRITE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1129
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1130	bio->bi_bdev = bdev;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1131	bio->bi_flags \|= (1 << BIO_USER_MAPPED);
				1132	return bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1133
				1134	out_unmap:
				1135	for (i = 0; i < nr_pages; i++) {
				1136	if(!pages[i])
				1137	break;
				1138	page_cache_release(pages[i]);
				1139	}
				1140	out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1141	kfree(pages);
				1142	bio_put(bio);
				1143	return ERR_PTR(ret);
				1144	}
				1145
				1146	/**
				1147	* bio_map_user - map user address into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1148	* @q: the struct request_queue for the bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1149	* @bdev: destination block device
				1150	* @uaddr: start of user address
				1151	* @len: length in bytes
				1152	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1153	* @gfp_mask: memory allocation flags
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1154	*
				1155	* Map the user space address into a bio suitable for io to a block
				1156	* device. Returns an error pointer in case of error.
				1157	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1158	struct bio bio_map_user(struct request_queue q, struct block_device *bdev,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1159	unsigned long uaddr, unsigned int len, int write_to_vm,
				1160	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1161	{
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1162	struct sg_iovec iov;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1163
viro@ZenIV.linux.org.uk	3f70353	2005-09-09 16:53:56 +0100	[diff] [blame]	1164	iov.iov_base = (void __user *)uaddr;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1165	iov.iov_len = len;
				1166
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1167	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1168	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1169	EXPORT_SYMBOL(bio_map_user);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1170
				1171	/**
				1172	* bio_map_user_iov - map user sg_iovec table into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1173	* @q: the struct request_queue for the bio
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1174	* @bdev: destination block device
				1175	* @iov: the iovec.
				1176	* @iov_count: number of elements in the iovec
				1177	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1178	* @gfp_mask: memory allocation flags
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1179	*
				1180	* Map the user space address into a bio suitable for io to a block
				1181	* device. Returns an error pointer in case of error.
				1182	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1183	struct bio bio_map_user_iov(struct request_queue q, struct block_device *bdev,
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1184	struct sg_iovec *iov, int iov_count,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1185	int write_to_vm, gfp_t gfp_mask)
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1186	{
				1187	struct bio *bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1188
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1189	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
				1190	gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1191	if (IS_ERR(bio))
				1192	return bio;
				1193
				1194	/*
				1195	* subtle -- if __bio_map_user() ended up bouncing a bio,
				1196	* it would normally disappear when its bi_end_io is run.
				1197	* however, we need it for the unmap, so grab an extra
				1198	* reference to it
				1199	*/
				1200	bio_get(bio);
				1201
Mike Christie	0e75f90	2006-12-01 10:40:55 +0100	[diff] [blame]	1202	return bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1203	}
				1204
				1205	static void __bio_unmap_user(struct bio *bio)
				1206	{
				1207	struct bio_vec *bvec;
				1208	int i;
				1209
				1210	/*
				1211	* make sure we dirty pages we wrote to
				1212	*/
				1213	__bio_for_each_segment(bvec, bio, i, 0) {
				1214	if (bio_data_dir(bio) == READ)
				1215	set_page_dirty_lock(bvec->bv_page);
				1216
				1217	page_cache_release(bvec->bv_page);
				1218	}
				1219
				1220	bio_put(bio);
				1221	}
				1222
				1223	/**
				1224	* bio_unmap_user - unmap a bio
				1225	* @bio: the bio being unmapped
				1226	*
				1227	* Unmap a bio previously mapped by bio_map_user(). Must be called with
				1228	* a process context.
				1229	*
				1230	* bio_unmap_user() may sleep.
				1231	*/
				1232	void bio_unmap_user(struct bio *bio)
				1233	{
				1234	__bio_unmap_user(bio);
				1235	bio_put(bio);
				1236	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1237	EXPORT_SYMBOL(bio_unmap_user);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1238
/*
 * Completion callback installed by __bio_map_kern(). It only drops a
 * reference on the bio; @err is not looked at.
 */
static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}
				1243
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1244	static struct bio __bio_map_kern(struct request_queue q, void *data,
Al Viro	27496a8	2005-10-21 03:20:48 -0400	[diff] [blame]	1245	unsigned int len, gfp_t gfp_mask)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1246	{
				1247	unsigned long kaddr = (unsigned long)data;
				1248	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1249	unsigned long start = kaddr >> PAGE_SHIFT;
				1250	const int nr_pages = end - start;
				1251	int offset, i;
				1252	struct bio *bio;
				1253
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1254	bio = bio_kmalloc(gfp_mask, nr_pages);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1255	if (!bio)
				1256	return ERR_PTR(-ENOMEM);
				1257
				1258	offset = offset_in_page(kaddr);
				1259	for (i = 0; i < nr_pages; i++) {
				1260	unsigned int bytes = PAGE_SIZE - offset;
				1261
				1262	if (len <= 0)
				1263	break;
				1264
				1265	if (bytes > len)
				1266	bytes = len;
				1267
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1268	if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				1269	offset) < bytes)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1270	break;
				1271
				1272	data += bytes;
				1273	len -= bytes;
				1274	offset = 0;
				1275	}
				1276
Jens Axboe	b823825	2005-06-20 14:05:27 +0200	[diff] [blame]	1277	bio->bi_end_io = bio_map_kern_endio;
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1278	return bio;
				1279	}
				1280
				1281	/**
				1282	* bio_map_kern - map kernel address into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1283	* @q: the struct request_queue for the bio
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1284	* @data: pointer to buffer to map
				1285	* @len: length in bytes
				1286	* @gfp_mask: allocation flags for bio allocation
				1287	*
				1288	* Map the kernel address into a bio suitable for io to a block
				1289	* device. Returns an error pointer in case of error.
				1290	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1291	struct bio bio_map_kern(struct request_queue q, void *data, unsigned int len,
Al Viro	27496a8	2005-10-21 03:20:48 -0400	[diff] [blame]	1292	gfp_t gfp_mask)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1293	{
				1294	struct bio *bio;
				1295
				1296	bio = __bio_map_kern(q, data, len, gfp_mask);
				1297	if (IS_ERR(bio))
				1298	return bio;
				1299
				1300	if (bio->bi_size == len)
				1301	return bio;
				1302
				1303	/*
				1304	* Don't support partial mappings.
				1305	*/
				1306	bio_put(bio);
				1307	return ERR_PTR(-EINVAL);
				1308	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1309	EXPORT_SYMBOL(bio_map_kern);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1310
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1311	static void bio_copy_kern_endio(struct bio *bio, int err)
				1312	{
				1313	struct bio_vec *bvec;
				1314	const int read = bio_data_dir(bio) == READ;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1315	struct bio_map_data *bmd = bio->bi_private;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1316	int i;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1317	char *p = bmd->sgvecs[0].iov_base;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1318
				1319	__bio_for_each_segment(bvec, bio, i, 0) {
				1320	char *addr = page_address(bvec->bv_page);
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1321	int len = bmd->iovecs[i].bv_len;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1322
Tejun Heo	4fc981e	2009-05-19 18:33:06 +0900	[diff] [blame]	1323	if (read)
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1324	memcpy(p, addr, len);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1325
				1326	__free_page(bvec->bv_page);
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1327	p += len;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1328	}
				1329
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1330	bio_free_map_data(bmd);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1331	bio_put(bio);
				1332	}
				1333
				1334	/**
				1335	* bio_copy_kern - copy kernel address into bio
				1336	* @q: the struct request_queue for the bio
				1337	* @data: pointer to buffer to copy
				1338	* @len: length in bytes
				1339	* @gfp_mask: allocation flags for bio and page allocation
Randy Dunlap	ffee025	2008-04-30 09:08:54 +0200	[diff] [blame]	1340	* @reading: data direction is READ
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1341	*
				1342	* copy the kernel address into a bio suitable for io to a block
				1343	* device. Returns an error pointer in case of error.
				1344	*/
				1345	struct bio bio_copy_kern(struct request_queue q, void *data, unsigned int len,
				1346	gfp_t gfp_mask, int reading)
				1347	{
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1348	struct bio *bio;
				1349	struct bio_vec *bvec;
FUJITA Tomonori	4d8ab62	2008-08-28 15:05:57 +0900	[diff] [blame]	1350	int i;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1351
FUJITA Tomonori	4d8ab62	2008-08-28 15:05:57 +0900	[diff] [blame]	1352	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
				1353	if (IS_ERR(bio))
				1354	return bio;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1355
				1356	if (!reading) {
				1357	void *p = data;
				1358
				1359	bio_for_each_segment(bvec, bio, i) {
				1360	char *addr = page_address(bvec->bv_page);
				1361
				1362	memcpy(addr, p, bvec->bv_len);
				1363	p += bvec->bv_len;
				1364	}
				1365	}
				1366
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1367	bio->bi_end_io = bio_copy_kern_endio;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1368
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1369	return bio;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1370	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1371	EXPORT_SYMBOL(bio_copy_kern);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1372
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1373	/*
				1374	* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
				1375	* for performing direct-IO in BIOs.
				1376	*
				1377	* The problem is that we cannot run set_page_dirty() from interrupt context
				1378	* because the required locks are not interrupt-safe. So what we can do is to
				1379	* mark the pages dirty _before_ performing IO. And in interrupt context,
				1380	* check that the pages are still dirty. If so, fine. If not, redirty them
				1381	* in process context.
				1382	*
				1383	* We special-case compound pages here: normally this means reads into hugetlb
				1384	* pages. The logic in here doesn't really work right for compound pages
				1385	* because the VM does not uniformly chase down the head page in all cases.
				1386	* But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
				1387	* handle them at all. So we skip compound pages here at an early stage.
				1388	*
				1389	* Note that this code is very hard to test under normal circumstances because
				1390	* direct-io pins the pages with get_user_pages(). This makes
				1391	* is_page_cache_freeable return false, and the VM will not clean the pages.
Artem Bityutskiy	0d5c3eb	2012-07-25 18:12:08 +0300	[diff] [blame]	1392	* But other code (eg, flusher threads) could clean the pages if they are mapped
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1393	* pagecache.
				1394	*
				1395	* Simply disabling the call to bio_set_pages_dirty() is a good way to test the
				1396	* deferred bio dirtying paths.
				1397	*/
				1398
				1399	/*
				1400	* bio_set_pages_dirty() will mark all the bio's pages as dirty.
				1401	*/
				1402	void bio_set_pages_dirty(struct bio *bio)
				1403	{
				1404	struct bio_vec *bvec = bio->bi_io_vec;
				1405	int i;
				1406
				1407	for (i = 0; i < bio->bi_vcnt; i++) {
				1408	struct page *page = bvec[i].bv_page;
				1409
				1410	if (page && !PageCompound(page))
				1411	set_page_dirty_lock(page);
				1412	}
				1413	}
				1414
Adrian Bunk	86b6c7a	2008-02-18 13:48:32 +0100	[diff] [blame]	1415	static void bio_release_pages(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1416	{
				1417	struct bio_vec *bvec = bio->bi_io_vec;
				1418	int i;
				1419
				1420	for (i = 0; i < bio->bi_vcnt; i++) {
				1421	struct page *page = bvec[i].bv_page;
				1422
				1423	if (page)
				1424	put_page(page);
				1425	}
				1426	}
				1427
				1428	/*
				1429	* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
				1430	* If they are, then fine. If, however, some pages are clean then they must
				1431	* have been written out during the direct-IO read. So we take another ref on
				1432	* the BIO and the offending pages and re-dirty the pages in process context.
				1433	*
				1434	* It is expected that bio_check_pages_dirty() will wholly own the BIO from
				1435	* here on. It will run one page_cache_release() against each page and will
				1436	* run one bio_put() against the BIO.
				1437	*/
				1438
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1439	static void bio_dirty_fn(struct work_struct *work);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1440
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1441	static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1442	static DEFINE_SPINLOCK(bio_dirty_lock);
				1443	static struct bio *bio_dirty_list;
				1444
				1445	/*
				1446	* This runs in process context
				1447	*/
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1448	static void bio_dirty_fn(struct work_struct *work)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1449	{
				1450	unsigned long flags;
				1451	struct bio *bio;
				1452
				1453	spin_lock_irqsave(&bio_dirty_lock, flags);
				1454	bio = bio_dirty_list;
				1455	bio_dirty_list = NULL;
				1456	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1457
				1458	while (bio) {
				1459	struct bio *next = bio->bi_private;
				1460
				1461	bio_set_pages_dirty(bio);
				1462	bio_release_pages(bio);
				1463	bio_put(bio);
				1464	bio = next;
				1465	}
				1466	}
				1467
				1468	void bio_check_pages_dirty(struct bio *bio)
				1469	{
				1470	struct bio_vec *bvec = bio->bi_io_vec;
				1471	int nr_clean_pages = 0;
				1472	int i;
				1473
				1474	for (i = 0; i < bio->bi_vcnt; i++) {
				1475	struct page *page = bvec[i].bv_page;
				1476
				1477	if (PageDirty(page) \|\| PageCompound(page)) {
				1478	page_cache_release(page);
				1479	bvec[i].bv_page = NULL;
				1480	} else {
				1481	nr_clean_pages++;
				1482	}
				1483	}
				1484
				1485	if (nr_clean_pages) {
				1486	unsigned long flags;
				1487
				1488	spin_lock_irqsave(&bio_dirty_lock, flags);
				1489	bio->bi_private = bio_dirty_list;
				1490	bio_dirty_list = bio;
				1491	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1492	schedule_work(&bio_dirty_work);
				1493	} else {
				1494	bio_put(bio);
				1495	}
				1496	}
				1497
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/*
 * Call flush_dcache_page() on the page of every segment of @bi.
 */
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bio_vec *seg;
	int idx;

	bio_for_each_segment(seg, bi, idx) {
		flush_dcache_page(seg->bv_page);
	}
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
				1509
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1510	/**
				1511	* bio_endio - end I/O on a bio
				1512	* @bio: bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1513	* @error: error, if any
				1514	*
				1515	* Description:
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1516	* bio_endio() will end I/O on the whole bio. bio_endio() is the
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1517	* preferred way to end I/O on a bio, it takes care of clearing
				1518	* BIO_UPTODATE on error. @error is 0 on success, and and one of the
				1519	* established -Exxxx (-EIO, for instance) error values in case
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	1520	* something went wrong. No one should call bi_end_io() directly on a
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1521	* bio unless they own it and thus know that it has an end_io
				1522	* function.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1523	**/
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1524	void bio_endio(struct bio *bio, int error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1525	{
				1526	if (error)
				1527	clear_bit(BIO_UPTODATE, &bio->bi_flags);
NeilBrown	9cc54d4	2007-09-27 12:46:12 +0200	[diff] [blame]	1528	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
				1529	error = -EIO;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1530
Tejun Heo	3a366e6	2013-01-11 13:06:33 -0800	[diff] [blame]	1531	trace_block_bio_complete(bio, error);
				1532
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1533	if (bio->bi_end_io)
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1534	bio->bi_end_io(bio, error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1535	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1536	EXPORT_SYMBOL(bio_endio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1537
				1538	void bio_pair_release(struct bio_pair *bp)
				1539	{
				1540	if (atomic_dec_and_test(&bp->cnt)) {
				1541	struct bio *master = bp->bio1.bi_private;
				1542
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1543	bio_endio(master, bp->error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1544	mempool_free(bp, bp->bio2.bi_private);
				1545	}
				1546	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1547	EXPORT_SYMBOL(bio_pair_release);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1548
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1549	static void bio_pair_end_1(struct bio *bi, int err)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1550	{
				1551	struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
				1552
				1553	if (err)
				1554	bp->error = err;
				1555
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1556	bio_pair_release(bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1557	}
				1558
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1559	static void bio_pair_end_2(struct bio *bi, int err)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1560	{
				1561	struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
				1562
				1563	if (err)
				1564	bp->error = err;
				1565
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1566	bio_pair_release(bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1567	}
				1568
				1569	/*
Alberto Bertogli	c7eee1b	2009-01-25 23:36:14 -0200	[diff] [blame]	1570	* split a bio - only worry about a bio with a single page in its iovec
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1571	*/
Denis ChengRq	6feef53	2008-10-09 08:57:05 +0200	[diff] [blame]	1572	struct bio_pair bio_split(struct bio bi, int first_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1573	{
Denis ChengRq	6feef53	2008-10-09 08:57:05 +0200	[diff] [blame]	1574	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1575
				1576	if (!bp)
				1577	return bp;
				1578
Arnaldo Carvalho de Melo	5f3ea37	2008-10-30 08:34:33 +0100	[diff] [blame]	1579	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
Jens Axboe	2056a78	2006-03-23 20:00:26 +0100	[diff] [blame]	1580	bi->bi_sector + first_sectors);
				1581
Shaohua Li	02f3939	2012-09-28 10:38:48 +0200	[diff] [blame]	1582	BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1583	BUG_ON(bi->bi_idx != 0);
				1584	atomic_set(&bp->cnt, 3);
				1585	bp->error = 0;
				1586	bp->bio1 = *bi;
				1587	bp->bio2 = *bi;
				1588	bp->bio2.bi_sector += first_sectors;
				1589	bp->bio2.bi_size -= first_sectors << 9;
				1590	bp->bio1.bi_size = first_sectors << 9;
				1591
Shaohua Li	02f3939	2012-09-28 10:38:48 +0200	[diff] [blame]	1592	if (bi->bi_vcnt != 0) {
				1593	bp->bv1 = bi->bi_io_vec[0];
				1594	bp->bv2 = bi->bi_io_vec[0];
Martin K. Petersen	4363ac7	2012-09-18 12:19:27 -0400	[diff] [blame]	1595
Shaohua Li	02f3939	2012-09-28 10:38:48 +0200	[diff] [blame]	1596	if (bio_is_rw(bi)) {
				1597	bp->bv2.bv_offset += first_sectors << 9;
				1598	bp->bv2.bv_len -= first_sectors << 9;
				1599	bp->bv1.bv_len = first_sectors << 9;
				1600	}
				1601
				1602	bp->bio1.bi_io_vec = &bp->bv1;
				1603	bp->bio2.bi_io_vec = &bp->bv2;
				1604
				1605	bp->bio1.bi_max_vecs = 1;
				1606	bp->bio2.bi_max_vecs = 1;
Martin K. Petersen	4363ac7	2012-09-18 12:19:27 -0400	[diff] [blame]	1607	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1608
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1609	bp->bio1.bi_end_io = bio_pair_end_1;
				1610	bp->bio2.bi_end_io = bio_pair_end_2;
				1611
				1612	bp->bio1.bi_private = bi;
Denis ChengRq	6feef53	2008-10-09 08:57:05 +0200	[diff] [blame]	1613	bp->bio2.bi_private = bio_split_pool;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1614
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	1615	if (bio_integrity(bi))
				1616	bio_integrity_split(bi, bp, first_sectors);
				1617
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1618	return bp;
				1619	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1620	EXPORT_SYMBOL(bio_split);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1621
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1622	/**
				1623	* bio_sector_offset - Find hardware sector offset in bio
				1624	* @bio: bio to inspect
				1625	* @index: bio_vec index
				1626	* @offset: offset in bv_page
				1627	*
				1628	* Return the number of hardware sectors between beginning of bio
				1629	* and an end point indicated by a bio_vec index and an offset
				1630	* within that vector's page.
				1631	*/
				1632	sector_t bio_sector_offset(struct bio *bio, unsigned short index,
				1633	unsigned int offset)
				1634	{
Martin K. Petersen	e1defc4	2009-05-22 17:17:49 -0400	[diff] [blame]	1635	unsigned int sector_sz;
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1636	struct bio_vec *bv;
				1637	sector_t sectors;
				1638	int i;
				1639
Martin K. Petersen	e1defc4	2009-05-22 17:17:49 -0400	[diff] [blame]	1640	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1641	sectors = 0;
				1642
				1643	if (index >= bio->bi_idx)
				1644	index = bio->bi_vcnt - 1;
				1645
				1646	__bio_for_each_segment(bv, bio, i, 0) {
				1647	if (i == index) {
				1648	if (offset > bv->bv_offset)
				1649	sectors += (offset - bv->bv_offset) / sector_sz;
				1650	break;
				1651	}
				1652
				1653	sectors += bv->bv_len / sector_sz;
				1654	}
				1655
				1656	return sectors;
				1657	}
				1658	EXPORT_SYMBOL(bio_sector_offset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1659
				1660	/*
				1661	* create memory pools for biovec's in a bio_set.
				1662	* use the global biovec slabs created for general use.
				1663	*/
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	1664	mempool_t biovec_create_pool(struct bio_set bs, int pool_entries)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1665	{
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	1666	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1667
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	1668	return mempool_create_slab_pool(pool_entries, bp->slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1669	}
				1670
				1671	void bioset_free(struct bio_set *bs)
				1672	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1673	if (bs->rescue_workqueue)
				1674	destroy_workqueue(bs->rescue_workqueue);
				1675
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1676	if (bs->bio_pool)
				1677	mempool_destroy(bs->bio_pool);
				1678
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	1679	if (bs->bvec_pool)
				1680	mempool_destroy(bs->bvec_pool);
				1681
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	1682	bioset_integrity_free(bs);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1683	bio_put_slab(bs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1684
				1685	kfree(bs);
				1686	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1687	EXPORT_SYMBOL(bioset_free);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1688
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1689	/**
				1690	* bioset_create - Create a bio_set
				1691	* @pool_size: Number of bio and bio_vecs to cache in the mempool
				1692	* @front_pad: Number of bytes to allocate in front of the returned bio
				1693	*
				1694	* Description:
				1695	* Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
				1696	* to ask for a number of bytes to be allocated in front of the bio.
				1697	* Front pad allocation is useful for embedding the bio inside
				1698	* another structure, to avoid allocating extra data to go with the bio.
				1699	* Note that the bio must be embedded at the END of that structure always,
				1700	* or things will break badly.
				1701	*/
				1702	struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1703	{
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1704	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1705	struct bio_set *bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1706
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1707	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1708	if (!bs)
				1709	return NULL;
				1710
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1711	bs->front_pad = front_pad;
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1712
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1713	spin_lock_init(&bs->rescue_lock);
				1714	bio_list_init(&bs->rescue_list);
				1715	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
				1716
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1717	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1718	if (!bs->bio_slab) {
				1719	kfree(bs);
				1720	return NULL;
				1721	}
				1722
				1723	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1724	if (!bs->bio_pool)
				1725	goto bad;
				1726
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame^]	1727	bs->bvec_pool = biovec_create_pool(bs, pool_size);
				1728	if (!bs->bvec_pool)
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1729	goto bad;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1730
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1731	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
				1732	if (!bs->rescue_workqueue)
				1733	goto bad;
				1734
				1735	return bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1736	bad:
				1737	bioset_free(bs);
				1738	return NULL;
				1739	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1740	EXPORT_SYMBOL(bioset_create);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1741
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	1742	#ifdef CONFIG_BLK_CGROUP
				1743	/**
				1744	* bio_associate_current - associate a bio with %current
				1745	* @bio: target bio
				1746	*
				1747	* Associate @bio with %current if it hasn't been associated yet. Block
				1748	* layer will treat @bio as if it were issued by %current no matter which
				1749	* task actually issues it.
				1750	*
				1751	* This function takes an extra reference of @task's io_context and blkcg
				1752	* which will be put when @bio is released. The caller must own @bio,
				1753	* ensure %current->io_context exists, and is responsible for synchronizing
				1754	* calls to this function.
				1755	*/
				1756	int bio_associate_current(struct bio *bio)
				1757	{
				1758	struct io_context *ioc;
				1759	struct cgroup_subsys_state *css;
				1760
				1761	if (bio->bi_ioc)
				1762	return -EBUSY;
				1763
				1764	ioc = current->io_context;
				1765	if (!ioc)
				1766	return -ENOENT;
				1767
				1768	/* acquire active ref on @ioc and associate */
				1769	get_io_context_active(ioc);
				1770	bio->bi_ioc = ioc;
				1771
				1772	/* associate blkcg if exists */
				1773	rcu_read_lock();
				1774	css = task_subsys_state(current, blkio_subsys_id);
				1775	if (css && css_tryget(css))
				1776	bio->bi_css = css;
				1777	rcu_read_unlock();
				1778
				1779	return 0;
				1780	}
				1781
				1782	/**
				1783	* bio_disassociate_task - undo bio_associate_current()
				1784	* @bio: target bio
				1785	*/
				1786	void bio_disassociate_task(struct bio *bio)
				1787	{
				1788	if (bio->bi_ioc) {
				1789	put_io_context(bio->bi_ioc);
				1790	bio->bi_ioc = NULL;
				1791	}
				1792	if (bio->bi_css) {
				1793	css_put(bio->bi_css);
				1794	bio->bi_css = NULL;
				1795	}
				1796	}
				1797
				1798	#endif /* CONFIG_BLK_CGROUP */
				1799
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1800	static void __init biovec_init_slabs(void)
				1801	{
				1802	int i;
				1803
				1804	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
				1805	int size;
				1806	struct biovec_slab *bvs = bvec_slabs + i;
				1807
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	1808	if (bvs->nr_vecs <= BIO_INLINE_VECS) {
				1809	bvs->slab = NULL;
				1810	continue;
				1811	}
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	1812
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1813	size = bvs->nr_vecs * sizeof(struct bio_vec);
				1814	bvs->slab = kmem_cache_create(bvs->name, size, 0,
Paul Mundt	20c2df8	2007-07-20 10:11:58 +0900	[diff] [blame]	1815	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1816	}
				1817	}
				1818
				1819	static int __init init_bio(void)
				1820	{
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1821	bio_slab_max = 2;
				1822	bio_slab_nr = 0;
				1823	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
				1824	if (!bio_slabs)
				1825	panic("bio: can't allocate bios\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1826
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	1827	bio_integrity_init();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1828	biovec_init_slabs();
				1829
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1830	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1831	if (!fs_bio_set)
				1832	panic("bio: can't allocate bios\n");
				1833
Martin K. Petersen	a91a278	2011-03-17 11:11:05 +0100	[diff] [blame]	1834	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
				1835	panic("bio: can't create integrity pool\n");
				1836
Matthew Dobson	0eaae62a	2006-03-26 01:37:47 -0800	[diff] [blame]	1837	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
				1838	sizeof(struct bio_pair));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1839	if (!bio_split_pool)
				1840	panic("bio: can't create split pool\n");
				1841
				1842	return 0;
				1843	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1844	subsys_initcall(init_bio);