Blame - fs/bio.c - SHIFTPHONES/mainline/linux

blob: 75c49a38223969c1f7256868cb3b09fc7d3bd286 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Jens Axboe	0fe2347	2006-09-04 15:41:16 +0200	[diff] [blame]	2	* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	* GNU General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public Licens
				14	* along with this program; if not, write to the Free Software
				15	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
				16	*
				17	*/
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
				20	#include <linux/bio.h>
				21	#include <linux/blkdev.h>
Kent Overstreet	a27bb33	2013-05-07 16:19:08 -0700	[diff] [blame]	22	#include <linux/uio.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	23	#include <linux/iocontext.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include <linux/slab.h>
				25	#include <linux/init.h>
				26	#include <linux/kernel.h>
Paul Gortmaker	630d9c4	2011-11-16 23:57:37 -0500	[diff] [blame]	27	#include <linux/export.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include <linux/mempool.h>
				29	#include <linux/workqueue.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	30	#include <linux/cgroup.h>
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	31	#include <scsi/sg.h> /* for struct sg_iovec */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	32
Li Zefan	5578213	2009-06-09 13:43:05 +0800	[diff] [blame]	33	#include <trace/events/block.h>
Ingo Molnar	0bfc245	2008-11-26 11:59:56 +0100	[diff] [blame]	34
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	35	/*
				36	* Test patch to inline a certain number of bi_io_vec's inside the bio
				37	* itself, to shrink a bio data allocation from two mempool calls to one
				38	*/
				39	#define BIO_INLINE_VECS 4
				40
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41	/*
				42	* if you change this list, also change bvec_alloc or things will
				43	* break badly! cannot be bigger than what you can fit into an
				44	* unsigned short
				45	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	46	#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
Martin K. Petersen	df67714	2011-03-08 08:28:01 +0100	[diff] [blame]	47	static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	48	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
				49	};
				50	#undef BV
				51
				52	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53	* fs_bio_set is the bio_set containing bio and iovec memory pools used by
				54	* IO code that does not need private memory pools.
				55	*/
Martin K. Petersen	51d654e	2008-06-17 18:59:56 +0200	[diff] [blame]	56	struct bio_set *fs_bio_set;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	57	EXPORT_SYMBOL(fs_bio_set);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	58
/*
 * Our slab pool management: one bio_slab per slot of the bio_slabs array.
 */
struct bio_slab {
	struct kmem_cache *slab;	/* the cache; NULL while the slot is free */
	unsigned int slab_ref;		/* number of users of this cache */
	unsigned int slab_size;		/* object size the cache was created with */
	char name[8];			/* cache name, formatted as "bio-<slot>" */
};

/* Taken around every lookup/update of bio_slabs in the helpers below. */
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
/* Slots in use (high-water mark) and slots allocated, respectively. */
static unsigned int bio_slab_nr;
static unsigned int bio_slab_max;
				71
				72	static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
				73	{
				74	unsigned int sz = sizeof(struct bio) + extra_size;
				75	struct kmem_cache *slab = NULL;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	76	struct bio_slab bslab, new_bio_slabs;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	77	unsigned int new_bio_slab_max;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	78	unsigned int i, entry = -1;
				79
				80	mutex_lock(&bio_slab_lock);
				81
				82	i = 0;
				83	while (i < bio_slab_nr) {
Thiago Farina	f06f135	2010-01-19 14:07:09 +0100	[diff] [blame]	84	bslab = &bio_slabs[i];
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	85
				86	if (!bslab->slab && entry == -1)
				87	entry = i;
				88	else if (bslab->slab_size == sz) {
				89	slab = bslab->slab;
				90	bslab->slab_ref++;
				91	break;
				92	}
				93	i++;
				94	}
				95
				96	if (slab)
				97	goto out_unlock;
				98
				99	if (bio_slab_nr == bio_slab_max && entry == -1) {
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	100	new_bio_slab_max = bio_slab_max << 1;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	101	new_bio_slabs = krealloc(bio_slabs,
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	102	new_bio_slab_max * sizeof(struct bio_slab),
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	103	GFP_KERNEL);
				104	if (!new_bio_slabs)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	105	goto out_unlock;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	106	bio_slab_max = new_bio_slab_max;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	107	bio_slabs = new_bio_slabs;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	108	}
				109	if (entry == -1)
				110	entry = bio_slab_nr++;
				111
				112	bslab = &bio_slabs[entry];
				113
				114	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
				115	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
				116	if (!slab)
				117	goto out_unlock;
				118
Mandeep Singh Baines	80cdc6d	2011-03-22 16:33:54 -0700	[diff] [blame]	119	printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	120	bslab->slab = slab;
				121	bslab->slab_ref = 1;
				122	bslab->slab_size = sz;
				123	out_unlock:
				124	mutex_unlock(&bio_slab_lock);
				125	return slab;
				126	}
				127
				128	static void bio_put_slab(struct bio_set *bs)
				129	{
				130	struct bio_slab *bslab = NULL;
				131	unsigned int i;
				132
				133	mutex_lock(&bio_slab_lock);
				134
				135	for (i = 0; i < bio_slab_nr; i++) {
				136	if (bs->bio_slab == bio_slabs[i].slab) {
				137	bslab = &bio_slabs[i];
				138	break;
				139	}
				140	}
				141
				142	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
				143	goto out;
				144
				145	WARN_ON(!bslab->slab_ref);
				146
				147	if (--bslab->slab_ref)
				148	goto out;
				149
				150	kmem_cache_destroy(bslab->slab);
				151	bslab->slab = NULL;
				152
				153	out:
				154	mutex_unlock(&bio_slab_lock);
				155	}
				156
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	157	unsigned int bvec_nr_vecs(unsigned short idx)
				158	{
				159	return bvec_slabs[idx].nr_vecs;
				160	}
				161
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	162	void bvec_free(mempool_t pool, struct bio_vec bv, unsigned int idx)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	163	{
				164	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
				165
				166	if (idx == BIOVEC_MAX_IDX)
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	167	mempool_free(bv, pool);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	168	else {
				169	struct biovec_slab *bvs = bvec_slabs + idx;
				170
				171	kmem_cache_free(bvs->slab, bv);
				172	}
				173	}
				174
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	175	struct bio_vec bvec_alloc(gfp_t gfp_mask, int nr, unsigned long idx,
				176	mempool_t *pool)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	177	{
				178	struct bio_vec *bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	179
				180	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	181	* see comment near bvec_array define!
				182	*/
				183	switch (nr) {
				184	case 1:
				185	*idx = 0;
				186	break;
				187	case 2 ... 4:
				188	*idx = 1;
				189	break;
				190	case 5 ... 16:
				191	*idx = 2;
				192	break;
				193	case 17 ... 64:
				194	*idx = 3;
				195	break;
				196	case 65 ... 128:
				197	*idx = 4;
				198	break;
				199	case 129 ... BIO_MAX_PAGES:
				200	*idx = 5;
				201	break;
				202	default:
				203	return NULL;
				204	}
				205
				206	/*
				207	* idx now points to the pool we want to allocate from. only the
				208	* 1-vec entry pool is mempool backed.
				209	*/
				210	if (*idx == BIOVEC_MAX_IDX) {
				211	fallback:
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	212	bvl = mempool_alloc(pool, gfp_mask);
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	213	} else {
				214	struct biovec_slab bvs = bvec_slabs + idx;
				215	gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT \| __GFP_IO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	216
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	217	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	218	* Make this allocation restricted and don't dump info on
				219	* allocation failures, since we'll fallback to the mempool
				220	* in case of failure.
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	221	*/
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	222	__gfp_mask \|= __GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN;
				223
				224	/*
				225	* Try a slab allocation. If this fails and __GFP_WAIT
				226	* is set, retry with the 1-entry mempool
				227	*/
				228	bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
				229	if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
				230	*idx = BIOVEC_MAX_IDX;
				231	goto fallback;
				232	}
				233	}
				234
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	235	return bvl;
				236	}
				237
/*
 * Release what hangs off @bio (task association, integrity payload)
 * without freeing the bio itself.
 */
static void __bio_free(struct bio *bio)
{
	bio_disassociate_task(bio);

	if (!bio_integrity(bio))
		return;

	bio_integrity_free(bio);
}
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	245
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	246	static void bio_free(struct bio *bio)
				247	{
				248	struct bio_set *bs = bio->bi_pool;
				249	void *p;
				250
				251	__bio_free(bio);
				252
				253	if (bs) {
Kent Overstreet	a38352e	2012-05-25 13:03:11 -0700	[diff] [blame]	254	if (bio_flagged(bio, BIO_OWNS_VEC))
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	255	bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	256
				257	/*
				258	* If we have front padding, adjust the bio pointer before freeing
				259	*/
				260	p = bio;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	261	p -= bs->front_pad;
				262
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	263	mempool_free(p, bs->bio_pool);
				264	} else {
				265	/* Bio was allocated by bio_kmalloc() */
				266	kfree(bio);
				267	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	268	}
				269
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	270	void bio_init(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	271	{
Jens Axboe	2b94de5	2007-07-18 13:14:03 +0200	[diff] [blame]	272	memset(bio, 0, sizeof(*bio));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	273	bio->bi_flags = 1 << BIO_UPTODATE;
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	274	atomic_set(&bio->bi_remaining, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	275	atomic_set(&bio->bi_cnt, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	276	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	277	EXPORT_SYMBOL(bio_init);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	278
				279	/**
Kent Overstreet	f44b48c7	2012-09-06 15:34:58 -0700	[diff] [blame]	280	* bio_reset - reinitialize a bio
				281	* @bio: bio to reset
				282	*
				283	* Description:
				284	* After calling bio_reset(), @bio will be in the same state as a freshly
				285	* allocated bio returned bio bio_alloc_bioset() - the only fields that are
				286	* preserved are the ones that are initialized by bio_alloc_bioset(). See
				287	* comment in struct bio.
				288	*/
				289	void bio_reset(struct bio *bio)
				290	{
				291	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
				292
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	293	__bio_free(bio);
Kent Overstreet	f44b48c7	2012-09-06 15:34:58 -0700	[diff] [blame]	294
				295	memset(bio, 0, BIO_RESET_BYTES);
				296	bio->bi_flags = flags\|(1 << BIO_UPTODATE);
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	297	atomic_set(&bio->bi_remaining, 1);
Kent Overstreet	f44b48c7	2012-09-06 15:34:58 -0700	[diff] [blame]	298	}
				299	EXPORT_SYMBOL(bio_reset);
				300
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	301	static void bio_chain_endio(struct bio *bio, int error)
				302	{
				303	bio_endio(bio->bi_private, error);
				304	bio_put(bio);
				305	}
				306
				307	/**
				308	* bio_chain - chain bio completions
				309	*
				310	* The caller won't have a bi_end_io called when @bio completes - instead,
				311	* @parent's bi_end_io won't be called until both @parent and @bio have
				312	* completed; the chained bio will also be freed when it completes.
				313	*
				314	* The caller must not set bi_private or bi_end_io in @bio.
				315	*/
				316	void bio_chain(struct bio bio, struct bio parent)
				317	{
				318	BUG_ON(bio->bi_private \|\| bio->bi_end_io);
				319
				320	bio->bi_private = parent;
				321	bio->bi_end_io = bio_chain_endio;
				322	atomic_inc(&parent->bi_remaining);
				323	}
				324	EXPORT_SYMBOL(bio_chain);
				325
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	326	static void bio_alloc_rescue(struct work_struct *work)
				327	{
				328	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
				329	struct bio *bio;
				330
				331	while (1) {
				332	spin_lock(&bs->rescue_lock);
				333	bio = bio_list_pop(&bs->rescue_list);
				334	spin_unlock(&bs->rescue_lock);
				335
				336	if (!bio)
				337	break;
				338
				339	generic_make_request(bio);
				340	}
				341	}
				342
				343	static void punt_bios_to_rescuer(struct bio_set *bs)
				344	{
				345	struct bio_list punt, nopunt;
				346	struct bio *bio;
				347
				348	/*
				349	* In order to guarantee forward progress we must punt only bios that
				350	* were allocated from this bio_set; otherwise, if there was a bio on
				351	* there for a stacking driver higher up in the stack, processing it
				352	* could require allocating bios from this bio_set, and doing that from
				353	* our own rescuer would be bad.
				354	*
				355	* Since bio lists are singly linked, pop them all instead of trying to
				356	* remove from the middle of the list:
				357	*/
				358
				359	bio_list_init(&punt);
				360	bio_list_init(&nopunt);
				361
				362	while ((bio = bio_list_pop(current->bio_list)))
				363	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				364
				365	*current->bio_list = nopunt;
				366
				367	spin_lock(&bs->rescue_lock);
				368	bio_list_merge(&bs->rescue_list, &punt);
				369	spin_unlock(&bs->rescue_lock);
				370
				371	queue_work(bs->rescue_workqueue, &bs->rescue_work);
				372	}
				373
Kent Overstreet	f44b48c7	2012-09-06 15:34:58 -0700	[diff] [blame]	374	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	375	* bio_alloc_bioset - allocate a bio for I/O
				376	* @gfp_mask: the GFP_ mask given to the slab allocator
				377	* @nr_iovecs: number of iovecs to pre-allocate
Jaak Ristioja	db18efa	2010-01-15 12:05:07 +0200	[diff] [blame]	378	* @bs: the bio_set to allocate from.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	379	*
				380	* Description:
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	381	* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
				382	* backed by the @bs's mempool.
				383	*
				384	* When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
				385	* able to allocate a bio. This is due to the mempool guarantees. To make this
				386	* work, callers must never allocate more than 1 bio at a time from this pool.
				387	* Callers that need to allocate more than 1 bio must always submit the
				388	* previously allocated bio for IO before attempting to allocate a new one.
				389	* Failure to do so can cause deadlocks under memory pressure.
				390	*
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	391	* Note that when running under generic_make_request() (i.e. any block
				392	* driver), bios are not submitted until after you return - see the code in
				393	* generic_make_request() that converts recursion into iteration, to prevent
				394	* stack overflows.
				395	*
				396	* This would normally mean allocating multiple bios under
				397	* generic_make_request() would be susceptible to deadlocks, but we have
				398	* deadlock avoidance code that resubmits any blocked bios from a rescuer
				399	* thread.
				400	*
				401	* However, we do not guarantee forward progress for allocations from other
				402	* mempools. Doing multiple allocations from the same mempool under
				403	* generic_make_request() should be avoided - instead, use bio_set's front_pad
				404	* for per bio allocations.
				405	*
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	406	* RETURNS:
				407	* Pointer to new bio on success, NULL on failure.
				408	*/
Al Viro	dd0fc66	2005-10-07 07:46:04 +0100	[diff] [blame]	409	struct bio bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	410	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	411	gfp_t saved_gfp = gfp_mask;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	412	unsigned front_pad;
				413	unsigned inline_vecs;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	414	unsigned long idx = BIO_POOL_NONE;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	415	struct bio_vec *bvl = NULL;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	416	struct bio *bio;
				417	void *p;
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	418
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	419	if (!bs) {
				420	if (nr_iovecs > UIO_MAXIOV)
				421	return NULL;
				422
				423	p = kmalloc(sizeof(struct bio) +
				424	nr_iovecs * sizeof(struct bio_vec),
				425	gfp_mask);
				426	front_pad = 0;
				427	inline_vecs = nr_iovecs;
				428	} else {
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	429	/*
				430	* generic_make_request() converts recursion to iteration; this
				431	* means if we're running beneath it, any bios we allocate and
				432	* submit will not be submitted (and thus freed) until after we
				433	* return.
				434	*
				435	* This exposes us to a potential deadlock if we allocate
				436	* multiple bios from the same bio_set() while running
				437	* underneath generic_make_request(). If we were to allocate
				438	* multiple bios (say a stacking block driver that was splitting
				439	* bios), we would deadlock if we exhausted the mempool's
				440	* reserve.
				441	*
				442	* We solve this, and guarantee forward progress, with a rescuer
				443	* workqueue per bio_set. If we go to allocate and there are
				444	* bios on current->bio_list, we first try the allocation
				445	* without __GFP_WAIT; if that fails, we punt those bios we
				446	* would be blocking to the rescuer workqueue before we retry
				447	* with the original gfp_flags.
				448	*/
				449
				450	if (current->bio_list && !bio_list_empty(current->bio_list))
				451	gfp_mask &= ~__GFP_WAIT;
				452
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	453	p = mempool_alloc(bs->bio_pool, gfp_mask);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	454	if (!p && gfp_mask != saved_gfp) {
				455	punt_bios_to_rescuer(bs);
				456	gfp_mask = saved_gfp;
				457	p = mempool_alloc(bs->bio_pool, gfp_mask);
				458	}
				459
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	460	front_pad = bs->front_pad;
				461	inline_vecs = BIO_INLINE_VECS;
				462	}
				463
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	464	if (unlikely(!p))
				465	return NULL;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	466
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	467	bio = p + front_pad;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	468	bio_init(bio);
				469
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	470	if (nr_iovecs > inline_vecs) {
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	471	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	472	if (!bvl && gfp_mask != saved_gfp) {
				473	punt_bios_to_rescuer(bs);
				474	gfp_mask = saved_gfp;
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	475	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	476	}
				477
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	478	if (unlikely(!bvl))
				479	goto err_free;
Kent Overstreet	a38352e	2012-05-25 13:03:11 -0700	[diff] [blame]	480
				481	bio->bi_flags \|= 1 << BIO_OWNS_VEC;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	482	} else if (nr_iovecs) {
				483	bvl = bio->bi_inline_vecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	484	}
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	485
				486	bio->bi_pool = bs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	487	bio->bi_flags \|= idx << BIO_POOL_OFFSET;
				488	bio->bi_max_vecs = nr_iovecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	489	bio->bi_io_vec = bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	490	return bio;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	491
				492	err_free:
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	493	mempool_free(p, bs->bio_pool);
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	494	return NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	495	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	496	EXPORT_SYMBOL(bio_alloc_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	497
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	498	void zero_fill_bio(struct bio *bio)
				499	{
				500	unsigned long flags;
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	501	struct bio_vec bv;
				502	struct bvec_iter iter;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	503
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	504	bio_for_each_segment(bv, bio, iter) {
				505	char *data = bvec_kmap_irq(&bv, &flags);
				506	memset(data, 0, bv.bv_len);
				507	flush_dcache_page(bv.bv_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	508	bvec_kunmap_irq(data, &flags);
				509	}
				510	}
				511	EXPORT_SYMBOL(zero_fill_bio);
				512
				513	/**
				514	* bio_put - release a reference to a bio
				515	* @bio: bio to release reference to
				516	*
				517	* Description:
				518	* Put a reference to a &struct bio, either one you have gotten with
Alberto Bertogli	ad0bf11	2009-11-02 11:39:22 +0100	[diff] [blame]	519	* bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	520	**/
				521	void bio_put(struct bio *bio)
				522	{
				523	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
				524
				525	/*
				526	* last put frees it
				527	*/
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	528	if (atomic_dec_and_test(&bio->bi_cnt))
				529	bio_free(bio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	530	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	531	EXPORT_SYMBOL(bio_put);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	532
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	533	inline int bio_phys_segments(struct request_queue q, struct bio bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	534	{
				535	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
				536	blk_recount_segments(q, bio);
				537
				538	return bio->bi_phys_segments;
				539	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	540	EXPORT_SYMBOL(bio_phys_segments);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	541
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	542	/**
Kent Overstreet	59d276f	2013-11-23 18:19:27 -0800	[diff] [blame]	543	* __bio_clone_fast - clone a bio that shares the original bio's biovec
				544	* @bio: destination bio
				545	* @bio_src: bio to clone
				546	*
				547	* Clone a &bio. Caller will own the returned bio, but not
				548	* the actual data it points to. Reference count of returned
				549	* bio will be one.
				550	*
				551	* Caller must ensure that @bio_src is not freed before @bio.
				552	*/
				553	void __bio_clone_fast(struct bio bio, struct bio bio_src)
				554	{
				555	BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);
				556
				557	/*
				558	* most users will be overriding ->bi_bdev with a new target,
				559	* so we don't set nor calculate new physical/hw segment counts here
				560	*/
				561	bio->bi_bdev = bio_src->bi_bdev;
				562	bio->bi_flags \|= 1 << BIO_CLONED;
				563	bio->bi_rw = bio_src->bi_rw;
				564	bio->bi_iter = bio_src->bi_iter;
				565	bio->bi_io_vec = bio_src->bi_io_vec;
				566	}
				567	EXPORT_SYMBOL(__bio_clone_fast);
				568
				569	/**
				570	* bio_clone_fast - clone a bio that shares the original bio's biovec
				571	* @bio: bio to clone
				572	* @gfp_mask: allocation priority
				573	* @bs: bio_set to allocate from
				574	*
				575	* Like __bio_clone_fast, only also allocates the returned bio
				576	*/
				577	struct bio bio_clone_fast(struct bio bio, gfp_t gfp_mask, struct bio_set *bs)
				578	{
				579	struct bio *b;
				580
				581	b = bio_alloc_bioset(gfp_mask, 0, bs);
				582	if (!b)
				583	return NULL;
				584
				585	__bio_clone_fast(b, bio);
				586
				587	if (bio_integrity(bio)) {
				588	int ret;
				589
				590	ret = bio_integrity_clone(b, bio, gfp_mask);
				591
				592	if (ret < 0) {
				593	bio_put(b);
				594	return NULL;
				595	}
				596	}
				597
				598	return b;
				599	}
				600	EXPORT_SYMBOL(bio_clone_fast);
				601
				602	/**
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	603	* bio_clone_bioset - clone a bio
				604	* @bio_src: bio to clone
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	605	* @gfp_mask: allocation priority
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	606	* @bs: bio_set to allocate from
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	607	*
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	608	* Clone bio. Caller will own the returned bio, but not the actual data it
				609	* points to. Reference count of returned bio will be one.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	610	*/
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	611	struct bio bio_clone_bioset(struct bio bio_src, gfp_t gfp_mask,
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	612	struct bio_set *bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	613	{
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	614	unsigned nr_iovecs = 0;
				615	struct bvec_iter iter;
				616	struct bio_vec bv;
				617	struct bio *bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	618
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	619	/*
				620	* Pre immutable biovecs, __bio_clone() used to just do a memcpy from
				621	* bio_src->bi_io_vec to bio->bi_io_vec.
				622	*
				623	* We can't do that anymore, because:
				624	*
				625	* - The point of cloning the biovec is to produce a bio with a biovec
				626	* the caller can modify: bi_idx and bi_bvec_done should be 0.
				627	*
				628	* - The original bio could've had more than BIO_MAX_PAGES biovecs; if
				629	* we tried to clone the whole thing bio_alloc_bioset() would fail.
				630	* But the clone should succeed as long as the number of biovecs we
				631	* actually need to allocate is fewer than BIO_MAX_PAGES.
				632	*
				633	* - Lastly, bi_vcnt should not be looked at or relied upon by code
				634	* that does not own the bio - reason being drivers don't use it for
				635	* iterating over the biovec anymore, so expecting it to be kept up
				636	* to date (i.e. for clones that share the parent biovec) is just
				637	* asking for trouble and would force extra work on
				638	* __bio_clone_fast() anyways.
				639	*/
				640
				641	bio_for_each_segment(bv, bio_src, iter)
				642	nr_iovecs++;
				643
				644	bio = bio_alloc_bioset(gfp_mask, nr_iovecs, bs);
				645	if (!bio)
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	646	return NULL;
				647
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	648	bio->bi_bdev = bio_src->bi_bdev;
				649	bio->bi_rw = bio_src->bi_rw;
				650	bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
				651	bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	652
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	653	bio_for_each_segment(bv, bio_src, iter)
				654	bio->bi_io_vec[bio->bi_vcnt++] = bv;
				655
				656	if (bio_integrity(bio_src)) {
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	657	int ret;
				658
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	659	ret = bio_integrity_clone(bio, bio_src, gfp_mask);
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	660	if (ret < 0) {
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	661	bio_put(bio);
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	662	return NULL;
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	663	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	664	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	665
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	666	return bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	667	}
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	668	EXPORT_SYMBOL(bio_clone_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	669
				670	/**
				671	* bio_get_nr_vecs - return approx number of vecs
				672	* @bdev: I/O target
				673	*
				674	* Return the approximate number of pages we can send to this target.
				675	* There's no guarantee that you will be able to fit this number of pages
				676	* into a bio, it does not account for dynamic restrictions that vary
				677	* on offset.
				678	*/
				679	int bio_get_nr_vecs(struct block_device *bdev)
				680	{
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	681	struct request_queue *q = bdev_get_queue(bdev);
Bernd Schubert	f908ee9	2012-05-11 16:36:44 +0200	[diff] [blame]	682	int nr_pages;
				683
				684	nr_pages = min_t(unsigned,
Kent Overstreet	5abebfd	2012-02-08 22:07:18 +0100	[diff] [blame]	685	queue_max_segments(q),
				686	queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
Bernd Schubert	f908ee9	2012-05-11 16:36:44 +0200	[diff] [blame]	687
				688	return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
				689
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	690	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	691	EXPORT_SYMBOL(bio_get_nr_vecs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	692
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	693	static int __bio_add_page(struct request_queue q, struct bio bio, struct page
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	694	*page, unsigned int len, unsigned int offset,
Akinobu Mita	34f2fd8	2013-11-18 22:11:42 +0900	[diff] [blame]	695	unsigned int max_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	696	{
				697	int retried_segments = 0;
				698	struct bio_vec *bvec;
				699
				700	/*
				701	* cloned bio must not modify vec list
				702	*/
				703	if (unlikely(bio_flagged(bio, BIO_CLONED)))
				704	return 0;
				705
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	706	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	707	return 0;
				708
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	709	/*
				710	* For filesystems with a blocksize smaller than the pagesize
				711	* we will often be called with the same page as last time and
				712	* a consecutive offset. Optimize this special case.
				713	*/
				714	if (bio->bi_vcnt > 0) {
				715	struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
				716
				717	if (page == prev->bv_page &&
				718	offset == prev->bv_offset + prev->bv_len) {
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	719	unsigned int prev_bv_len = prev->bv_len;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	720	prev->bv_len += len;
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	721
				722	if (q->merge_bvec_fn) {
				723	struct bvec_merge_data bvm = {
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	724	/* prev_bvec is already charged in
				725	bi_size, discharge it in order to
				726	simulate merging updated prev_bvec
				727	as new bvec. */
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	728	.bi_bdev = bio->bi_bdev,
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	729	.bi_sector = bio->bi_iter.bi_sector,
				730	.bi_size = bio->bi_iter.bi_size -
				731	prev_bv_len,
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	732	.bi_rw = bio->bi_rw,
				733	};
				734
Dmitry Monakhov	8bf8c37	2010-03-03 06:28:06 +0300	[diff] [blame]	735	if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	736	prev->bv_len -= len;
				737	return 0;
				738	}
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	739	}
				740
				741	goto done;
				742	}
				743	}
				744
				745	if (bio->bi_vcnt >= bio->bi_max_vecs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	746	return 0;
				747
				748	/*
				749	* we might lose a segment or two here, but rather that than
				750	* make this too complex.
				751	*/
				752
Martin K. Petersen	8a78362	2010-02-26 00:20:39 -0500	[diff] [blame]	753	while (bio->bi_phys_segments >= queue_max_segments(q)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	754
				755	if (retried_segments)
				756	return 0;
				757
				758	retried_segments = 1;
				759	blk_recount_segments(q, bio);
				760	}
				761
				762	/*
				763	* setup the new entry, we might clear it again later if we
				764	* cannot add the page
				765	*/
				766	bvec = &bio->bi_io_vec[bio->bi_vcnt];
				767	bvec->bv_page = page;
				768	bvec->bv_len = len;
				769	bvec->bv_offset = offset;
				770
				771	/*
				772	* if queue has other restrictions (eg varying max sector size
				773	* depending on offset), it can specify a merge_bvec_fn in the
				774	* queue to get further control
				775	*/
				776	if (q->merge_bvec_fn) {
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	777	struct bvec_merge_data bvm = {
				778	.bi_bdev = bio->bi_bdev,
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	779	.bi_sector = bio->bi_iter.bi_sector,
				780	.bi_size = bio->bi_iter.bi_size,
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	781	.bi_rw = bio->bi_rw,
				782	};
				783
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	784	/*
				785	* merge_bvec_fn() returns number of bytes it can accept
				786	* at this offset
				787	*/
Dmitry Monakhov	8bf8c37	2010-03-03 06:28:06 +0300	[diff] [blame]	788	if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	789	bvec->bv_page = NULL;
				790	bvec->bv_len = 0;
				791	bvec->bv_offset = 0;
				792	return 0;
				793	}
				794	}
				795
				796	/* If we may be able to merge these biovecs, force a recount */
Mikulas Patocka	b8b3e16	2008-08-15 10:15:19 +0200	[diff] [blame]	797	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	798	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
				799
				800	bio->bi_vcnt++;
				801	bio->bi_phys_segments++;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	802	done:
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	803	bio->bi_iter.bi_size += len;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	804	return len;
				805	}
				806
				807	/**
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	808	* bio_add_pc_page - attempt to add page to bio
Jens Axboe	fddfdea	2006-01-31 15:24:34 +0100	[diff] [blame]	809	* @q: the target queue
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	810	* @bio: destination bio
				811	* @page: page to add
				812	* @len: vec entry length
				813	* @offset: vec entry offset
				814	*
				815	* Attempt to add a page to the bio_vec maplist. This can fail for a
Andreas Gruenbacher	c642808	2011-05-27 14:52:09 +0200	[diff] [blame]	816	* number of reasons, such as the bio being full or target block device
				817	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				818	* so it is always possible to add a single page to an empty bio.
				819	*
				820	* This should only be used by REQ_PC bios.
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	821	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	822	int bio_add_pc_page(struct request_queue q, struct bio bio, struct page *page,
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	823	unsigned int len, unsigned int offset)
				824	{
Martin K. Petersen	ae03bf6	2009-05-22 17:17:50 -0400	[diff] [blame]	825	return __bio_add_page(q, bio, page, len, offset,
				826	queue_max_hw_sectors(q));
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	827	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	828	EXPORT_SYMBOL(bio_add_pc_page);
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	829
				830	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	831	* bio_add_page - attempt to add page to bio
				832	* @bio: destination bio
				833	* @page: page to add
				834	* @len: vec entry length
				835	* @offset: vec entry offset
				836	*
				837	* Attempt to add a page to the bio_vec maplist. This can fail for a
Andreas Gruenbacher	c642808	2011-05-27 14:52:09 +0200	[diff] [blame]	838	* number of reasons, such as the bio being full or target block device
				839	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				840	* so it is always possible to add a single page to an empty bio.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	841	*/
				842	int bio_add_page(struct bio bio, struct page page, unsigned int len,
				843	unsigned int offset)
				844	{
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	845	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
Martin K. Petersen	ae03bf6	2009-05-22 17:17:50 -0400	[diff] [blame]	846	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	847	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	848	EXPORT_SYMBOL(bio_add_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	849
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	850	struct submit_bio_ret {
				851	struct completion event;
				852	int error;
				853	};
				854
				855	static void submit_bio_wait_endio(struct bio *bio, int error)
				856	{
				857	struct submit_bio_ret *ret = bio->bi_private;
				858
				859	ret->error = error;
				860	complete(&ret->event);
				861	}
				862
				863	/**
				864	* submit_bio_wait - submit a bio, and wait until it completes
				865	* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
				866	* @bio: The &struct bio which describes the I/O
				867	*
				868	* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
				869	* bio_endio() on failure.
				870	*/
				871	int submit_bio_wait(int rw, struct bio *bio)
				872	{
				873	struct submit_bio_ret ret;
				874
				875	rw \|= REQ_SYNC;
				876	init_completion(&ret.event);
				877	bio->bi_private = &ret;
				878	bio->bi_end_io = submit_bio_wait_endio;
				879	submit_bio(rw, bio);
				880	wait_for_completion(&ret.event);
				881
				882	return ret.error;
				883	}
				884	EXPORT_SYMBOL(submit_bio_wait);
				885
Kent Overstreet	054bdf6	2012-09-28 13:17:55 -0700	[diff] [blame]	886	/**
				887	* bio_advance - increment/complete a bio by some number of bytes
				888	* @bio: bio to advance
				889	* @bytes: number of bytes to complete
				890	*
				891	* This updates bi_sector, bi_size and bi_idx; if the number of bytes to
				892	* complete doesn't align with a bvec boundary, then bv_len and bv_offset will
				893	* be updated on the last bvec as well.
				894	*
				895	* @bio will then represent the remaining, uncompleted portion of the io.
				896	*/
				897	void bio_advance(struct bio *bio, unsigned bytes)
				898	{
				899	if (bio_integrity(bio))
				900	bio_integrity_advance(bio, bytes);
				901
Kent Overstreet	4550dd6	2013-08-07 14:26:21 -0700	[diff] [blame]	902	bio_advance_iter(bio, &bio->bi_iter, bytes);
Kent Overstreet	054bdf6	2012-09-28 13:17:55 -0700	[diff] [blame]	903	}
				904	EXPORT_SYMBOL(bio_advance);
				905
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	906	/**
Kent Overstreet	a078760	2012-09-10 14:03:28 -0700	[diff] [blame]	907	* bio_alloc_pages - allocates a single page for each bvec in a bio
				908	* @bio: bio to allocate pages for
				909	* @gfp_mask: flags for allocation
				910	*
				911	* Allocates pages up to @bio->bi_vcnt.
				912	*
				913	* Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
				914	* freed.
				915	*/
				916	int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
				917	{
				918	int i;
				919	struct bio_vec *bv;
				920
				921	bio_for_each_segment_all(bv, bio, i) {
				922	bv->bv_page = alloc_page(gfp_mask);
				923	if (!bv->bv_page) {
				924	while (--bv >= bio->bi_io_vec)
				925	__free_page(bv->bv_page);
				926	return -ENOMEM;
				927	}
				928	}
				929
				930	return 0;
				931	}
				932	EXPORT_SYMBOL(bio_alloc_pages);
				933
				934	/**
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	935	* bio_copy_data - copy contents of data buffers from one chain of bios to
				936	* another
				937	* @src: source bio list
				938	* @dst: destination bio list
				939	*
				940	* If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
				941	* @src and @dst as linked lists of bios.
				942	*
				943	* Stops when it reaches the end of either @src or @dst - that is, copies
				944	* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
				945	*/
				946	void bio_copy_data(struct bio dst, struct bio src)
				947	{
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	948	struct bvec_iter src_iter, dst_iter;
				949	struct bio_vec src_bv, dst_bv;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	950	void src_p, dst_p;
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	951	unsigned bytes;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	952
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	953	src_iter = src->bi_iter;
				954	dst_iter = dst->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	955
				956	while (1) {
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	957	if (!src_iter.bi_size) {
				958	src = src->bi_next;
				959	if (!src)
				960	break;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	961
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	962	src_iter = src->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	963	}
				964
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	965	if (!dst_iter.bi_size) {
				966	dst = dst->bi_next;
				967	if (!dst)
				968	break;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	969
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	970	dst_iter = dst->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	971	}
				972
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	973	src_bv = bio_iter_iovec(src, src_iter);
				974	dst_bv = bio_iter_iovec(dst, dst_iter);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	975
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	976	bytes = min(src_bv.bv_len, dst_bv.bv_len);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	977
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	978	src_p = kmap_atomic(src_bv.bv_page);
				979	dst_p = kmap_atomic(dst_bv.bv_page);
				980
				981	memcpy(dst_p + dst_bv.bv_offset,
				982	src_p + src_bv.bv_offset,
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	983	bytes);
				984
				985	kunmap_atomic(dst_p);
				986	kunmap_atomic(src_p);
				987
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	988	bio_advance_iter(src, &src_iter, bytes);
				989	bio_advance_iter(dst, &dst_iter, bytes);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	990	}
				991	}
				992	EXPORT_SYMBOL(bio_copy_data);
				993
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	994	struct bio_map_data {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	995	int nr_sgvecs;
				996	int is_our_pages;
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	997	struct sg_iovec sgvecs[];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	998	};
				999
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1000	static void bio_set_map_data(struct bio_map_data bmd, struct bio bio,
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1001	struct sg_iovec *iov, int iov_count,
				1002	int is_our_pages)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1003	{
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1004	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
				1005	bmd->nr_sgvecs = iov_count;
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1006	bmd->is_our_pages = is_our_pages;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1007	bio->bi_private = bmd;
				1008	}
				1009
Dan Carpenter	121f099	2011-11-16 09:21:50 +0100	[diff] [blame]	1010	static struct bio_map_data *bio_alloc_map_data(int nr_segs,
				1011	unsigned int iov_count,
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1012	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1013	{
Jens Axboe	f3f63c1	2010-10-29 11:46:56 -0600	[diff] [blame]	1014	if (iov_count > UIO_MAXIOV)
				1015	return NULL;
				1016
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1017	return kmalloc(sizeof(struct bio_map_data) +
				1018	sizeof(struct sg_iovec) * iov_count, gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1019	}
				1020
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1021	static int __bio_copy_iov(struct bio bio, struct sg_iovec iov, int iov_count,
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1022	int to_user, int from_user, int do_free_page)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1023	{
				1024	int ret = 0, i;
				1025	struct bio_vec *bvec;
				1026	int iov_idx = 0;
				1027	unsigned int iov_off = 0;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1028
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1029	bio_for_each_segment_all(bvec, bio, i) {
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1030	char *bv_addr = page_address(bvec->bv_page);
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1031	unsigned int bv_len = bvec->bv_len;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1032
				1033	while (bv_len && iov_idx < iov_count) {
				1034	unsigned int bytes;
Michal Simek	0e0c621	2009-06-10 12:57:07 -0700	[diff] [blame]	1035	char __user *iov_addr;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1036
				1037	bytes = min_t(unsigned int,
				1038	iov[iov_idx].iov_len - iov_off, bv_len);
				1039	iov_addr = iov[iov_idx].iov_base + iov_off;
				1040
				1041	if (!ret) {
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1042	if (to_user)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1043	ret = copy_to_user(iov_addr, bv_addr,
				1044	bytes);
				1045
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1046	if (from_user)
				1047	ret = copy_from_user(bv_addr, iov_addr,
				1048	bytes);
				1049
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1050	if (ret)
				1051	ret = -EFAULT;
				1052	}
				1053
				1054	bv_len -= bytes;
				1055	bv_addr += bytes;
				1056	iov_addr += bytes;
				1057	iov_off += bytes;
				1058
				1059	if (iov[iov_idx].iov_len == iov_off) {
				1060	iov_idx++;
				1061	iov_off = 0;
				1062	}
				1063	}
				1064
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1065	if (do_free_page)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1066	__free_page(bvec->bv_page);
				1067	}
				1068
				1069	return ret;
				1070	}
				1071
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1072	/**
				1073	* bio_uncopy_user - finish previously mapped bio
				1074	* @bio: bio being terminated
				1075	*
				1076	* Free pages allocated from bio_copy_user() and write back data
				1077	* to user space in case of a read.
				1078	*/
				1079	int bio_uncopy_user(struct bio *bio)
				1080	{
				1081	struct bio_map_data *bmd = bio->bi_private;
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1082	struct bio_vec *bvec;
				1083	int ret = 0, i;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1084
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1085	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
				1086	/*
				1087	* if we're in a workqueue, the request is orphaned, so
				1088	* don't copy into a random user address space, just free.
				1089	*/
				1090	if (current->mm)
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1091	ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
				1092	bio_data_dir(bio) == READ,
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1093	0, bmd->is_our_pages);
				1094	else if (bmd->is_our_pages)
				1095	bio_for_each_segment_all(bvec, bio, i)
				1096	__free_page(bvec->bv_page);
				1097	}
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1098	kfree(bmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1099	bio_put(bio);
				1100	return ret;
				1101	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1102	EXPORT_SYMBOL(bio_uncopy_user);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1103
				1104	/**
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1105	* bio_copy_user_iov - copy user data to bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1106	* @q: destination block queue
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1107	* @map_data: pointer to the rq_map_data holding pages (if necessary)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1108	* @iov: the iovec.
				1109	* @iov_count: number of elements in the iovec
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1110	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1111	* @gfp_mask: memory allocation flags
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1112	*
				1113	* Prepares and returns a bio for indirect user io, bouncing data
				1114	* to/from kernel pages as necessary. Must be paired with
				1115	* call bio_uncopy_user() on io completion.
				1116	*/
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1117	struct bio bio_copy_user_iov(struct request_queue q,
				1118	struct rq_map_data *map_data,
				1119	struct sg_iovec *iov, int iov_count,
				1120	int write_to_vm, gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1121	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1122	struct bio_map_data *bmd;
				1123	struct bio_vec *bvec;
				1124	struct page *page;
				1125	struct bio *bio;
				1126	int i, ret;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1127	int nr_pages = 0;
				1128	unsigned int len = 0;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1129	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1130
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1131	for (i = 0; i < iov_count; i++) {
				1132	unsigned long uaddr;
				1133	unsigned long end;
				1134	unsigned long start;
				1135
				1136	uaddr = (unsigned long)iov[i].iov_base;
				1137	end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1138	start = uaddr >> PAGE_SHIFT;
				1139
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1140	/*
				1141	* Overflow, abort
				1142	*/
				1143	if (end < start)
				1144	return ERR_PTR(-EINVAL);
				1145
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1146	nr_pages += end - start;
				1147	len += iov[i].iov_len;
				1148	}
				1149
FUJITA Tomonori	6983872	2009-04-28 20:24:29 +0200	[diff] [blame]	1150	if (offset)
				1151	nr_pages++;
				1152
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1153	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1154	if (!bmd)
				1155	return ERR_PTR(-ENOMEM);
				1156
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1157	ret = -ENOMEM;
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1158	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1159	if (!bio)
				1160	goto out_bmd;
				1161
Christoph Hellwig	7b6d91d	2010-08-07 18:20:39 +0200	[diff] [blame]	1162	if (!write_to_vm)
				1163	bio->bi_rw \|= REQ_WRITE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1164
				1165	ret = 0;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1166
				1167	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1168	nr_pages = 1 << map_data->page_order;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1169	i = map_data->offset / PAGE_SIZE;
				1170	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1171	while (len) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1172	unsigned int bytes = PAGE_SIZE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1173
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1174	bytes -= offset;
				1175
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1176	if (bytes > len)
				1177	bytes = len;
				1178
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1179	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1180	if (i == map_data->nr_entries * nr_pages) {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1181	ret = -ENOMEM;
				1182	break;
				1183	}
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1184
				1185	page = map_data->pages[i / nr_pages];
				1186	page += (i % nr_pages);
				1187
				1188	i++;
				1189	} else {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1190	page = alloc_page(q->bounce_gfp \| gfp_mask);
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1191	if (!page) {
				1192	ret = -ENOMEM;
				1193	break;
				1194	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1195	}
				1196
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1197	if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1198	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1199
				1200	len -= bytes;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1201	offset = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1202	}
				1203
				1204	if (ret)
				1205	goto cleanup;
				1206
				1207	/*
				1208	* success
				1209	*/
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1210	if ((!write_to_vm && (!map_data \|\| !map_data->null_mapped)) \|\|
				1211	(map_data && map_data->from_user)) {
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1212	ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1213	if (ret)
				1214	goto cleanup;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1215	}
				1216
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1217	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1218	return bio;
				1219	cleanup:
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1220	if (!map_data)
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1221	bio_for_each_segment_all(bvec, bio, i)
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1222	__free_page(bvec->bv_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1223
				1224	bio_put(bio);
				1225	out_bmd:
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1226	kfree(bmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1227	return ERR_PTR(ret);
				1228	}
				1229
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1230	/**
				1231	* bio_copy_user - copy user data to bio
				1232	* @q: destination block queue
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1233	* @map_data: pointer to the rq_map_data holding pages (if necessary)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1234	* @uaddr: start of user address
				1235	* @len: length in bytes
				1236	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1237	* @gfp_mask: memory allocation flags
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1238	*
				1239	* Prepares and returns a bio for indirect user io, bouncing data
				1240	* to/from kernel pages as necessary. Must be paired with
				1241	* call bio_uncopy_user() on io completion.
				1242	*/
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1243	struct bio bio_copy_user(struct request_queue q, struct rq_map_data *map_data,
				1244	unsigned long uaddr, unsigned int len,
				1245	int write_to_vm, gfp_t gfp_mask)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1246	{
				1247	struct sg_iovec iov;
				1248
				1249	iov.iov_base = (void __user *)uaddr;
				1250	iov.iov_len = len;
				1251
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1252	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1253	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1254	EXPORT_SYMBOL(bio_copy_user);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1255
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1256	static struct bio __bio_map_user_iov(struct request_queue q,
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1257	struct block_device *bdev,
				1258	struct sg_iovec *iov, int iov_count,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1259	int write_to_vm, gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1260	{
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1261	int i, j;
				1262	int nr_pages = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1263	struct page **pages;
				1264	struct bio *bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1265	int cur_page = 0;
				1266	int ret, offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1267
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1268	for (i = 0; i < iov_count; i++) {
				1269	unsigned long uaddr = (unsigned long)iov[i].iov_base;
				1270	unsigned long len = iov[i].iov_len;
				1271	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1272	unsigned long start = uaddr >> PAGE_SHIFT;
				1273
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1274	/*
				1275	* Overflow, abort
				1276	*/
				1277	if (end < start)
				1278	return ERR_PTR(-EINVAL);
				1279
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1280	nr_pages += end - start;
				1281	/*
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1282	* buffer must be aligned to at least hardsector size for now
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1283	*/
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1284	if (uaddr & queue_dma_alignment(q))
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1285	return ERR_PTR(-EINVAL);
				1286	}
				1287
				1288	if (!nr_pages)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1289	return ERR_PTR(-EINVAL);
				1290
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1291	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1292	if (!bio)
				1293	return ERR_PTR(-ENOMEM);
				1294
				1295	ret = -ENOMEM;
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1296	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1297	if (!pages)
				1298	goto out;
				1299
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1300	for (i = 0; i < iov_count; i++) {
				1301	unsigned long uaddr = (unsigned long)iov[i].iov_base;
				1302	unsigned long len = iov[i].iov_len;
				1303	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1304	unsigned long start = uaddr >> PAGE_SHIFT;
				1305	const int local_nr_pages = end - start;
				1306	const int page_limit = cur_page + local_nr_pages;
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1307
Nick Piggin	f5dd33c	2008-07-25 19:45:25 -0700	[diff] [blame]	1308	ret = get_user_pages_fast(uaddr, local_nr_pages,
				1309	write_to_vm, &pages[cur_page]);
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1310	if (ret < local_nr_pages) {
				1311	ret = -EFAULT;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1312	goto out_unmap;
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1313	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1314
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1315	offset = uaddr & ~PAGE_MASK;
				1316	for (j = cur_page; j < page_limit; j++) {
				1317	unsigned int bytes = PAGE_SIZE - offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1318
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1319	if (len <= 0)
				1320	break;
				1321
				1322	if (bytes > len)
				1323	bytes = len;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1324
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1325	/*
				1326	* sorry...
				1327	*/
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1328	if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
				1329	bytes)
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1330	break;
				1331
				1332	len -= bytes;
				1333	offset = 0;
				1334	}
				1335
				1336	cur_page = j;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1337	/*
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1338	* release the pages we didn't map into the bio, if any
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1339	*/
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1340	while (j < page_limit)
				1341	page_cache_release(pages[j++]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1342	}
				1343
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1344	kfree(pages);
				1345
				1346	/*
				1347	* set data direction, and check if mapped pages need bouncing
				1348	*/
				1349	if (!write_to_vm)
Christoph Hellwig	7b6d91d	2010-08-07 18:20:39 +0200	[diff] [blame]	1350	bio->bi_rw \|= REQ_WRITE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1351
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1352	bio->bi_bdev = bdev;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1353	bio->bi_flags \|= (1 << BIO_USER_MAPPED);
				1354	return bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1355
				1356	out_unmap:
				1357	for (i = 0; i < nr_pages; i++) {
				1358	if(!pages[i])
				1359	break;
				1360	page_cache_release(pages[i]);
				1361	}
				1362	out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1363	kfree(pages);
				1364	bio_put(bio);
				1365	return ERR_PTR(ret);
				1366	}
				1367
				1368	/**
				1369	* bio_map_user - map user address into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1370	* @q: the struct request_queue for the bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1371	* @bdev: destination block device
				1372	* @uaddr: start of user address
				1373	* @len: length in bytes
				1374	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1375	* @gfp_mask: memory allocation flags
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1376	*
				1377	* Map the user space address into a bio suitable for io to a block
				1378	* device. Returns an error pointer in case of error.
				1379	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1380	struct bio bio_map_user(struct request_queue q, struct block_device *bdev,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1381	unsigned long uaddr, unsigned int len, int write_to_vm,
				1382	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1383	{
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1384	struct sg_iovec iov;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1385
viro@ZenIV.linux.org.uk	3f70353	2005-09-09 16:53:56 +0100	[diff] [blame]	1386	iov.iov_base = (void __user *)uaddr;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1387	iov.iov_len = len;
				1388
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1389	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1390	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1391	EXPORT_SYMBOL(bio_map_user);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1392
				1393	/**
				1394	* bio_map_user_iov - map user sg_iovec table into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1395	* @q: the struct request_queue for the bio
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1396	* @bdev: destination block device
				1397	* @iov: the iovec.
				1398	* @iov_count: number of elements in the iovec
				1399	* @write_to_vm: bool indicating writing to pages or not
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1400	* @gfp_mask: memory allocation flags
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1401	*
				1402	* Map the user space address into a bio suitable for io to a block
				1403	* device. Returns an error pointer in case of error.
				1404	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1405	struct bio bio_map_user_iov(struct request_queue q, struct block_device *bdev,
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1406	struct sg_iovec *iov, int iov_count,
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1407	int write_to_vm, gfp_t gfp_mask)
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1408	{
				1409	struct bio *bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1410
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1411	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
				1412	gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1413	if (IS_ERR(bio))
				1414	return bio;
				1415
				1416	/*
				1417	* subtle -- if __bio_map_user() ended up bouncing a bio,
				1418	* it would normally disappear when its bi_end_io is run.
				1419	* however, we need it for the unmap, so grab an extra
				1420	* reference to it
				1421	*/
				1422	bio_get(bio);
				1423
Mike Christie	0e75f90	2006-12-01 10:40:55 +0100	[diff] [blame]	1424	return bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1425	}
				1426
				1427	static void __bio_unmap_user(struct bio *bio)
				1428	{
				1429	struct bio_vec *bvec;
				1430	int i;
				1431
				1432	/*
				1433	* make sure we dirty pages we wrote to
				1434	*/
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1435	bio_for_each_segment_all(bvec, bio, i) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1436	if (bio_data_dir(bio) == READ)
				1437	set_page_dirty_lock(bvec->bv_page);
				1438
				1439	page_cache_release(bvec->bv_page);
				1440	}
				1441
				1442	bio_put(bio);
				1443	}
				1444
				1445	/**
				1446	* bio_unmap_user - unmap a bio
				1447	* @bio: the bio being unmapped
				1448	*
				1449	* Unmap a bio previously mapped by bio_map_user(). Must be called with
				1450	* a process context.
				1451	*
				1452	* bio_unmap_user() may sleep.
				1453	*/
				1454	void bio_unmap_user(struct bio *bio)
				1455	{
				1456	__bio_unmap_user(bio);
				1457	bio_put(bio);
				1458	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1459	EXPORT_SYMBOL(bio_unmap_user);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1460
/*
 * Completion handler installed by __bio_map_kern(): all it does is drop a
 * reference on the bio.  @err is not examined.
 */
static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}
				1465
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1466	static struct bio __bio_map_kern(struct request_queue q, void *data,
Al Viro	27496a8	2005-10-21 03:20:48 -0400	[diff] [blame]	1467	unsigned int len, gfp_t gfp_mask)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1468	{
				1469	unsigned long kaddr = (unsigned long)data;
				1470	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1471	unsigned long start = kaddr >> PAGE_SHIFT;
				1472	const int nr_pages = end - start;
				1473	int offset, i;
				1474	struct bio *bio;
				1475
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1476	bio = bio_kmalloc(gfp_mask, nr_pages);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1477	if (!bio)
				1478	return ERR_PTR(-ENOMEM);
				1479
				1480	offset = offset_in_page(kaddr);
				1481	for (i = 0; i < nr_pages; i++) {
				1482	unsigned int bytes = PAGE_SIZE - offset;
				1483
				1484	if (len <= 0)
				1485	break;
				1486
				1487	if (bytes > len)
				1488	bytes = len;
				1489
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1490	if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				1491	offset) < bytes)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1492	break;
				1493
				1494	data += bytes;
				1495	len -= bytes;
				1496	offset = 0;
				1497	}
				1498
Jens Axboe	b823825	2005-06-20 14:05:27 +0200	[diff] [blame]	1499	bio->bi_end_io = bio_map_kern_endio;
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1500	return bio;
				1501	}
				1502
				1503	/**
				1504	* bio_map_kern - map kernel address into bio
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1505	* @q: the struct request_queue for the bio
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1506	* @data: pointer to buffer to map
				1507	* @len: length in bytes
				1508	* @gfp_mask: allocation flags for bio allocation
				1509	*
				1510	* Map the kernel address into a bio suitable for io to a block
				1511	* device. Returns an error pointer in case of error.
				1512	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	1513	struct bio bio_map_kern(struct request_queue q, void *data, unsigned int len,
Al Viro	27496a8	2005-10-21 03:20:48 -0400	[diff] [blame]	1514	gfp_t gfp_mask)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1515	{
				1516	struct bio *bio;
				1517
				1518	bio = __bio_map_kern(q, data, len, gfp_mask);
				1519	if (IS_ERR(bio))
				1520	return bio;
				1521
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1522	if (bio->bi_iter.bi_size == len)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1523	return bio;
				1524
				1525	/*
				1526	* Don't support partial mappings.
				1527	*/
				1528	bio_put(bio);
				1529	return ERR_PTR(-EINVAL);
				1530	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1531	EXPORT_SYMBOL(bio_map_kern);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1532
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1533	static void bio_copy_kern_endio(struct bio *bio, int err)
				1534	{
				1535	struct bio_vec *bvec;
				1536	const int read = bio_data_dir(bio) == READ;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1537	struct bio_map_data *bmd = bio->bi_private;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1538	int i;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1539	char *p = bmd->sgvecs[0].iov_base;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1540
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1541	bio_for_each_segment_all(bvec, bio, i) {
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1542	char *addr = page_address(bvec->bv_page);
				1543
Tejun Heo	4fc981e	2009-05-19 18:33:06 +0900	[diff] [blame]	1544	if (read)
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1545	memcpy(p, addr, bvec->bv_len);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1546
				1547	__free_page(bvec->bv_page);
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1548	p += bvec->bv_len;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1549	}
				1550
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1551	kfree(bmd);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1552	bio_put(bio);
				1553	}
				1554
				1555	/**
				1556	* bio_copy_kern - copy kernel address into bio
				1557	* @q: the struct request_queue for the bio
				1558	* @data: pointer to buffer to copy
				1559	* @len: length in bytes
				1560	* @gfp_mask: allocation flags for bio and page allocation
Randy Dunlap	ffee025	2008-04-30 09:08:54 +0200	[diff] [blame]	1561	* @reading: data direction is READ
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1562	*
				1563	* copy the kernel address into a bio suitable for io to a block
				1564	* device. Returns an error pointer in case of error.
				1565	*/
				1566	struct bio bio_copy_kern(struct request_queue q, void *data, unsigned int len,
				1567	gfp_t gfp_mask, int reading)
				1568	{
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1569	struct bio *bio;
				1570	struct bio_vec *bvec;
FUJITA Tomonori	4d8ab62	2008-08-28 15:05:57 +0900	[diff] [blame]	1571	int i;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1572
FUJITA Tomonori	4d8ab62	2008-08-28 15:05:57 +0900	[diff] [blame]	1573	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
				1574	if (IS_ERR(bio))
				1575	return bio;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1576
				1577	if (!reading) {
				1578	void *p = data;
				1579
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1580	bio_for_each_segment_all(bvec, bio, i) {
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1581	char *addr = page_address(bvec->bv_page);
				1582
				1583	memcpy(addr, p, bvec->bv_len);
				1584	p += bvec->bv_len;
				1585	}
				1586	}
				1587
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1588	bio->bi_end_io = bio_copy_kern_endio;
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1589
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1590	return bio;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1591	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1592	EXPORT_SYMBOL(bio_copy_kern);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1593
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1594	/*
				1595	* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
				1596	* for performing direct-IO in BIOs.
				1597	*
				1598	* The problem is that we cannot run set_page_dirty() from interrupt context
				1599	* because the required locks are not interrupt-safe. So what we can do is to
				1600	* mark the pages dirty _before_ performing IO. And in interrupt context,
				1601	* check that the pages are still dirty. If so, fine. If not, redirty them
				1602	* in process context.
				1603	*
				1604	* We special-case compound pages here: normally this means reads into hugetlb
				1605	* pages. The logic in here doesn't really work right for compound pages
				1606	* because the VM does not uniformly chase down the head page in all cases.
				1607	* But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
				1608	* handle them at all. So we skip compound pages here at an early stage.
				1609	*
				1610	* Note that this code is very hard to test under normal circumstances because
				1611	* direct-io pins the pages with get_user_pages(). This makes
				1612	* is_page_cache_freeable return false, and the VM will not clean the pages.
Artem Bityutskiy	0d5c3eb	2012-07-25 18:12:08 +0300	[diff] [blame]	1613	* But other code (eg, flusher threads) could clean the pages if they are mapped
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1614	* pagecache.
				1615	*
				1616	* Simply disabling the call to bio_set_pages_dirty() is a good way to test the
				1617	* deferred bio dirtying paths.
				1618	*/
				1619
				1620	/*
				1621	* bio_set_pages_dirty() will mark all the bio's pages as dirty.
				1622	*/
				1623	void bio_set_pages_dirty(struct bio *bio)
				1624	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1625	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1626	int i;
				1627
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1628	bio_for_each_segment_all(bvec, bio, i) {
				1629	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1630
				1631	if (page && !PageCompound(page))
				1632	set_page_dirty_lock(page);
				1633	}
				1634	}
				1635
Adrian Bunk	86b6c7a	2008-02-18 13:48:32 +0100	[diff] [blame]	1636	static void bio_release_pages(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1637	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1638	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1639	int i;
				1640
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1641	bio_for_each_segment_all(bvec, bio, i) {
				1642	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1643
				1644	if (page)
				1645	put_page(page);
				1646	}
				1647	}
				1648
				1649	/*
				1650	* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
				1651	* If they are, then fine. If, however, some pages are clean then they must
				1652	* have been written out during the direct-IO read. So we take another ref on
				1653	* the BIO and the offending pages and re-dirty the pages in process context.
				1654	*
				1655	* It is expected that bio_check_pages_dirty() will wholly own the BIO from
				1656	* here on. It will run one page_cache_release() against each page and will
				1657	* run one bio_put() against the BIO.
				1658	*/
				1659
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1660	static void bio_dirty_fn(struct work_struct *work);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1661
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1662	static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1663	static DEFINE_SPINLOCK(bio_dirty_lock);
				1664	static struct bio *bio_dirty_list;
				1665
				1666	/*
				1667	* This runs in process context
				1668	*/
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1669	static void bio_dirty_fn(struct work_struct *work)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1670	{
				1671	unsigned long flags;
				1672	struct bio *bio;
				1673
				1674	spin_lock_irqsave(&bio_dirty_lock, flags);
				1675	bio = bio_dirty_list;
				1676	bio_dirty_list = NULL;
				1677	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1678
				1679	while (bio) {
				1680	struct bio *next = bio->bi_private;
				1681
				1682	bio_set_pages_dirty(bio);
				1683	bio_release_pages(bio);
				1684	bio_put(bio);
				1685	bio = next;
				1686	}
				1687	}
				1688
				1689	void bio_check_pages_dirty(struct bio *bio)
				1690	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1691	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1692	int nr_clean_pages = 0;
				1693	int i;
				1694
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1695	bio_for_each_segment_all(bvec, bio, i) {
				1696	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1697
				1698	if (PageDirty(page) \|\| PageCompound(page)) {
				1699	page_cache_release(page);
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1700	bvec->bv_page = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1701	} else {
				1702	nr_clean_pages++;
				1703	}
				1704	}
				1705
				1706	if (nr_clean_pages) {
				1707	unsigned long flags;
				1708
				1709	spin_lock_irqsave(&bio_dirty_lock, flags);
				1710	bio->bi_private = bio_dirty_list;
				1711	bio_dirty_list = bio;
				1712	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1713	schedule_work(&bio_dirty_work);
				1714	} else {
				1715	bio_put(bio);
				1716	}
				1717	}
				1718
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/*
 * Call flush_dcache_page() on the page of every segment visited by
 * bio_for_each_segment() for @bi.
 */
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bvec_iter pos;
	struct bio_vec seg;

	bio_for_each_segment(seg, bi, pos) {
		flush_dcache_page(seg.bv_page);
	}
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
				1730
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1731	/**
				1732	* bio_endio - end I/O on a bio
				1733	* @bio: bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1734	* @error: error, if any
				1735	*
				1736	* Description:
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1737	* bio_endio() will end I/O on the whole bio. bio_endio() is the
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1738	* preferred way to end I/O on a bio, it takes care of clearing
				1739	* BIO_UPTODATE on error. @error is 0 on success, and and one of the
				1740	* established -Exxxx (-EIO, for instance) error values in case
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	1741	* something went wrong. No one should call bi_end_io() directly on a
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1742	* bio unless they own it and thus know that it has an end_io
				1743	* function.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1744	**/
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1745	void bio_endio(struct bio *bio, int error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1746	{
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	1747	while (bio) {
				1748	BUG_ON(atomic_read(&bio->bi_remaining) <= 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1749
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	1750	if (error)
				1751	clear_bit(BIO_UPTODATE, &bio->bi_flags);
				1752	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
				1753	error = -EIO;
				1754
				1755	if (!atomic_dec_and_test(&bio->bi_remaining))
				1756	return;
				1757
				1758	/*
				1759	* Need to have a real endio function for chained bios,
				1760	* otherwise various corner cases will break (like stacking
				1761	* block devices that save/restore bi_end_io) - however, we want
				1762	* to avoid unbounded recursion and blowing the stack. Tail call
				1763	* optimization would handle this, but compiling with frame
				1764	* pointers also disables gcc's sibling call optimization.
				1765	*/
				1766	if (bio->bi_end_io == bio_chain_endio) {
				1767	struct bio *parent = bio->bi_private;
				1768	bio_put(bio);
				1769	bio = parent;
				1770	} else {
				1771	if (bio->bi_end_io)
				1772	bio->bi_end_io(bio, error);
				1773	bio = NULL;
				1774	}
				1775	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1776	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1777	EXPORT_SYMBOL(bio_endio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1778
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	1779	/**
				1780	* bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
				1781	* @bio: bio
				1782	* @error: error, if any
				1783	*
				1784	* For code that has saved and restored bi_end_io; thing hard before using this
				1785	* function, probably you should've cloned the entire bio.
				1786	**/
				1787	void bio_endio_nodec(struct bio *bio, int error)
				1788	{
				1789	atomic_inc(&bio->bi_remaining);
				1790	bio_endio(bio, error);
				1791	}
				1792	EXPORT_SYMBOL(bio_endio_nodec);
				1793
Kent Overstreet	20d0189	2013-11-23 18:21:01 -0800	[diff] [blame]	1794	/**
				1795	* bio_split - split a bio
				1796	* @bio: bio to split
				1797	* @sectors: number of sectors to split from the front of @bio
				1798	* @gfp: gfp mask
				1799	* @bs: bio set to allocate from
				1800	*
				1801	* Allocates and returns a new bio which represents @sectors from the start of
				1802	* @bio, and updates @bio to represent the remaining sectors.
				1803	*
				1804	* The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
				1805	* responsibility to ensure that @bio is not freed before the split.
				1806	*/
				1807	struct bio bio_split(struct bio bio, int sectors,
				1808	gfp_t gfp, struct bio_set *bs)
				1809	{
				1810	struct bio *split = NULL;
				1811
				1812	BUG_ON(sectors <= 0);
				1813	BUG_ON(sectors >= bio_sectors(bio));
				1814
				1815	split = bio_clone_fast(bio, gfp, bs);
				1816	if (!split)
				1817	return NULL;
				1818
				1819	split->bi_iter.bi_size = sectors << 9;
				1820
				1821	if (bio_integrity(split))
				1822	bio_integrity_trim(split, 0, sectors);
				1823
				1824	bio_advance(bio, split->bi_iter.bi_size);
				1825
				1826	return split;
				1827	}
				1828	EXPORT_SYMBOL(bio_split);
				1829
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1830	/**
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1831	* bio_trim - trim a bio
				1832	* @bio: bio to trim
				1833	* @offset: number of sectors to trim from the front of @bio
				1834	* @size: size we want to trim @bio to, in sectors
				1835	*/
				1836	void bio_trim(struct bio *bio, int offset, int size)
				1837	{
				1838	/* 'bio' is a cloned bio which we need to trim to match
				1839	* the given offset and size.
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1840	*/
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1841
				1842	size <<= 9;
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1843	if (offset == 0 && size == bio->bi_iter.bi_size)
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1844	return;
				1845
				1846	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
				1847
				1848	bio_advance(bio, offset << 9);
				1849
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1850	bio->bi_iter.bi_size = size;
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1851	}
				1852	EXPORT_SYMBOL_GPL(bio_trim);
				1853
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1854	/*
				1855	* create memory pools for biovec's in a bio_set.
				1856	* use the global biovec slabs created for general use.
				1857	*/
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1858	mempool_t biovec_create_pool(struct bio_set bs, int pool_entries)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1859	{
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	1860	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1861
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1862	return mempool_create_slab_pool(pool_entries, bp->slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1863	}
				1864
				1865	void bioset_free(struct bio_set *bs)
				1866	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1867	if (bs->rescue_workqueue)
				1868	destroy_workqueue(bs->rescue_workqueue);
				1869
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1870	if (bs->bio_pool)
				1871	mempool_destroy(bs->bio_pool);
				1872
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1873	if (bs->bvec_pool)
				1874	mempool_destroy(bs->bvec_pool);
				1875
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	1876	bioset_integrity_free(bs);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1877	bio_put_slab(bs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1878
				1879	kfree(bs);
				1880	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1881	EXPORT_SYMBOL(bioset_free);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1882
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1883	/**
				1884	* bioset_create - Create a bio_set
				1885	* @pool_size: Number of bio and bio_vecs to cache in the mempool
				1886	* @front_pad: Number of bytes to allocate in front of the returned bio
				1887	*
				1888	* Description:
				1889	* Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
				1890	* to ask for a number of bytes to be allocated in front of the bio.
				1891	* Front pad allocation is useful for embedding the bio inside
				1892	* another structure, to avoid allocating extra data to go with the bio.
				1893	* Note that the bio must be embedded at the END of that structure always,
				1894	* or things will break badly.
				1895	*/
				1896	struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1897	{
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1898	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1899	struct bio_set *bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1900
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1901	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1902	if (!bs)
				1903	return NULL;
				1904
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1905	bs->front_pad = front_pad;
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1906
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1907	spin_lock_init(&bs->rescue_lock);
				1908	bio_list_init(&bs->rescue_list);
				1909	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
				1910
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1911	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1912	if (!bs->bio_slab) {
				1913	kfree(bs);
				1914	return NULL;
				1915	}
				1916
				1917	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1918	if (!bs->bio_pool)
				1919	goto bad;
				1920
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1921	bs->bvec_pool = biovec_create_pool(bs, pool_size);
				1922	if (!bs->bvec_pool)
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1923	goto bad;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1924
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1925	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
				1926	if (!bs->rescue_workqueue)
				1927	goto bad;
				1928
				1929	return bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1930	bad:
				1931	bioset_free(bs);
				1932	return NULL;
				1933	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1934	EXPORT_SYMBOL(bioset_create);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1935
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	1936	#ifdef CONFIG_BLK_CGROUP
				1937	/**
				1938	* bio_associate_current - associate a bio with %current
				1939	* @bio: target bio
				1940	*
				1941	* Associate @bio with %current if it hasn't been associated yet. Block
				1942	* layer will treat @bio as if it were issued by %current no matter which
				1943	* task actually issues it.
				1944	*
				1945	* This function takes an extra reference of @task's io_context and blkcg
				1946	* which will be put when @bio is released. The caller must own @bio,
				1947	* ensure %current->io_context exists, and is responsible for synchronizing
				1948	* calls to this function.
				1949	*/
				1950	int bio_associate_current(struct bio *bio)
				1951	{
				1952	struct io_context *ioc;
				1953	struct cgroup_subsys_state *css;
				1954
				1955	if (bio->bi_ioc)
				1956	return -EBUSY;
				1957
				1958	ioc = current->io_context;
				1959	if (!ioc)
				1960	return -ENOENT;
				1961
				1962	/* acquire active ref on @ioc and associate */
				1963	get_io_context_active(ioc);
				1964	bio->bi_ioc = ioc;
				1965
				1966	/* associate blkcg if exists */
				1967	rcu_read_lock();
Tejun Heo	8af01f5	2013-08-08 20:11:22 -0400	[diff] [blame]	1968	css = task_css(current, blkio_subsys_id);
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	1969	if (css && css_tryget(css))
				1970	bio->bi_css = css;
				1971	rcu_read_unlock();
				1972
				1973	return 0;
				1974	}
				1975
				1976	/**
				1977	* bio_disassociate_task - undo bio_associate_current()
				1978	* @bio: target bio
				1979	*/
				1980	void bio_disassociate_task(struct bio *bio)
				1981	{
				1982	if (bio->bi_ioc) {
				1983	put_io_context(bio->bi_ioc);
				1984	bio->bi_ioc = NULL;
				1985	}
				1986	if (bio->bi_css) {
				1987	css_put(bio->bi_css);
				1988	bio->bi_css = NULL;
				1989	}
				1990	}
				1991
				1992	#endif /* CONFIG_BLK_CGROUP */
				1993
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1994	static void __init biovec_init_slabs(void)
				1995	{
				1996	int i;
				1997
				1998	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
				1999	int size;
				2000	struct biovec_slab *bvs = bvec_slabs + i;
				2001
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	2002	if (bvs->nr_vecs <= BIO_INLINE_VECS) {
				2003	bvs->slab = NULL;
				2004	continue;
				2005	}
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	2006
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2007	size = bvs->nr_vecs * sizeof(struct bio_vec);
				2008	bvs->slab = kmem_cache_create(bvs->name, size, 0,
Paul Mundt	20c2df8	2007-07-20 10:11:58 +0900	[diff] [blame]	2009	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2010	}
				2011	}
				2012
				2013	static int __init init_bio(void)
				2014	{
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	2015	bio_slab_max = 2;
				2016	bio_slab_nr = 0;
				2017	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
				2018	if (!bio_slabs)
				2019	panic("bio: can't allocate bios\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2020
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	2021	bio_integrity_init();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2022	biovec_init_slabs();
				2023
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	2024	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2025	if (!fs_bio_set)
				2026	panic("bio: can't allocate bios\n");
				2027
Martin K. Petersen	a91a278	2011-03-17 11:11:05 +0100	[diff] [blame]	2028	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
				2029	panic("bio: can't create integrity pool\n");
				2030
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2031	return 0;
				2032	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2033	subsys_initcall(init_bio);