Blame - fs/btrfs/raid56.c - SHIFTPHONES/kernel/shift/mainline

blob: 7dd55448ac6836666408dab7658c99acfe1afa25 [file] [log] [blame]

David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1	/*
				2	* Copyright (C) 2012 Fusion-io All rights reserved.
				3	* Copyright (C) 2012 Intel Corp. All rights reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public
				7	* License v2 as published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				12	* General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public
				15	* License along with this program; if not, write to the
				16	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				17	* Boston, MA 021110-1307, USA.
				18	*/
				19	#include <linux/sched.h>
				20	#include <linux/wait.h>
				21	#include <linux/bio.h>
				22	#include <linux/slab.h>
				23	#include <linux/buffer_head.h>
				24	#include <linux/blkdev.h>
				25	#include <linux/random.h>
				26	#include <linux/iocontext.h>
				27	#include <linux/capability.h>
				28	#include <linux/ratelimit.h>
				29	#include <linux/kthread.h>
				30	#include <linux/raid/pq.h>
				31	#include <linux/hash.h>
				32	#include <linux/list_sort.h>
				33	#include <linux/raid/xor.h>
David Sterba	818e010	2017-05-31 18:40:02 +0200	[diff] [blame]	34	#include <linux/mm.h>
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	35	#include <asm/div64.h>
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	36	#include "ctree.h"
				37	#include "extent_map.h"
				38	#include "disk-io.h"
				39	#include "transaction.h"
				40	#include "print-tree.h"
				41	#include "volumes.h"
				42	#include "raid56.h"
				43	#include "async-thread.h"
				44	#include "check-integrity.h"
				45	#include "rcu-string.h"
				46
				47	/* set when additional merges to this rbio are not allowed */
				48	#define RBIO_RMW_LOCKED_BIT 1
				49
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	50	/*
				51	* set when this rbio is sitting in the hash, but it is just a cache
				52	* of past RMW
				53	*/
				54	#define RBIO_CACHE_BIT 2
				55
				56	/*
				57	* set when it is safe to trust the stripe_pages for caching
				58	*/
				59	#define RBIO_CACHE_READY_BIT 3
				60
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	61	#define RBIO_CACHE_SIZE 1024
				62
/*
 * The kind of work a raid bio performs.  Stored in
 * btrfs_raid_bio::operation; rbios with different operations are never
 * merged, and unlock_stripe() uses it to pick how the next pending rbio
 * is started.
 */
enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
	BTRFS_RBIO_REBUILD_MISSING,
};
				69
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	70	struct btrfs_raid_bio {
				71	struct btrfs_fs_info *fs_info;
				72	struct btrfs_bio *bbio;
				73
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	74	/* while we're doing rmw on a stripe
				75	* we put it into a hash table so we can
				76	* lock the stripe and merge more rbios
				77	* into it.
				78	*/
				79	struct list_head hash_list;
				80
				81	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	82	* LRU list for the stripe cache
				83	*/
				84	struct list_head stripe_cache;
				85
				86	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	87	* for scheduling work in the helper threads
				88	*/
				89	struct btrfs_work work;
				90
				91	/*
				92	* bio list and bio_list_lock are used
				93	* to add more bios into the stripe
				94	* in hopes of avoiding the full rmw
				95	*/
				96	struct bio_list bio_list;
				97	spinlock_t bio_list_lock;
				98
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	99	/* also protected by the bio_list_lock, the
				100	* plug list is used by the plugging code
				101	* to collect partial bios while plugged. The
				102	* stripe locking code also uses it to hand off
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	103	* the stripe lock to the next pending IO
				104	*/
				105	struct list_head plug_list;
				106
				107	/*
				108	* flags that tell us if it is safe to
				109	* merge with this bio
				110	*/
				111	unsigned long flags;
				112
				113	/* size of each individual stripe on disk */
				114	int stripe_len;
				115
				116	/* number of data stripes (no p/q) */
				117	int nr_data;
				118
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	119	int real_stripes;
				120
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	121	int stripe_npages;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	122	/*
				123	* set if we're doing a parity rebuild
				124	* for a read from higher up, which is handled
				125	* differently from a parity rebuild as part of
				126	* rmw
				127	*/
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	128	enum btrfs_rbio_ops operation;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	129
				130	/* first bad stripe */
				131	int faila;
				132
				133	/* second bad stripe (for raid6 use) */
				134	int failb;
				135
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	136	int scrubp;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	137	/*
				138	* number of pages needed to represent the full
				139	* stripe
				140	*/
				141	int nr_pages;
				142
				143	/*
				144	* size of all the bios in the bio_list. This
				145	* helps us decide if the rbio maps to a full
				146	* stripe or not
				147	*/
				148	int bio_list_bytes;
				149
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	150	int generic_bio_cnt;
				151
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	152	refcount_t refs;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	153
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	154	atomic_t stripes_pending;
				155
				156	atomic_t error;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	157	/*
				158	* these are two arrays of pointers. We allocate the
				159	* rbio big enough to hold them both and setup their
				160	* locations when the rbio is allocated
				161	*/
				162
				163	/* pointers to pages that we allocated for
				164	* reading/writing stripes directly from the disk (including P/Q)
				165	*/
				166	struct page **stripe_pages;
				167
				168	/*
				169	* pointers to the pages in the bio_list. Stored
				170	* here for faster lookup
				171	*/
				172	struct page **bio_pages;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	173
				174	/*
				175	* bitmap to record which horizontal stripe has data
				176	*/
				177	unsigned long *dbitmap;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	178	};
				179
				180	static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
				181	static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
				182	static void rmw_work(struct btrfs_work *work);
				183	static void read_rebuild_work(struct btrfs_work *work);
				184	static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
				185	static void async_read_rebuild(struct btrfs_raid_bio *rbio);
				186	static int fail_bio_stripe(struct btrfs_raid_bio rbio, struct bio bio);
				187	static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
				188	static void __free_raid_bio(struct btrfs_raid_bio *rbio);
				189	static void index_rbio_pages(struct btrfs_raid_bio *rbio);
				190	static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
				191
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	192	static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
				193	int need_check);
				194	static void async_scrub_parity(struct btrfs_raid_bio *rbio);
				195
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	196	/*
				197	* the stripe hash table is used for locking, and to collect
				198	* bios in hopes of making a full stripe
				199	*/
				200	int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
				201	{
				202	struct btrfs_stripe_hash_table *table;
				203	struct btrfs_stripe_hash_table *x;
				204	struct btrfs_stripe_hash *cur;
				205	struct btrfs_stripe_hash *h;
				206	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
				207	int i;
David Sterba	83c8266	2013-03-01 15:03:00 +0000	[diff] [blame]	208	int table_size;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	209
				210	if (info->stripe_hash_table)
				211	return 0;
				212
David Sterba	83c8266	2013-03-01 15:03:00 +0000	[diff] [blame]	213	/*
				214	* The table is large, starting with order 4 and can go as high as
				215	* order 7 in case lock debugging is turned on.
				216	*
				217	* Try harder to allocate and fallback to vmalloc to lower the chance
				218	* of a failing mount.
				219	*/
				220	table_size = sizeof(table) + sizeof(h) * num_entries;
David Sterba	818e010	2017-05-31 18:40:02 +0200	[diff] [blame]	221	table = kvzalloc(table_size, GFP_KERNEL);
				222	if (!table)
				223	return -ENOMEM;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	224
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	225	spin_lock_init(&table->cache_lock);
				226	INIT_LIST_HEAD(&table->stripe_cache);
				227
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	228	h = table->table;
				229
				230	for (i = 0; i < num_entries; i++) {
				231	cur = h + i;
				232	INIT_LIST_HEAD(&cur->hash_list);
				233	spin_lock_init(&cur->lock);
				234	init_waitqueue_head(&cur->wait);
				235	}
				236
				237	x = cmpxchg(&info->stripe_hash_table, NULL, table);
Wang Shilong	f749303	2014-11-22 21:13:10 +0800	[diff] [blame]	238	if (x)
				239	kvfree(x);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	240	return 0;
				241	}
				242
				243	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	244	* caching an rbio means to copy anything from the
				245	* bio_pages array into the stripe_pages array. We
				246	* use the page uptodate bit in the stripe cache array
				247	* to indicate if it has valid data
				248	*
				249	* once the caching is done, we set the cache ready
				250	* bit.
				251	*/
				252	static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
				253	{
				254	int i;
				255	char *s;
				256	char *d;
				257	int ret;
				258
				259	ret = alloc_rbio_pages(rbio);
				260	if (ret)
				261	return;
				262
				263	for (i = 0; i < rbio->nr_pages; i++) {
				264	if (!rbio->bio_pages[i])
				265	continue;
				266
				267	s = kmap(rbio->bio_pages[i]);
				268	d = kmap(rbio->stripe_pages[i]);
				269
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	270	memcpy(d, s, PAGE_SIZE);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	271
				272	kunmap(rbio->bio_pages[i]);
				273	kunmap(rbio->stripe_pages[i]);
				274	SetPageUptodate(rbio->stripe_pages[i]);
				275	}
				276	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
				277	}
				278
				279	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	280	* we hash on the first logical address of the stripe
				281	*/
				282	static int rbio_bucket(struct btrfs_raid_bio *rbio)
				283	{
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	284	u64 num = rbio->bbio->raid_map[0];
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	285
				286	/*
				287	* we shift down quite a bit. We're using byte
				288	* addressing, and most of the lower bits are zeros.
				289	* This tends to upset hash_64, and it consistently
				290	* returns just one or two different values.
				291	*
				292	* shifting off the lower bits fixes things.
				293	*/
				294	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
				295	}
				296
				297	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	298	* stealing an rbio means taking all the uptodate pages from the stripe
				299	* array in the source rbio and putting them into the destination rbio
				300	*/
				301	static void steal_rbio(struct btrfs_raid_bio src, struct btrfs_raid_bio dest)
				302	{
				303	int i;
				304	struct page *s;
				305	struct page *d;
				306
				307	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
				308	return;
				309
				310	for (i = 0; i < dest->nr_pages; i++) {
				311	s = src->stripe_pages[i];
				312	if (!s \|\| !PageUptodate(s)) {
				313	continue;
				314	}
				315
				316	d = dest->stripe_pages[i];
				317	if (d)
				318	__free_page(d);
				319
				320	dest->stripe_pages[i] = s;
				321	src->stripe_pages[i] = NULL;
				322	}
				323	}
				324
				325	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	326	* merging means we take the bio_list from the victim and
				327	* splice it into the destination. The victim should
				328	* be discarded afterwards.
				329	*
				330	* must be called with dest->rbio_list_lock held
				331	*/
				332	static void merge_rbio(struct btrfs_raid_bio *dest,
				333	struct btrfs_raid_bio *victim)
				334	{
				335	bio_list_merge(&dest->bio_list, &victim->bio_list);
				336	dest->bio_list_bytes += victim->bio_list_bytes;
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	337	dest->generic_bio_cnt += victim->generic_bio_cnt;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	338	bio_list_init(&victim->bio_list);
				339	}
				340
				341	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	342	* used to prune items that are in the cache. The caller
				343	* must hold the hash table lock.
				344	*/
				345	static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
				346	{
				347	int bucket = rbio_bucket(rbio);
				348	struct btrfs_stripe_hash_table *table;
				349	struct btrfs_stripe_hash *h;
				350	int freeit = 0;
				351
				352	/*
				353	* check the bit again under the hash table lock.
				354	*/
				355	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
				356	return;
				357
				358	table = rbio->fs_info->stripe_hash_table;
				359	h = table->table + bucket;
				360
				361	/* hold the lock for the bucket because we may be
				362	* removing it from the hash table
				363	*/
				364	spin_lock(&h->lock);
				365
				366	/*
				367	* hold the lock for the bio list because we need
				368	* to make sure the bio list is empty
				369	*/
				370	spin_lock(&rbio->bio_list_lock);
				371
				372	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
				373	list_del_init(&rbio->stripe_cache);
				374	table->cache_size -= 1;
				375	freeit = 1;
				376
				377	/* if the bio list isn't empty, this rbio is
				378	* still involved in an IO. We take it out
				379	* of the cache list, and drop the ref that
				380	* was held for the list.
				381	*
				382	* If the bio_list was empty, we also remove
				383	* the rbio from the hash_table, and drop
				384	* the corresponding ref
				385	*/
				386	if (bio_list_empty(&rbio->bio_list)) {
				387	if (!list_empty(&rbio->hash_list)) {
				388	list_del_init(&rbio->hash_list);
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	389	refcount_dec(&rbio->refs);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	390	BUG_ON(!list_empty(&rbio->plug_list));
				391	}
				392	}
				393	}
				394
				395	spin_unlock(&rbio->bio_list_lock);
				396	spin_unlock(&h->lock);
				397
				398	if (freeit)
				399	__free_raid_bio(rbio);
				400	}
				401
				402	/*
				403	* prune a given rbio from the cache
				404	*/
				405	static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
				406	{
				407	struct btrfs_stripe_hash_table *table;
				408	unsigned long flags;
				409
				410	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
				411	return;
				412
				413	table = rbio->fs_info->stripe_hash_table;
				414
				415	spin_lock_irqsave(&table->cache_lock, flags);
				416	__remove_rbio_from_cache(rbio);
				417	spin_unlock_irqrestore(&table->cache_lock, flags);
				418	}
				419
				420	/*
				421	* remove everything in the cache
				422	*/
Eric Sandeen	48a3b63	2013-04-25 20:41:01 +0000	[diff] [blame]	423	static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	424	{
				425	struct btrfs_stripe_hash_table *table;
				426	unsigned long flags;
				427	struct btrfs_raid_bio *rbio;
				428
				429	table = info->stripe_hash_table;
				430
				431	spin_lock_irqsave(&table->cache_lock, flags);
				432	while (!list_empty(&table->stripe_cache)) {
				433	rbio = list_entry(table->stripe_cache.next,
				434	struct btrfs_raid_bio,
				435	stripe_cache);
				436	__remove_rbio_from_cache(rbio);
				437	}
				438	spin_unlock_irqrestore(&table->cache_lock, flags);
				439	}
				440
				441	/*
				442	* remove all cached entries and free the hash table
				443	* used by unmount
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	444	*/
				445	void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
				446	{
				447	if (!info->stripe_hash_table)
				448	return;
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	449	btrfs_clear_rbio_cache(info);
Wang Shilong	f749303	2014-11-22 21:13:10 +0800	[diff] [blame]	450	kvfree(info->stripe_hash_table);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	451	info->stripe_hash_table = NULL;
				452	}
				453
				454	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	455	* insert an rbio into the stripe cache. It
				456	* must have already been prepared by calling
				457	* cache_rbio_pages
				458	*
				459	* If this rbio was already cached, it gets
				460	* moved to the front of the lru.
				461	*
				462	* If the size of the rbio cache is too big, we
				463	* prune an item.
				464	*/
				465	static void cache_rbio(struct btrfs_raid_bio *rbio)
				466	{
				467	struct btrfs_stripe_hash_table *table;
				468	unsigned long flags;
				469
				470	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
				471	return;
				472
				473	table = rbio->fs_info->stripe_hash_table;
				474
				475	spin_lock_irqsave(&table->cache_lock, flags);
				476	spin_lock(&rbio->bio_list_lock);
				477
				478	/* bump our ref if we were not in the list before */
				479	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	480	refcount_inc(&rbio->refs);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	481
				482	if (!list_empty(&rbio->stripe_cache)){
				483	list_move(&rbio->stripe_cache, &table->stripe_cache);
				484	} else {
				485	list_add(&rbio->stripe_cache, &table->stripe_cache);
				486	table->cache_size += 1;
				487	}
				488
				489	spin_unlock(&rbio->bio_list_lock);
				490
				491	if (table->cache_size > RBIO_CACHE_SIZE) {
				492	struct btrfs_raid_bio *found;
				493
				494	found = list_entry(table->stripe_cache.prev,
				495	struct btrfs_raid_bio,
				496	stripe_cache);
				497
				498	if (found != rbio)
				499	__remove_rbio_from_cache(found);
				500	}
				501
				502	spin_unlock_irqrestore(&table->cache_lock, flags);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	503	}
				504
				505	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	506	* helper function to run the xor_blocks api. It is only
				507	* able to do MAX_XOR_BLOCKS at a time, so we need to
				508	* loop through.
				509	*/
				510	static void run_xor(void **pages, int src_cnt, ssize_t len)
				511	{
				512	int src_off = 0;
				513	int xor_src_cnt = 0;
				514	void *dest = pages[src_cnt];
				515
				516	while(src_cnt > 0) {
				517	xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
				518	xor_blocks(xor_src_cnt, len, dest, pages + src_off);
				519
				520	src_cnt -= xor_src_cnt;
				521	src_off += xor_src_cnt;
				522	}
				523	}
				524
				525	/*
				526	* returns true if the bio list inside this rbio
				527	* covers an entire stripe (no rmw required).
				528	* Must be called with the bio list lock held, or
				529	* at a time when you know it is impossible to add
				530	* new bios into the list
				531	*/
				532	static int __rbio_is_full(struct btrfs_raid_bio *rbio)
				533	{
				534	unsigned long size = rbio->bio_list_bytes;
				535	int ret = 1;
				536
				537	if (size != rbio->nr_data * rbio->stripe_len)
				538	ret = 0;
				539
				540	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
				541	return ret;
				542	}
				543
				544	static int rbio_is_full(struct btrfs_raid_bio *rbio)
				545	{
				546	unsigned long flags;
				547	int ret;
				548
				549	spin_lock_irqsave(&rbio->bio_list_lock, flags);
				550	ret = __rbio_is_full(rbio);
				551	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
				552	return ret;
				553	}
				554
				555	/*
				556	* returns 1 if it is safe to merge two rbios together.
				557	* The merging is safe if the two rbios correspond to
				558	* the same stripe and if they are both going in the same
				559	* direction (read vs write), and if neither one is
				560	* locked for final IO
				561	*
				562	* The caller is responsible for locking such that
				563	* rmw_locked is safe to test
				564	*/
				565	static int rbio_can_merge(struct btrfs_raid_bio *last,
				566	struct btrfs_raid_bio *cur)
				567	{
				568	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) \|\|
				569	test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
				570	return 0;
				571
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	572	/*
				573	* we can't merge with cached rbios, since the
				574	* idea is that when we merge the destination
				575	* rbio is going to run our IO for us. We can
Nicholas D Steeves	0132761	2016-05-19 21:18:45 -0400	[diff] [blame]	576	* steal from cached rbios though, other functions
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	577	* handle that.
				578	*/
				579	if (test_bit(RBIO_CACHE_BIT, &last->flags) \|\|
				580	test_bit(RBIO_CACHE_BIT, &cur->flags))
				581	return 0;
				582
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	583	if (last->bbio->raid_map[0] !=
				584	cur->bbio->raid_map[0])
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	585	return 0;
				586
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	587	/* we can't merge with different operations */
				588	if (last->operation != cur->operation)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	589	return 0;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	590	/*
				591	* We've need read the full stripe from the drive.
				592	* check and repair the parity and write the new results.
				593	*
				594	* We're not allowed to add any new bios to the
				595	* bio list here, anyone else that wants to
				596	* change this stripe needs to do their own rmw.
				597	*/
				598	if (last->operation == BTRFS_RBIO_PARITY_SCRUB \|\|
				599	cur->operation == BTRFS_RBIO_PARITY_SCRUB)
				600	return 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	601
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	602	if (last->operation == BTRFS_RBIO_REBUILD_MISSING \|\|
				603	cur->operation == BTRFS_RBIO_REBUILD_MISSING)
				604	return 0;
				605
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	606	return 1;
				607	}
				608
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	609	static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
				610	int index)
				611	{
				612	return stripe * rbio->stripe_npages + index;
				613	}
				614
				615	/*
				616	* these are just the pages from the rbio array, not from anything
				617	* the FS sent down to us
				618	*/
				619	static struct page rbio_stripe_page(struct btrfs_raid_bio rbio, int stripe,
				620	int index)
				621	{
				622	return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
				623	}
				624
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	625	/*
				626	* helper to index into the pstripe
				627	*/
				628	static struct page rbio_pstripe_page(struct btrfs_raid_bio rbio, int index)
				629	{
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	630	return rbio_stripe_page(rbio, rbio->nr_data, index);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	631	}
				632
				633	/*
				634	* helper to index into the qstripe, returns null
				635	* if there is no qstripe
				636	*/
				637	static struct page rbio_qstripe_page(struct btrfs_raid_bio rbio, int index)
				638	{
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	639	if (rbio->nr_data + 1 == rbio->real_stripes)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	640	return NULL;
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	641	return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	642	}
				643
				644	/*
				645	* The first stripe in the table for a logical address
				646	* has the lock. rbios are added in one of three ways:
				647	*
				648	* 1) Nobody has the stripe locked yet. The rbio is given
				649	* the lock and 0 is returned. The caller must start the IO
				650	* themselves.
				651	*
				652	* 2) Someone has the stripe locked, but we're able to merge
				653	* with the lock owner. The rbio is freed and the IO will
				654	* start automatically along with the existing rbio. 1 is returned.
				655	*
				656	* 3) Someone has the stripe locked, but we're not able to merge.
				657	* The rbio is added to the lock owner's plug list, or merged into
				658	* an rbio already on the plug list. When the lock owner unlocks,
				659	* the next rbio on the list is run and the IO is started automatically.
				660	* 1 is returned
				661	*
				662	* If we return 0, the caller still owns the rbio and must continue with
				663	* IO submission. If we return 1, the caller must assume the rbio has
				664	* already been freed.
				665	*/
				666	static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
				667	{
				668	int bucket = rbio_bucket(rbio);
				669	struct btrfs_stripe_hash *h = rbio->fs_info->stripe_hash_table->table + bucket;
				670	struct btrfs_raid_bio *cur;
				671	struct btrfs_raid_bio *pending;
				672	unsigned long flags;
				673	DEFINE_WAIT(wait);
				674	struct btrfs_raid_bio *freeit = NULL;
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	675	struct btrfs_raid_bio *cache_drop = NULL;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	676	int ret = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	677
				678	spin_lock_irqsave(&h->lock, flags);
				679	list_for_each_entry(cur, &h->hash_list, hash_list) {
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	680	if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	681	spin_lock(&cur->bio_list_lock);
				682
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	683	/* can we steal this cached rbio's pages? */
				684	if (bio_list_empty(&cur->bio_list) &&
				685	list_empty(&cur->plug_list) &&
				686	test_bit(RBIO_CACHE_BIT, &cur->flags) &&
				687	!test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
				688	list_del_init(&cur->hash_list);
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	689	refcount_dec(&cur->refs);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	690
				691	steal_rbio(cur, rbio);
				692	cache_drop = cur;
				693	spin_unlock(&cur->bio_list_lock);
				694
				695	goto lockit;
				696	}
				697
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	698	/* can we merge into the lock owner? */
				699	if (rbio_can_merge(cur, rbio)) {
				700	merge_rbio(cur, rbio);
				701	spin_unlock(&cur->bio_list_lock);
				702	freeit = rbio;
				703	ret = 1;
				704	goto out;
				705	}
				706
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	707
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	708	/*
				709	* we couldn't merge with the running
				710	* rbio, see if we can merge with the
				711	* pending ones. We don't have to
				712	* check for rmw_locked because there
				713	* is no way they are inside finish_rmw
				714	* right now
				715	*/
				716	list_for_each_entry(pending, &cur->plug_list,
				717	plug_list) {
				718	if (rbio_can_merge(pending, rbio)) {
				719	merge_rbio(pending, rbio);
				720	spin_unlock(&cur->bio_list_lock);
				721	freeit = rbio;
				722	ret = 1;
				723	goto out;
				724	}
				725	}
				726
				727	/* no merging, put us on the tail of the plug list,
				728	* our rbio will be started with the currently
				729	* running rbio unlocks
				730	*/
				731	list_add_tail(&rbio->plug_list, &cur->plug_list);
				732	spin_unlock(&cur->bio_list_lock);
				733	ret = 1;
				734	goto out;
				735	}
				736	}
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	737	lockit:
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	738	refcount_inc(&rbio->refs);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	739	list_add(&rbio->hash_list, &h->hash_list);
				740	out:
				741	spin_unlock_irqrestore(&h->lock, flags);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	742	if (cache_drop)
				743	remove_rbio_from_cache(cache_drop);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	744	if (freeit)
				745	__free_raid_bio(freeit);
				746	return ret;
				747	}
				748
				749	/*
				750	* called as rmw or parity rebuild is completed. If the plug list has more
				751	* rbios waiting for this stripe, the next one on the list will be started
				752	*/
				753	static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
				754	{
				755	int bucket;
				756	struct btrfs_stripe_hash *h;
				757	unsigned long flags;
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	758	int keep_cache = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	759
				760	bucket = rbio_bucket(rbio);
				761	h = rbio->fs_info->stripe_hash_table->table + bucket;
				762
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	763	if (list_empty(&rbio->plug_list))
				764	cache_rbio(rbio);
				765
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	766	spin_lock_irqsave(&h->lock, flags);
				767	spin_lock(&rbio->bio_list_lock);
				768
				769	if (!list_empty(&rbio->hash_list)) {
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	770	/*
				771	* if we're still cached and there is no other IO
				772	* to perform, just leave this rbio here for others
				773	* to steal from later
				774	*/
				775	if (list_empty(&rbio->plug_list) &&
				776	test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
				777	keep_cache = 1;
				778	clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
				779	BUG_ON(!bio_list_empty(&rbio->bio_list));
				780	goto done;
				781	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	782
				783	list_del_init(&rbio->hash_list);
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	784	refcount_dec(&rbio->refs);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	785
				786	/*
				787	* we use the plug list to hold all the rbios
				788	* waiting for the chance to lock this stripe.
				789	* hand the lock over to one of them.
				790	*/
				791	if (!list_empty(&rbio->plug_list)) {
				792	struct btrfs_raid_bio *next;
				793	struct list_head *head = rbio->plug_list.next;
				794
				795	next = list_entry(head, struct btrfs_raid_bio,
				796	plug_list);
				797
				798	list_del_init(&rbio->plug_list);
				799
				800	list_add(&next->hash_list, &h->hash_list);
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	801	refcount_inc(&next->refs);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	802	spin_unlock(&rbio->bio_list_lock);
				803	spin_unlock_irqrestore(&h->lock, flags);
				804
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	805	if (next->operation == BTRFS_RBIO_READ_REBUILD)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	806	async_read_rebuild(next);
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	807	else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
				808	steal_rbio(rbio, next);
				809	async_read_rebuild(next);
				810	} else if (next->operation == BTRFS_RBIO_WRITE) {
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	811	steal_rbio(rbio, next);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	812	async_rmw_stripe(next);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	813	} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
				814	steal_rbio(rbio, next);
				815	async_scrub_parity(next);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	816	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	817
				818	goto done_nolock;
David Sterba	33a9eca	2015-10-10 18:35:10 +0200	[diff] [blame]	819	/*
				820	* The barrier for this waitqueue_active is not needed,
				821	* we're protected by h->lock and can't miss a wakeup.
				822	*/
				823	} else if (waitqueue_active(&h->wait)) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	824	spin_unlock(&rbio->bio_list_lock);
				825	spin_unlock_irqrestore(&h->lock, flags);
				826	wake_up(&h->wait);
				827	goto done_nolock;
				828	}
				829	}
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	830	done:
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	831	spin_unlock(&rbio->bio_list_lock);
				832	spin_unlock_irqrestore(&h->lock, flags);
				833
				834	done_nolock:
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	835	if (!keep_cache)
				836	remove_rbio_from_cache(rbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	837	}
				838
				839	static void __free_raid_bio(struct btrfs_raid_bio *rbio)
				840	{
				841	int i;
				842
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	843	if (!refcount_dec_and_test(&rbio->refs))
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	844	return;
				845
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	846	WARN_ON(!list_empty(&rbio->stripe_cache));
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	847	WARN_ON(!list_empty(&rbio->hash_list));
				848	WARN_ON(!bio_list_empty(&rbio->bio_list));
				849
				850	for (i = 0; i < rbio->nr_pages; i++) {
				851	if (rbio->stripe_pages[i]) {
				852	__free_page(rbio->stripe_pages[i]);
				853	rbio->stripe_pages[i] = NULL;
				854	}
				855	}
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	856
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	857	btrfs_put_bbio(rbio->bbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	858	kfree(rbio);
				859	}
				860
				861	static void free_raid_bio(struct btrfs_raid_bio *rbio)
				862	{
				863	unlock_stripe(rbio);
				864	__free_raid_bio(rbio);
				865	}
				866
				867	/*
				868	* this frees the rbio and runs through all the bios in the
				869	* bio_list and calls end_io on them
				870	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	871	static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	872	{
				873	struct bio *cur = bio_list_get(&rbio->bio_list);
				874	struct bio *next;
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	875
				876	if (rbio->generic_bio_cnt)
				877	btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
				878
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	879	free_raid_bio(rbio);
				880
				881	while (cur) {
				882	next = cur->bi_next;
				883	cur->bi_next = NULL;
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	884	cur->bi_error = err;
				885	bio_endio(cur);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	886	cur = next;
				887	}
				888	}
				889
				890	/*
				891	* end io function used by finish_rmw. When we finally
				892	* get here, we've written a full stripe
				893	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	894	static void raid_write_end_io(struct bio *bio)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	895	{
				896	struct btrfs_raid_bio *rbio = bio->bi_private;
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	897	int err = bio->bi_error;
Zhao Lei	a6111d11b	2016-01-12 17:52:13 +0800	[diff] [blame]	898	int max_errors;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	899
				900	if (err)
				901	fail_bio_stripe(rbio, bio);
				902
				903	bio_put(bio);
				904
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	905	if (!atomic_dec_and_test(&rbio->stripes_pending))
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	906	return;
				907
				908	err = 0;
				909
				910	/* OK, we have read all the stripes we need to. */
Zhao Lei	a6111d11b	2016-01-12 17:52:13 +0800	[diff] [blame]	911	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
				912	0 : rbio->bbio->max_errors;
				913	if (atomic_read(&rbio->error) > max_errors)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	914	err = -EIO;
				915
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	916	rbio_orig_end_io(rbio, err);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	917	}
				918
				919	/*
				920	* the read/modify/write code wants to use the original bio for
				921	* any pages it included, and then use the rbio for everything
				922	* else. This function decides if a given index (stripe number)
				923	* and page number in that stripe fall inside the original bio
				924	* or the rbio.
				925	*
				926	* if you set bio_list_only, you'll get a NULL back for any ranges
				927	* that are outside the bio_list
				928	*
				929	* This doesn't take any refs on anything, you get a bare page pointer
				930	* and the caller must bump refs as required.
				931	*
				932	* You must call index_rbio_pages once before you can trust
				933	* the answers from this function.
				934	*/
				935	static struct page page_in_rbio(struct btrfs_raid_bio rbio,
				936	int index, int pagenr, int bio_list_only)
				937	{
				938	int chunk_page;
				939	struct page *p = NULL;
				940
				941	chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;
				942
				943	spin_lock_irq(&rbio->bio_list_lock);
				944	p = rbio->bio_pages[chunk_page];
				945	spin_unlock_irq(&rbio->bio_list_lock);
				946
				947	if (p \|\| bio_list_only)
				948	return p;
				949
				950	return rbio->stripe_pages[chunk_page];
				951	}
				952
				953	/*
				954	* number of pages we need for the entire stripe across all the
				955	* drives
				956	*/
				957	static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
				958	{
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	959	return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	960	}
				961
				962	/*
				963	* allocation and initial setup for the btrfs_raid_bio. Not
				964	* this does not allocate any pages for rbio->pages.
				965	*/
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	966	static struct btrfs_raid_bio alloc_rbio(struct btrfs_fs_info fs_info,
				967	struct btrfs_bio *bbio,
				968	u64 stripe_len)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	969	{
				970	struct btrfs_raid_bio *rbio;
				971	int nr_data = 0;
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	972	int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
				973	int num_pages = rbio_nr_pages(stripe_len, real_stripes);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	974	int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	975	void *p;
				976
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	977	rbio = kzalloc(sizeof(rbio) + num_pages sizeof(struct page ) 2 +
Zhao Lei	bfca9a6	2014-12-08 19:55:57 +0800	[diff] [blame]	978	DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
				979	sizeof(long), GFP_NOFS);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	980	if (!rbio)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	981	return ERR_PTR(-ENOMEM);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	982
				983	bio_list_init(&rbio->bio_list);
				984	INIT_LIST_HEAD(&rbio->plug_list);
				985	spin_lock_init(&rbio->bio_list_lock);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	986	INIT_LIST_HEAD(&rbio->stripe_cache);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	987	INIT_LIST_HEAD(&rbio->hash_list);
				988	rbio->bbio = bbio;
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	989	rbio->fs_info = fs_info;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	990	rbio->stripe_len = stripe_len;
				991	rbio->nr_pages = num_pages;
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	992	rbio->real_stripes = real_stripes;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	993	rbio->stripe_npages = stripe_npages;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	994	rbio->faila = -1;
				995	rbio->failb = -1;
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	996	refcount_set(&rbio->refs, 1);
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	997	atomic_set(&rbio->error, 0);
				998	atomic_set(&rbio->stripes_pending, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	999
				1000	/*
				1001	* the stripe_pages and bio_pages array point to the extra
				1002	* memory we allocated past the end of the rbio
				1003	*/
				1004	p = rbio + 1;
				1005	rbio->stripe_pages = p;
				1006	rbio->bio_pages = p + sizeof(struct page ) num_pages;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	1007	rbio->dbitmap = p + sizeof(struct page ) num_pages * 2;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1008
Zhao Lei	10f1190	2015-01-20 15:11:43 +0800	[diff] [blame]	1009	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
				1010	nr_data = real_stripes - 1;
				1011	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1012	nr_data = real_stripes - 2;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1013	else
Zhao Lei	10f1190	2015-01-20 15:11:43 +0800	[diff] [blame]	1014	BUG();
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1015
				1016	rbio->nr_data = nr_data;
				1017	return rbio;
				1018	}
				1019
				1020	/* allocate pages for all the stripes in the bio, including parity */
				1021	static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
				1022	{
				1023	int i;
				1024	struct page *page;
				1025
				1026	for (i = 0; i < rbio->nr_pages; i++) {
				1027	if (rbio->stripe_pages[i])
				1028	continue;
				1029	page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				1030	if (!page)
				1031	return -ENOMEM;
				1032	rbio->stripe_pages[i] = page;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1033	}
				1034	return 0;
				1035	}
				1036
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	1037	/* only allocate pages for p/q stripes */
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1038	static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
				1039	{
				1040	int i;
				1041	struct page *page;
				1042
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	1043	i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1044
				1045	for (; i < rbio->nr_pages; i++) {
				1046	if (rbio->stripe_pages[i])
				1047	continue;
				1048	page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				1049	if (!page)
				1050	return -ENOMEM;
				1051	rbio->stripe_pages[i] = page;
				1052	}
				1053	return 0;
				1054	}
				1055
				1056	/*
				1057	* add a single page from a specific stripe into our list of bios for IO
				1058	* this will try to merge into existing bios if possible, and returns
				1059	* zero if all went well.
				1060	*/
Eric Sandeen	48a3b63	2013-04-25 20:41:01 +0000	[diff] [blame]	1061	static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
				1062	struct bio_list *bio_list,
				1063	struct page *page,
				1064	int stripe_nr,
				1065	unsigned long page_index,
				1066	unsigned long bio_max_len)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1067	{
				1068	struct bio *last = bio_list->tail;
				1069	u64 last_end = 0;
				1070	int ret;
				1071	struct bio *bio;
				1072	struct btrfs_bio_stripe *stripe;
				1073	u64 disk_start;
				1074
				1075	stripe = &rbio->bbio->stripes[stripe_nr];
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1076	disk_start = stripe->physical + (page_index << PAGE_SHIFT);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1077
				1078	/* if the device is missing, just fail this stripe */
				1079	if (!stripe->dev->bdev)
				1080	return fail_rbio_index(rbio, stripe_nr);
				1081
				1082	/* see if we can add this page onto our existing bio */
				1083	if (last) {
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1084	last_end = (u64)last->bi_iter.bi_sector << 9;
				1085	last_end += last->bi_iter.bi_size;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1086
				1087	/*
				1088	* we can't merge these if they are from different
				1089	* devices or if they are not contiguous
				1090	*/
				1091	if (last_end == disk_start && stripe->dev->bdev &&
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1092	!last->bi_error &&
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1093	last->bi_bdev == stripe->dev->bdev) {
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1094	ret = bio_add_page(last, page, PAGE_SIZE, 0);
				1095	if (ret == PAGE_SIZE)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1096	return 0;
				1097	}
				1098	}
				1099
				1100	/* put a new bio on the list */
Chris Mason	9be3395	2013-05-17 18:30:14 -0400	[diff] [blame]	1101	bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1102	bio->bi_iter.bi_size = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1103	bio->bi_bdev = stripe->dev->bdev;
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1104	bio->bi_iter.bi_sector = disk_start >> 9;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1105
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1106	bio_add_page(bio, page, PAGE_SIZE, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1107	bio_list_add(bio_list, bio);
				1108	return 0;
				1109	}
				1110
				1111	/*
				1112	* while we're doing the read/modify/write cycle, we could
				1113	* have errors in reading pages off the disk. This checks
				1114	* for errors and if we're not able to read the page it'll
				1115	* trigger parity reconstruction. The rmw will be finished
				1116	* after we've reconstructed the failed stripes
				1117	*/
				1118	static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
				1119	{
				1120	if (rbio->faila >= 0 \|\| rbio->failb >= 0) {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1121	BUG_ON(rbio->faila == rbio->real_stripes - 1);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1122	__raid56_parity_recover(rbio);
				1123	} else {
				1124	finish_rmw(rbio);
				1125	}
				1126	}
				1127
				1128	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1129	* helper function to walk our bio list and populate the bio_pages array with
				1130	* the result. This seems expensive, but it is faster than constantly
				1131	* searching through the bio list as we setup the IO in finish_rmw or stripe
				1132	* reconstruction.
				1133	*
				1134	* This must be called before you trust the answers from page_in_rbio
				1135	*/
				1136	static void index_rbio_pages(struct btrfs_raid_bio *rbio)
				1137	{
				1138	struct bio *bio;
Christoph Hellwig	80ace3e	2016-11-25 09:07:47 +0100	[diff] [blame]	1139	struct bio_vec *bvec;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1140	u64 start;
				1141	unsigned long stripe_offset;
				1142	unsigned long page_index;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1143	int i;
				1144
				1145	spin_lock_irq(&rbio->bio_list_lock);
				1146	bio_list_for_each(bio, &rbio->bio_list) {
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1147	start = (u64)bio->bi_iter.bi_sector << 9;
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1148	stripe_offset = start - rbio->bbio->raid_map[0];
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1149	page_index = stripe_offset >> PAGE_SHIFT;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1150
Christoph Hellwig	80ace3e	2016-11-25 09:07:47 +0100	[diff] [blame]	1151	bio_for_each_segment_all(bvec, bio, i)
				1152	rbio->bio_pages[page_index + i] = bvec->bv_page;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1153	}
				1154	spin_unlock_irq(&rbio->bio_list_lock);
				1155	}
				1156
				1157	/*
				1158	* this is called from one of two situations. We either
				1159	* have a full stripe from the higher layers, or we've read all
				1160	* the missing bits off disk.
				1161	*
				1162	* This will calculate the parity and then send down any
				1163	* changed blocks.
				1164	*/
				1165	static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
				1166	{
				1167	struct btrfs_bio *bbio = rbio->bbio;
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1168	void *pointers[rbio->real_stripes];
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1169	int nr_data = rbio->nr_data;
				1170	int stripe;
				1171	int pagenr;
				1172	int p_stripe = -1;
				1173	int q_stripe = -1;
				1174	struct bio_list bio_list;
				1175	struct bio *bio;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1176	int ret;
				1177
				1178	bio_list_init(&bio_list);
				1179
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1180	if (rbio->real_stripes - rbio->nr_data == 1) {
				1181	p_stripe = rbio->real_stripes - 1;
				1182	} else if (rbio->real_stripes - rbio->nr_data == 2) {
				1183	p_stripe = rbio->real_stripes - 2;
				1184	q_stripe = rbio->real_stripes - 1;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1185	} else {
				1186	BUG();
				1187	}
				1188
				1189	/* at this point we either have a full stripe,
				1190	* or we've read the full stripe from the drive.
				1191	* recalculate the parity and write the new results.
				1192	*
				1193	* We're not allowed to add any new bios to the
				1194	* bio list here, anyone else that wants to
				1195	* change this stripe needs to do their own rmw.
				1196	*/
				1197	spin_lock_irq(&rbio->bio_list_lock);
				1198	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
				1199	spin_unlock_irq(&rbio->bio_list_lock);
				1200
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1201	atomic_set(&rbio->error, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1202
				1203	/*
				1204	* now that we've set rmw_locked, run through the
				1205	* bio list one last time and map the page pointers
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	1206	*
				1207	* We don't cache full rbios because we're assuming
				1208	* the higher layers are unlikely to use this area of
				1209	* the disk again soon. If they do use it again,
				1210	* hopefully they will send another full bio.
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1211	*/
				1212	index_rbio_pages(rbio);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	1213	if (!rbio_is_full(rbio))
				1214	cache_rbio_pages(rbio);
				1215	else
				1216	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1217
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1218	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1219	struct page *p;
				1220	/* first collect one page from each data stripe */
				1221	for (stripe = 0; stripe < nr_data; stripe++) {
				1222	p = page_in_rbio(rbio, stripe, pagenr, 0);
				1223	pointers[stripe] = kmap(p);
				1224	}
				1225
				1226	/* then add the parity stripe */
				1227	p = rbio_pstripe_page(rbio, pagenr);
				1228	SetPageUptodate(p);
				1229	pointers[stripe++] = kmap(p);
				1230
				1231	if (q_stripe != -1) {
				1232
				1233	/*
				1234	* raid6, add the qstripe and call the
				1235	* library function to fill in our p/q
				1236	*/
				1237	p = rbio_qstripe_page(rbio, pagenr);
				1238	SetPageUptodate(p);
				1239	pointers[stripe++] = kmap(p);
				1240
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1241	raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1242	pointers);
				1243	} else {
				1244	/* raid5 */
				1245	memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1246	run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1247	}
				1248
				1249
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1250	for (stripe = 0; stripe < rbio->real_stripes; stripe++)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1251	kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
				1252	}
				1253
				1254	/*
				1255	* time to start writing. Make bios for everything from the
				1256	* higher layers (the bio_list in our rbio) and our p/q. Ignore
				1257	* everything else.
				1258	*/
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1259	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1260	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1261	struct page *page;
				1262	if (stripe < rbio->nr_data) {
				1263	page = page_in_rbio(rbio, stripe, pagenr, 1);
				1264	if (!page)
				1265	continue;
				1266	} else {
				1267	page = rbio_stripe_page(rbio, stripe, pagenr);
				1268	}
				1269
				1270	ret = rbio_add_io_page(rbio, &bio_list,
				1271	page, stripe, pagenr, rbio->stripe_len);
				1272	if (ret)
				1273	goto cleanup;
				1274	}
				1275	}
				1276
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1277	if (likely(!bbio->num_tgtdevs))
				1278	goto write_data;
				1279
				1280	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
				1281	if (!bbio->tgtdev_map[stripe])
				1282	continue;
				1283
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1284	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1285	struct page *page;
				1286	if (stripe < rbio->nr_data) {
				1287	page = page_in_rbio(rbio, stripe, pagenr, 1);
				1288	if (!page)
				1289	continue;
				1290	} else {
				1291	page = rbio_stripe_page(rbio, stripe, pagenr);
				1292	}
				1293
				1294	ret = rbio_add_io_page(rbio, &bio_list, page,
				1295	rbio->bbio->tgtdev_map[stripe],
				1296	pagenr, rbio->stripe_len);
				1297	if (ret)
				1298	goto cleanup;
				1299	}
				1300	}
				1301
				1302	write_data:
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1303	atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
				1304	BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1305
				1306	while (1) {
				1307	bio = bio_list_pop(&bio_list);
				1308	if (!bio)
				1309	break;
				1310
				1311	bio->bi_private = rbio;
				1312	bio->bi_end_io = raid_write_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	1313	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	1314
				1315	submit_bio(bio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1316	}
				1317	return;
				1318
				1319	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1320	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1321	}
				1322
				1323	/*
				1324	* helper to find the stripe number for a given bio. Used to figure out which
				1325	* stripe has failed. This expects the bio to correspond to a physical disk,
				1326	* so it looks up based on physical sector numbers.
				1327	*/
				1328	static int find_bio_stripe(struct btrfs_raid_bio *rbio,
				1329	struct bio *bio)
				1330	{
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1331	u64 physical = bio->bi_iter.bi_sector;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1332	u64 stripe_start;
				1333	int i;
				1334	struct btrfs_bio_stripe *stripe;
				1335
				1336	physical <<= 9;
				1337
				1338	for (i = 0; i < rbio->bbio->num_stripes; i++) {
				1339	stripe = &rbio->bbio->stripes[i];
				1340	stripe_start = stripe->physical;
				1341	if (physical >= stripe_start &&
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1342	physical < stripe_start + rbio->stripe_len &&
				1343	bio->bi_bdev == stripe->dev->bdev) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1344	return i;
				1345	}
				1346	}
				1347	return -1;
				1348	}
				1349
				1350	/*
				1351	* helper to find the stripe number for a given
				1352	* bio (before mapping). Used to figure out which stripe has
				1353	* failed. This looks up based on logical block numbers.
				1354	*/
				1355	static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
				1356	struct bio *bio)
				1357	{
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1358	u64 logical = bio->bi_iter.bi_sector;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1359	u64 stripe_start;
				1360	int i;
				1361
				1362	logical <<= 9;
				1363
				1364	for (i = 0; i < rbio->nr_data; i++) {
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1365	stripe_start = rbio->bbio->raid_map[i];
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1366	if (logical >= stripe_start &&
				1367	logical < stripe_start + rbio->stripe_len) {
				1368	return i;
				1369	}
				1370	}
				1371	return -1;
				1372	}
				1373
				1374	/*
				1375	* returns -EIO if we had too many failures
				1376	*/
				1377	static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
				1378	{
				1379	unsigned long flags;
				1380	int ret = 0;
				1381
				1382	spin_lock_irqsave(&rbio->bio_list_lock, flags);
				1383
				1384	/* we already know this stripe is bad, move on */
				1385	if (rbio->faila == failed \|\| rbio->failb == failed)
				1386	goto out;
				1387
				1388	if (rbio->faila == -1) {
				1389	/* first failure on this rbio */
				1390	rbio->faila = failed;
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1391	atomic_inc(&rbio->error);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1392	} else if (rbio->failb == -1) {
				1393	/* second failure on this rbio */
				1394	rbio->failb = failed;
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1395	atomic_inc(&rbio->error);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1396	} else {
				1397	ret = -EIO;
				1398	}
				1399	out:
				1400	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
				1401
				1402	return ret;
				1403	}
				1404
				1405	/*
				1406	* helper to fail a stripe based on a physical disk
				1407	* bio.
				1408	*/
				1409	static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
				1410	struct bio *bio)
				1411	{
				1412	int failed = find_bio_stripe(rbio, bio);
				1413
				1414	if (failed < 0)
				1415	return -EIO;
				1416
				1417	return fail_rbio_index(rbio, failed);
				1418	}
				1419
				1420	/*
				1421	* this sets each page in the bio uptodate. It should only be used on private
				1422	* rbio pages, nothing that comes in from the higher layers
				1423	*/
				1424	static void set_bio_pages_uptodate(struct bio *bio)
				1425	{
Christoph Hellwig	80ace3e	2016-11-25 09:07:47 +0100	[diff] [blame]	1426	struct bio_vec *bvec;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1427	int i;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1428
Christoph Hellwig	80ace3e	2016-11-25 09:07:47 +0100	[diff] [blame]	1429	bio_for_each_segment_all(bvec, bio, i)
				1430	SetPageUptodate(bvec->bv_page);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1431	}
				1432
				1433	/*
				1434	* end io for the read phase of the rmw cycle. All the bios here are physical
				1435	* stripe bios we've read from the disk so we can recalculate the parity of the
				1436	* stripe.
				1437	*
				1438	* This will usually kick off finish_rmw once all the bios are read in, but it
				1439	* may trigger parity reconstruction if we had any errors along the way
				1440	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1441	static void raid_rmw_end_io(struct bio *bio)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1442	{
				1443	struct btrfs_raid_bio *rbio = bio->bi_private;
				1444
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1445	if (bio->bi_error)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1446	fail_bio_stripe(rbio, bio);
				1447	else
				1448	set_bio_pages_uptodate(bio);
				1449
				1450	bio_put(bio);
				1451
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1452	if (!atomic_dec_and_test(&rbio->stripes_pending))
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1453	return;
				1454
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1455	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1456	goto cleanup;
				1457
				1458	/*
				1459	* this will normally call finish_rmw to start our write
				1460	* but if there are any failed stripes we'll reconstruct
				1461	* from parity first
				1462	*/
				1463	validate_rbio_for_rmw(rbio);
				1464	return;
				1465
				1466	cleanup:
				1467
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1468	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1469	}
				1470
				1471	static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
				1472	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1473	btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL);
				1474	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1475	}
				1476
				1477	static void async_read_rebuild(struct btrfs_raid_bio *rbio)
				1478	{
Liu Bo	9e0af23	2014-08-15 23:36:53 +0800	[diff] [blame]	1479	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
				1480	read_rebuild_work, NULL, NULL);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1481
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1482	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1483	}
				1484
				1485	/*
				1486	* the stripe must be locked by the caller. It will
				1487	* unlock after all the writes are done
				1488	*/
				1489	static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
				1490	{
				1491	int bios_to_read = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1492	struct bio_list bio_list;
				1493	int ret;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1494	int pagenr;
				1495	int stripe;
				1496	struct bio *bio;
				1497
				1498	bio_list_init(&bio_list);
				1499
				1500	ret = alloc_rbio_pages(rbio);
				1501	if (ret)
				1502	goto cleanup;
				1503
				1504	index_rbio_pages(rbio);
				1505
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1506	atomic_set(&rbio->error, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1507	/*
				1508	* build a list of bios to read all the missing parts of this
				1509	* stripe
				1510	*/
				1511	for (stripe = 0; stripe < rbio->nr_data; stripe++) {
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1512	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1513	struct page *page;
				1514	/*
				1515	* we want to find all the pages missing from
				1516	* the rbio and read them from the disk. If
				1517	* page_in_rbio finds a page in the bio list
				1518	* we don't need to read it off the stripe.
				1519	*/
				1520	page = page_in_rbio(rbio, stripe, pagenr, 1);
				1521	if (page)
				1522	continue;
				1523
				1524	page = rbio_stripe_page(rbio, stripe, pagenr);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	1525	/*
				1526	* the bio cache may have handed us an uptodate
				1527	* page. If so, be happy and use it
				1528	*/
				1529	if (PageUptodate(page))
				1530	continue;
				1531
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1532	ret = rbio_add_io_page(rbio, &bio_list, page,
				1533	stripe, pagenr, rbio->stripe_len);
				1534	if (ret)
				1535	goto cleanup;
				1536	}
				1537	}
				1538
				1539	bios_to_read = bio_list_size(&bio_list);
				1540	if (!bios_to_read) {
				1541	/*
				1542	* this can happen if others have merged with
				1543	* us, it means there is nothing left to read.
				1544	* But if there are missing devices it may not be
				1545	* safe to do the full stripe write yet.
				1546	*/
				1547	goto finish;
				1548	}
				1549
				1550	/*
				1551	* the bbio may be freed once we submit the last bio. Make sure
				1552	* not to touch it after that
				1553	*/
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1554	atomic_set(&rbio->stripes_pending, bios_to_read);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1555	while (1) {
				1556	bio = bio_list_pop(&bio_list);
				1557	if (!bio)
				1558	break;
				1559
				1560	bio->bi_private = rbio;
				1561	bio->bi_end_io = raid_rmw_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	1562	bio_set_op_attrs(bio, REQ_OP_READ, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1563
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1564	btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1565
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	1566	submit_bio(bio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1567	}
				1568	/* the actual write will happen once the reads are done */
				1569	return 0;
				1570
				1571	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1572	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1573	return -EIO;
				1574
				1575	finish:
				1576	validate_rbio_for_rmw(rbio);
				1577	return 0;
				1578	}
				1579
				1580	/*
				1581	* if the upper layers pass in a full stripe, we thank them by only allocating
				1582	* enough pages to hold the parity, and sending it all down quickly.
				1583	*/
				1584	static int full_stripe_write(struct btrfs_raid_bio *rbio)
				1585	{
				1586	int ret;
				1587
				1588	ret = alloc_rbio_parity_pages(rbio);
Miao Xie	3cd846d	2013-07-22 16:36:57 +0800	[diff] [blame]	1589	if (ret) {
				1590	__free_raid_bio(rbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1591	return ret;
Miao Xie	3cd846d	2013-07-22 16:36:57 +0800	[diff] [blame]	1592	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1593
				1594	ret = lock_stripe_add(rbio);
				1595	if (ret == 0)
				1596	finish_rmw(rbio);
				1597	return 0;
				1598	}
				1599
				1600	/*
				1601	* partial stripe writes get handed over to async helpers.
				1602	* We're really hoping to merge a few more writes into this
				1603	* rbio before calculating new parity
				1604	*/
				1605	static int partial_stripe_write(struct btrfs_raid_bio *rbio)
				1606	{
				1607	int ret;
				1608
				1609	ret = lock_stripe_add(rbio);
				1610	if (ret == 0)
				1611	async_rmw_stripe(rbio);
				1612	return 0;
				1613	}
				1614
				1615	/*
				1616	* sometimes while we were reading from the drive to
				1617	* recalculate parity, enough new bios come into create
				1618	* a full stripe. So we do a check here to see if we can
				1619	* go directly to finish_rmw
				1620	*/
				1621	static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
				1622	{
				1623	/* head off into rmw land if we don't have a full stripe */
				1624	if (!rbio_is_full(rbio))
				1625	return partial_stripe_write(rbio);
				1626	return full_stripe_write(rbio);
				1627	}
				1628
				1629	/*
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1630	* We use plugging call backs to collect full stripes.
				1631	* Any time we get a partial stripe write while plugged
				1632	* we collect it into a list. When the unplug comes down,
				1633	* we sort the list by logical block number and merge
				1634	* everything we can into the same rbios
				1635	*/
				1636	struct btrfs_plug_cb {
				1637	struct blk_plug_cb cb;
				1638	struct btrfs_fs_info *info;
				1639	struct list_head rbio_list;
				1640	struct btrfs_work work;
				1641	};
				1642
				1643	/*
				1644	* rbios on the plug list are sorted for easier merging.
				1645	*/
				1646	static int plug_cmp(void priv, struct list_head a, struct list_head *b)
				1647	{
				1648	struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
				1649	plug_list);
				1650	struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
				1651	plug_list);
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1652	u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
				1653	u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1654
				1655	if (a_sector < b_sector)
				1656	return -1;
				1657	if (a_sector > b_sector)
				1658	return 1;
				1659	return 0;
				1660	}
				1661
				1662	static void run_plug(struct btrfs_plug_cb *plug)
				1663	{
				1664	struct btrfs_raid_bio *cur;
				1665	struct btrfs_raid_bio *last = NULL;
				1666
				1667	/*
				1668	* sort our plug list then try to merge
				1669	* everything we can in hopes of creating full
				1670	* stripes.
				1671	*/
				1672	list_sort(NULL, &plug->rbio_list, plug_cmp);
				1673	while (!list_empty(&plug->rbio_list)) {
				1674	cur = list_entry(plug->rbio_list.next,
				1675	struct btrfs_raid_bio, plug_list);
				1676	list_del_init(&cur->plug_list);
				1677
				1678	if (rbio_is_full(cur)) {
				1679	/* we have a full stripe, send it down */
				1680	full_stripe_write(cur);
				1681	continue;
				1682	}
				1683	if (last) {
				1684	if (rbio_can_merge(last, cur)) {
				1685	merge_rbio(last, cur);
				1686	__free_raid_bio(cur);
				1687	continue;
				1688
				1689	}
				1690	__raid56_parity_write(last);
				1691	}
				1692	last = cur;
				1693	}
				1694	if (last) {
				1695	__raid56_parity_write(last);
				1696	}
				1697	kfree(plug);
				1698	}
				1699
				1700	/*
				1701	* if the unplug comes from schedule, we have to push the
				1702	* work off to a helper thread
				1703	*/
				1704	static void unplug_work(struct btrfs_work *work)
				1705	{
				1706	struct btrfs_plug_cb *plug;
				1707	plug = container_of(work, struct btrfs_plug_cb, work);
				1708	run_plug(plug);
				1709	}
				1710
				1711	static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
				1712	{
				1713	struct btrfs_plug_cb *plug;
				1714	plug = container_of(cb, struct btrfs_plug_cb, cb);
				1715
				1716	if (from_schedule) {
Liu Bo	9e0af23	2014-08-15 23:36:53 +0800	[diff] [blame]	1717	btrfs_init_work(&plug->work, btrfs_rmw_helper,
				1718	unplug_work, NULL, NULL);
Qu Wenruo	d05a33a	2014-02-28 10:46:11 +0800	[diff] [blame]	1719	btrfs_queue_work(plug->info->rmw_workers,
				1720	&plug->work);
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1721	return;
				1722	}
				1723	run_plug(plug);
				1724	}
				1725
				1726	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1727	* our main entry point for writes from the rest of the FS.
				1728	*/
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	1729	int raid56_parity_write(struct btrfs_fs_info fs_info, struct bio bio,
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1730	struct btrfs_bio *bbio, u64 stripe_len)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1731	{
				1732	struct btrfs_raid_bio *rbio;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1733	struct btrfs_plug_cb *plug = NULL;
				1734	struct blk_plug_cb *cb;
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1735	int ret;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1736
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	1737	rbio = alloc_rbio(fs_info, bbio, stripe_len);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	1738	if (IS_ERR(rbio)) {
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	1739	btrfs_put_bbio(bbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1740	return PTR_ERR(rbio);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	1741	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1742	bio_list_add(&rbio->bio_list, bio);
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1743	rbio->bio_list_bytes = bio->bi_iter.bi_size;
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	1744	rbio->operation = BTRFS_RBIO_WRITE;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1745
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1746	btrfs_bio_counter_inc_noblocked(fs_info);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1747	rbio->generic_bio_cnt = 1;
				1748
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1749	/*
				1750	* don't plug on full rbios, just get them out the door
				1751	* as quickly as we can
				1752	*/
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1753	if (rbio_is_full(rbio)) {
				1754	ret = full_stripe_write(rbio);
				1755	if (ret)
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1756	btrfs_bio_counter_dec(fs_info);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1757	return ret;
				1758	}
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1759
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1760	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1761	if (cb) {
				1762	plug = container_of(cb, struct btrfs_plug_cb, cb);
				1763	if (!plug->info) {
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1764	plug->info = fs_info;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1765	INIT_LIST_HEAD(&plug->rbio_list);
				1766	}
				1767	list_add_tail(&rbio->plug_list, &plug->rbio_list);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1768	ret = 0;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1769	} else {
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1770	ret = __raid56_parity_write(rbio);
				1771	if (ret)
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1772	btrfs_bio_counter_dec(fs_info);
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1773	}
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1774	return ret;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1775	}
				1776
				1777	/*
				1778	* all parity reconstruction happens here. We've read in everything
				1779	* we can find from the drives and this does the heavy lifting of
				1780	* sorting the good from the bad.
				1781	*/
				1782	static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
				1783	{
				1784	int pagenr, stripe;
				1785	void **pointers;
				1786	int faila = -1, failb = -1;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1787	struct page *page;
				1788	int err;
				1789	int i;
				1790
David Sterba	31e818f	2015-02-20 18:00:26 +0100	[diff] [blame]	1791	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1792	if (!pointers) {
				1793	err = -ENOMEM;
				1794	goto cleanup_io;
				1795	}
				1796
				1797	faila = rbio->faila;
				1798	failb = rbio->failb;
				1799
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	1800	if (rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
				1801	rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1802	spin_lock_irq(&rbio->bio_list_lock);
				1803	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
				1804	spin_unlock_irq(&rbio->bio_list_lock);
				1805	}
				1806
				1807	index_rbio_pages(rbio);
				1808
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1809	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	1810	/*
				1811	* Now we just use bitmap to mark the horizontal stripes in
				1812	* which we have data when doing parity scrub.
				1813	*/
				1814	if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
				1815	!test_bit(pagenr, rbio->dbitmap))
				1816	continue;
				1817
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1818	/* setup our array of pointers with pages
				1819	* from each stripe
				1820	*/
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1821	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1822	/*
				1823	* if we're rebuilding a read, we have to use
				1824	* pages from the bio list
				1825	*/
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	1826	if ((rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
				1827	rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1828	(stripe == faila \|\| stripe == failb)) {
				1829	page = page_in_rbio(rbio, stripe, pagenr, 0);
				1830	} else {
				1831	page = rbio_stripe_page(rbio, stripe, pagenr);
				1832	}
				1833	pointers[stripe] = kmap(page);
				1834	}
				1835
				1836	/* all raid6 handling here */
Zhao Lei	10f1190	2015-01-20 15:11:43 +0800	[diff] [blame]	1837	if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1838	/*
				1839	* single failure, rebuild from parity raid5
				1840	* style
				1841	*/
				1842	if (failb < 0) {
				1843	if (faila == rbio->nr_data) {
				1844	/*
				1845	* Just the P stripe has failed, without
				1846	* a bad data or Q stripe.
				1847	* TODO, we should redo the xor here.
				1848	*/
				1849	err = -EIO;
				1850	goto cleanup;
				1851	}
				1852	/*
				1853	* a single failure in raid6 is rebuilt
				1854	* in the pstripe code below
				1855	*/
				1856	goto pstripe;
				1857	}
				1858
				1859	/* make sure our ps and qs are in order */
				1860	if (faila > failb) {
				1861	int tmp = failb;
				1862	failb = faila;
				1863	faila = tmp;
				1864	}
				1865
				1866	/* if the q stripe is failed, do a pstripe reconstruction
				1867	* from the xors.
				1868	* If both the q stripe and the P stripe are failed, we're
				1869	* here due to a crc mismatch and we can't give them the
				1870	* data they want
				1871	*/
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1872	if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
				1873	if (rbio->bbio->raid_map[faila] ==
				1874	RAID5_P_STRIPE) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1875	err = -EIO;
				1876	goto cleanup;
				1877	}
				1878	/*
				1879	* otherwise we have one bad data stripe and
				1880	* a good P stripe. raid5!
				1881	*/
				1882	goto pstripe;
				1883	}
				1884
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1885	if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1886	raid6_datap_recov(rbio->real_stripes,
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1887	PAGE_SIZE, faila, pointers);
				1888	} else {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1889	raid6_2data_recov(rbio->real_stripes,
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1890	PAGE_SIZE, faila, failb,
				1891	pointers);
				1892	}
				1893	} else {
				1894	void *p;
				1895
				1896	/* rebuild from P stripe here (raid5 or raid6) */
				1897	BUG_ON(failb != -1);
				1898	pstripe:
				1899	/* Copy parity block into failed block to start with */
				1900	memcpy(pointers[faila],
				1901	pointers[rbio->nr_data],
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1902	PAGE_SIZE);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1903
				1904	/* rearrange the pointer array */
				1905	p = pointers[faila];
				1906	for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
				1907	pointers[stripe] = pointers[stripe + 1];
				1908	pointers[rbio->nr_data - 1] = p;
				1909
				1910	/* xor in the rest */
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1911	run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1912	}
				1913	/* if we're doing this rebuild as part of an rmw, go through
				1914	* and set all of our private rbio pages in the
				1915	* failed stripes as uptodate. This way finish_rmw will
				1916	* know they can be trusted. If this was a read reconstruction,
				1917	* other endio functions will fiddle the uptodate bits
				1918	*/
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	1919	if (rbio->operation == BTRFS_RBIO_WRITE) {
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1920	for (i = 0; i < rbio->stripe_npages; i++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1921	if (faila != -1) {
				1922	page = rbio_stripe_page(rbio, faila, i);
				1923	SetPageUptodate(page);
				1924	}
				1925	if (failb != -1) {
				1926	page = rbio_stripe_page(rbio, failb, i);
				1927	SetPageUptodate(page);
				1928	}
				1929	}
				1930	}
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1931	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1932	/*
				1933	* if we're rebuilding a read, we have to use
				1934	* pages from the bio list
				1935	*/
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	1936	if ((rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
				1937	rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1938	(stripe == faila \|\| stripe == failb)) {
				1939	page = page_in_rbio(rbio, stripe, pagenr, 0);
				1940	} else {
				1941	page = rbio_stripe_page(rbio, stripe, pagenr);
				1942	}
				1943	kunmap(page);
				1944	}
				1945	}
				1946
				1947	err = 0;
				1948	cleanup:
				1949	kfree(pointers);
				1950
				1951	cleanup_io:
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	1952	if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	1953	if (err == 0)
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	1954	cache_rbio_pages(rbio);
				1955	else
				1956	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
				1957
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1958	rbio_orig_end_io(rbio, err);
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	1959	} else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
Linus Torvalds	2236597	2015-09-05 15:14:43 -0700	[diff] [blame]	1960	rbio_orig_end_io(rbio, err);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1961	} else if (err == 0) {
				1962	rbio->faila = -1;
				1963	rbio->failb = -1;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	1964
				1965	if (rbio->operation == BTRFS_RBIO_WRITE)
				1966	finish_rmw(rbio);
				1967	else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
				1968	finish_parity_scrub(rbio, 0);
				1969	else
				1970	BUG();
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1971	} else {
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1972	rbio_orig_end_io(rbio, err);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1973	}
				1974	}
				1975
				1976	/*
				1977	* This is called only for stripes we've read from disk to
				1978	* reconstruct the parity.
				1979	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1980	static void raid_recover_end_io(struct bio *bio)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1981	{
				1982	struct btrfs_raid_bio *rbio = bio->bi_private;
				1983
				1984	/*
				1985	* we only read stripe pages off the disk, set them
				1986	* up to date if there were no errors
				1987	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1988	if (bio->bi_error)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1989	fail_bio_stripe(rbio, bio);
				1990	else
				1991	set_bio_pages_uptodate(bio);
				1992	bio_put(bio);
				1993
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1994	if (!atomic_dec_and_test(&rbio->stripes_pending))
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1995	return;
				1996
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1997	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1998	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1999	else
				2000	__raid_recover_end_io(rbio);
				2001	}
				2002
				2003	/*
				2004	* reads everything we need off the disk to reconstruct
				2005	* the parity. endio handlers trigger final reconstruction
				2006	* when the IO is done.
				2007	*
				2008	* This is used both for reads from the higher layers and for
				2009	* parity construction required to finish a rmw cycle.
				2010	*/
				2011	static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
				2012	{
				2013	int bios_to_read = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2014	struct bio_list bio_list;
				2015	int ret;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2016	int pagenr;
				2017	int stripe;
				2018	struct bio *bio;
				2019
				2020	bio_list_init(&bio_list);
				2021
				2022	ret = alloc_rbio_pages(rbio);
				2023	if (ret)
				2024	goto cleanup;
				2025
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2026	atomic_set(&rbio->error, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2027
				2028	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	2029	* read everything that hasn't failed. Thanks to the
				2030	* stripe cache, it is possible that some or all of these
				2031	* pages are going to be uptodate.
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2032	*/
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2033	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
Liu Bo	5588383	2014-06-24 15:39:16 +0800	[diff] [blame]	2034	if (rbio->faila == stripe \|\| rbio->failb == stripe) {
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2035	atomic_inc(&rbio->error);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2036	continue;
Liu Bo	5588383	2014-06-24 15:39:16 +0800	[diff] [blame]	2037	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2038
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	2039	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2040	struct page *p;
				2041
				2042	/*
				2043	* the rmw code may have already read this
				2044	* page in
				2045	*/
				2046	p = rbio_stripe_page(rbio, stripe, pagenr);
				2047	if (PageUptodate(p))
				2048	continue;
				2049
				2050	ret = rbio_add_io_page(rbio, &bio_list,
				2051	rbio_stripe_page(rbio, stripe, pagenr),
				2052	stripe, pagenr, rbio->stripe_len);
				2053	if (ret < 0)
				2054	goto cleanup;
				2055	}
				2056	}
				2057
				2058	bios_to_read = bio_list_size(&bio_list);
				2059	if (!bios_to_read) {
				2060	/*
				2061	* we might have no bios to read just because the pages
				2062	* were up to date, or we might have no bios to read because
				2063	* the devices were gone.
				2064	*/
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2065	if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2066	__raid_recover_end_io(rbio);
				2067	goto out;
				2068	} else {
				2069	goto cleanup;
				2070	}
				2071	}
				2072
				2073	/*
				2074	* the bbio may be freed once we submit the last bio. Make sure
				2075	* not to touch it after that
				2076	*/
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2077	atomic_set(&rbio->stripes_pending, bios_to_read);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2078	while (1) {
				2079	bio = bio_list_pop(&bio_list);
				2080	if (!bio)
				2081	break;
				2082
				2083	bio->bi_private = rbio;
				2084	bio->bi_end_io = raid_recover_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	2085	bio_set_op_attrs(bio, REQ_OP_READ, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2086
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2087	btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2088
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	2089	submit_bio(bio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2090	}
				2091	out:
				2092	return 0;
				2093
				2094	cleanup:
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2095	if (rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
				2096	rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2097	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2098	return -EIO;
				2099	}
				2100
				2101	/*
				2102	* the main entry point for reads from the higher layers. This
				2103	* is really only called when the normal read path had a failure,
				2104	* so we assume the bio they send down corresponds to a failed part
				2105	* of the drive.
				2106	*/
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2107	int raid56_parity_recover(struct btrfs_fs_info fs_info, struct bio bio,
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	2108	struct btrfs_bio *bbio, u64 stripe_len,
				2109	int mirror_num, int generic_io)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2110	{
				2111	struct btrfs_raid_bio *rbio;
				2112	int ret;
				2113
Liu Bo	abad60c	2017-03-29 10:54:26 -0700	[diff] [blame]	2114	if (generic_io) {
				2115	ASSERT(bbio->mirror_num == mirror_num);
				2116	btrfs_io_bio(bio)->mirror_num = mirror_num;
				2117	}
				2118
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2119	rbio = alloc_rbio(fs_info, bbio, stripe_len);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	2120	if (IS_ERR(rbio)) {
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	2121	if (generic_io)
				2122	btrfs_put_bbio(bbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2123	return PTR_ERR(rbio);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	2124	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2125
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	2126	rbio->operation = BTRFS_RBIO_READ_REBUILD;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2127	bio_list_add(&rbio->bio_list, bio);
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	2128	rbio->bio_list_bytes = bio->bi_iter.bi_size;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2129
				2130	rbio->faila = find_logical_bio_stripe(rbio, bio);
				2131	if (rbio->faila == -1) {
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2132	btrfs_warn(fs_info,
Liu Bo	e46a28c	2016-07-29 10:57:55 -0700	[diff] [blame]	2133	"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
				2134	__func__, (u64)bio->bi_iter.bi_sector << 9,
				2135	(u64)bio->bi_iter.bi_size, bbio->map_type);
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	2136	if (generic_io)
				2137	btrfs_put_bbio(bbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2138	kfree(rbio);
				2139	return -EIO;
				2140	}
				2141
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	2142	if (generic_io) {
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2143	btrfs_bio_counter_inc_noblocked(fs_info);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	2144	rbio->generic_bio_cnt = 1;
				2145	} else {
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	2146	btrfs_get_bbio(bbio);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	2147	}
				2148
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2149	/*
				2150	* reconstruct from the q stripe if they are
				2151	* asking for mirror 3
				2152	*/
				2153	if (mirror_num == 3)
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2154	rbio->failb = rbio->real_stripes - 2;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2155
				2156	ret = lock_stripe_add(rbio);
				2157
				2158	/*
				2159	* __raid56_parity_recover will end the bio with
				2160	* any errors it hits. We don't want to return
				2161	* its error value up the stack because our caller
				2162	* will end up calling bio_endio with any nonzero
				2163	* return
				2164	*/
				2165	if (ret == 0)
				2166	__raid56_parity_recover(rbio);
				2167	/*
				2168	* our rbio has been added to the list of
				2169	* rbios that will be handled after the
				2170	* currently lock owner is done
				2171	*/
				2172	return 0;
				2173
				2174	}
				2175
				2176	static void rmw_work(struct btrfs_work *work)
				2177	{
				2178	struct btrfs_raid_bio *rbio;
				2179
				2180	rbio = container_of(work, struct btrfs_raid_bio, work);
				2181	raid56_rmw_stripe(rbio);
				2182	}
				2183
				2184	static void read_rebuild_work(struct btrfs_work *work)
				2185	{
				2186	struct btrfs_raid_bio *rbio;
				2187
				2188	rbio = container_of(work, struct btrfs_raid_bio, work);
				2189	__raid56_parity_recover(rbio);
				2190	}
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2191
				2192	/*
				2193	* The following code is used to scrub/replace the parity stripe
				2194	*
Qu Wenruo	ae6529c	2017-03-29 09:33:21 +0800	[diff] [blame]	2195	* Caller must have already increased bio_counter for getting @bbio.
				2196	*
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2197	* Note: We need to make sure all the pages added to the scrub/replace
				2198	* raid bio are correct and are not changed during the scrub/replace. That
				2199	* is, those pages hold only metadata or file data with checksums.
				2200	*/
				2201
				2202	struct btrfs_raid_bio *
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2203	raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info fs_info, struct bio bio,
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	2204	struct btrfs_bio *bbio, u64 stripe_len,
				2205	struct btrfs_device *scrub_dev,
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2206	unsigned long *dbitmap, int stripe_nsectors)
				2207	{
				2208	struct btrfs_raid_bio *rbio;
				2209	int i;
				2210
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2211	rbio = alloc_rbio(fs_info, bbio, stripe_len);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2212	if (IS_ERR(rbio))
				2213	return NULL;
				2214	bio_list_add(&rbio->bio_list, bio);
				2215	/*
				2216	* This is a special bio which is used to hold the completion handler
				2217	* and make the scrub rbio is similar to the other types
				2218	*/
				2219	ASSERT(!bio->bi_iter.bi_size);
				2220	rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
				2221
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2222	for (i = 0; i < rbio->real_stripes; i++) {
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2223	if (bbio->stripes[i].dev == scrub_dev) {
				2224	rbio->scrubp = i;
				2225	break;
				2226	}
				2227	}
				2228
				2229	/* Now we just support the sectorsize equals to page size */
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2230	ASSERT(fs_info->sectorsize == PAGE_SIZE);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2231	ASSERT(rbio->stripe_npages == stripe_nsectors);
				2232	bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
				2233
Qu Wenruo	ae6529c	2017-03-29 09:33:21 +0800	[diff] [blame]	2234	/*
				2235	* We have already increased bio_counter when getting bbio, record it
				2236	* so we can free it at rbio_orig_end_io().
				2237	*/
				2238	rbio->generic_bio_cnt = 1;
				2239
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2240	return rbio;
				2241	}
				2242
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2243	/* Used for both parity scrub and missing. */
				2244	void raid56_add_scrub_pages(struct btrfs_raid_bio rbio, struct page page,
				2245	u64 logical)
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2246	{
				2247	int stripe_offset;
				2248	int index;
				2249
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	2250	ASSERT(logical >= rbio->bbio->raid_map[0]);
				2251	ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2252	rbio->stripe_len * rbio->nr_data);
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	2253	stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	2254	index = stripe_offset >> PAGE_SHIFT;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2255	rbio->bio_pages[index] = page;
				2256	}
				2257
				2258	/*
				2259	* We just scrub the parity that we have correct data on the same horizontal,
				2260	* so we needn't allocate all pages for all the stripes.
				2261	*/
				2262	static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
				2263	{
				2264	int i;
				2265	int bit;
				2266	int index;
				2267	struct page *page;
				2268
				2269	for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2270	for (i = 0; i < rbio->real_stripes; i++) {
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2271	index = i * rbio->stripe_npages + bit;
				2272	if (rbio->stripe_pages[index])
				2273	continue;
				2274
				2275	page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				2276	if (!page)
				2277	return -ENOMEM;
				2278	rbio->stripe_pages[index] = page;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2279	}
				2280	}
				2281	return 0;
				2282	}
				2283
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2284	static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
				2285	int need_check)
				2286	{
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2287	struct btrfs_bio *bbio = rbio->bbio;
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2288	void *pointers[rbio->real_stripes];
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2289	DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2290	int nr_data = rbio->nr_data;
				2291	int stripe;
				2292	int pagenr;
				2293	int p_stripe = -1;
				2294	int q_stripe = -1;
				2295	struct page *p_page = NULL;
				2296	struct page *q_page = NULL;
				2297	struct bio_list bio_list;
				2298	struct bio *bio;
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2299	int is_replace = 0;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2300	int ret;
				2301
				2302	bio_list_init(&bio_list);
				2303
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2304	if (rbio->real_stripes - rbio->nr_data == 1) {
				2305	p_stripe = rbio->real_stripes - 1;
				2306	} else if (rbio->real_stripes - rbio->nr_data == 2) {
				2307	p_stripe = rbio->real_stripes - 2;
				2308	q_stripe = rbio->real_stripes - 1;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2309	} else {
				2310	BUG();
				2311	}
				2312
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2313	if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
				2314	is_replace = 1;
				2315	bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
				2316	}
				2317
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2318	/*
				2319	* Because the higher layers(scrubber) are unlikely to
				2320	* use this area of the disk again soon, so don't cache
				2321	* it.
				2322	*/
				2323	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
				2324
				2325	if (!need_check)
				2326	goto writeback;
				2327
				2328	p_page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				2329	if (!p_page)
				2330	goto cleanup;
				2331	SetPageUptodate(p_page);
				2332
				2333	if (q_stripe != -1) {
				2334	q_page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				2335	if (!q_page) {
				2336	__free_page(p_page);
				2337	goto cleanup;
				2338	}
				2339	SetPageUptodate(q_page);
				2340	}
				2341
				2342	atomic_set(&rbio->error, 0);
				2343
				2344	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
				2345	struct page *p;
				2346	void *parity;
				2347	/* first collect one page from each data stripe */
				2348	for (stripe = 0; stripe < nr_data; stripe++) {
				2349	p = page_in_rbio(rbio, stripe, pagenr, 0);
				2350	pointers[stripe] = kmap(p);
				2351	}
				2352
				2353	/* then add the parity stripe */
				2354	pointers[stripe++] = kmap(p_page);
				2355
				2356	if (q_stripe != -1) {
				2357
				2358	/*
				2359	* raid6, add the qstripe and call the
				2360	* library function to fill in our p/q
				2361	*/
				2362	pointers[stripe++] = kmap(q_page);
				2363
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2364	raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2365	pointers);
				2366	} else {
				2367	/* raid5 */
				2368	memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	2369	run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2370	}
				2371
Nicholas D Steeves	0132761	2016-05-19 21:18:45 -0400	[diff] [blame]	2372	/* Check scrubbing parity and repair it */
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2373	p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
				2374	parity = kmap(p);
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	2375	if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
				2376	memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2377	else
				2378	/* Parity is right, needn't writeback */
				2379	bitmap_clear(rbio->dbitmap, pagenr, 1);
				2380	kunmap(p);
				2381
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2382	for (stripe = 0; stripe < rbio->real_stripes; stripe++)
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2383	kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
				2384	}
				2385
				2386	__free_page(p_page);
				2387	if (q_page)
				2388	__free_page(q_page);
				2389
				2390	writeback:
				2391	/*
				2392	* time to start writing. Make bios for everything from the
				2393	* higher layers (the bio_list in our rbio) and our p/q. Ignore
				2394	* everything else.
				2395	*/
				2396	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
				2397	struct page *page;
				2398
				2399	page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
				2400	ret = rbio_add_io_page(rbio, &bio_list,
				2401	page, rbio->scrubp, pagenr, rbio->stripe_len);
				2402	if (ret)
				2403	goto cleanup;
				2404	}
				2405
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2406	if (!is_replace)
				2407	goto submit_write;
				2408
				2409	for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
				2410	struct page *page;
				2411
				2412	page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
				2413	ret = rbio_add_io_page(rbio, &bio_list, page,
				2414	bbio->tgtdev_map[rbio->scrubp],
				2415	pagenr, rbio->stripe_len);
				2416	if (ret)
				2417	goto cleanup;
				2418	}
				2419
				2420	submit_write:
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2421	nr_data = bio_list_size(&bio_list);
				2422	if (!nr_data) {
				2423	/* Every parity is right */
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2424	rbio_orig_end_io(rbio, 0);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2425	return;
				2426	}
				2427
				2428	atomic_set(&rbio->stripes_pending, nr_data);
				2429
				2430	while (1) {
				2431	bio = bio_list_pop(&bio_list);
				2432	if (!bio)
				2433	break;
				2434
				2435	bio->bi_private = rbio;
Zhao Lei	a6111d11b	2016-01-12 17:52:13 +0800	[diff] [blame]	2436	bio->bi_end_io = raid_write_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	2437	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	2438
				2439	submit_bio(bio);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2440	}
				2441	return;
				2442
				2443	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2444	rbio_orig_end_io(rbio, -EIO);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2445	}
				2446
				2447	static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
				2448	{
				2449	if (stripe >= 0 && stripe < rbio->nr_data)
				2450	return 1;
				2451	return 0;
				2452	}
				2453
				2454	/*
				2455	* While we're doing the parity check and repair, we could have errors
				2456	* in reading pages off the disk. This checks for errors and if we're
				2457	* not able to read the page it'll trigger parity reconstruction. The
				2458	* parity scrub will be finished after we've reconstructed the failed
				2459	* stripes
				2460	*/
				2461	static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
				2462	{
				2463	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
				2464	goto cleanup;
				2465
				2466	if (rbio->faila >= 0 \|\| rbio->failb >= 0) {
				2467	int dfail = 0, failp = -1;
				2468
				2469	if (is_data_stripe(rbio, rbio->faila))
				2470	dfail++;
				2471	else if (is_parity_stripe(rbio->faila))
				2472	failp = rbio->faila;
				2473
				2474	if (is_data_stripe(rbio, rbio->failb))
				2475	dfail++;
				2476	else if (is_parity_stripe(rbio->failb))
				2477	failp = rbio->failb;
				2478
				2479	/*
				2480	* Because we can not use a scrubbing parity to repair
				2481	* the data, so the capability of the repair is declined.
				2482	* (In the case of RAID5, we can not repair anything)
				2483	*/
				2484	if (dfail > rbio->bbio->max_errors - 1)
				2485	goto cleanup;
				2486
				2487	/*
				2488	* If all data is good, only parity is correctly, just
				2489	* repair the parity.
				2490	*/
				2491	if (dfail == 0) {
				2492	finish_parity_scrub(rbio, 0);
				2493	return;
				2494	}
				2495
				2496	/*
				2497	* Here means we got one corrupted data stripe and one
				2498	* corrupted parity on RAID6, if the corrupted parity
Nicholas D Steeves	0132761	2016-05-19 21:18:45 -0400	[diff] [blame]	2499	* is scrubbing parity, luckily, use the other one to repair
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2500	* the data, or we can not repair the data stripe.
				2501	*/
				2502	if (failp != rbio->scrubp)
				2503	goto cleanup;
				2504
				2505	__raid_recover_end_io(rbio);
				2506	} else {
				2507	finish_parity_scrub(rbio, 1);
				2508	}
				2509	return;
				2510
				2511	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2512	rbio_orig_end_io(rbio, -EIO);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2513	}
				2514
				2515	/*
				2516	* end io for the read phase of the rmw cycle. All the bios here are physical
				2517	* stripe bios we've read from the disk so we can recalculate the parity of the
				2518	* stripe.
				2519	*
				2520	* This will usually kick off finish_rmw once all the bios are read in, but it
				2521	* may trigger parity reconstruction if we had any errors along the way
				2522	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2523	static void raid56_parity_scrub_end_io(struct bio *bio)
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2524	{
				2525	struct btrfs_raid_bio *rbio = bio->bi_private;
				2526
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2527	if (bio->bi_error)
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2528	fail_bio_stripe(rbio, bio);
				2529	else
				2530	set_bio_pages_uptodate(bio);
				2531
				2532	bio_put(bio);
				2533
				2534	if (!atomic_dec_and_test(&rbio->stripes_pending))
				2535	return;
				2536
				2537	/*
				2538	* this will normally call finish_rmw to start our write
				2539	* but if there are any failed stripes we'll reconstruct
				2540	* from parity first
				2541	*/
				2542	validate_rbio_for_parity_scrub(rbio);
				2543	}
				2544
				2545	static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
				2546	{
				2547	int bios_to_read = 0;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2548	struct bio_list bio_list;
				2549	int ret;
				2550	int pagenr;
				2551	int stripe;
				2552	struct bio *bio;
				2553
				2554	ret = alloc_rbio_essential_pages(rbio);
				2555	if (ret)
				2556	goto cleanup;
				2557
				2558	bio_list_init(&bio_list);
				2559
				2560	atomic_set(&rbio->error, 0);
				2561	/*
				2562	* build a list of bios to read all the missing parts of this
				2563	* stripe
				2564	*/
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2565	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2566	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
				2567	struct page *page;
				2568	/*
				2569	* we want to find all the pages missing from
				2570	* the rbio and read them from the disk. If
				2571	* page_in_rbio finds a page in the bio list
				2572	* we don't need to read it off the stripe.
				2573	*/
				2574	page = page_in_rbio(rbio, stripe, pagenr, 1);
				2575	if (page)
				2576	continue;
				2577
				2578	page = rbio_stripe_page(rbio, stripe, pagenr);
				2579	/*
				2580	* the bio cache may have handed us an uptodate
				2581	* page. If so, be happy and use it
				2582	*/
				2583	if (PageUptodate(page))
				2584	continue;
				2585
				2586	ret = rbio_add_io_page(rbio, &bio_list, page,
				2587	stripe, pagenr, rbio->stripe_len);
				2588	if (ret)
				2589	goto cleanup;
				2590	}
				2591	}
				2592
				2593	bios_to_read = bio_list_size(&bio_list);
				2594	if (!bios_to_read) {
				2595	/*
				2596	* this can happen if others have merged with
				2597	* us, it means there is nothing left to read.
				2598	* But if there are missing devices it may not be
				2599	* safe to do the full stripe write yet.
				2600	*/
				2601	goto finish;
				2602	}
				2603
				2604	/*
				2605	* the bbio may be freed once we submit the last bio. Make sure
				2606	* not to touch it after that
				2607	*/
				2608	atomic_set(&rbio->stripes_pending, bios_to_read);
				2609	while (1) {
				2610	bio = bio_list_pop(&bio_list);
				2611	if (!bio)
				2612	break;
				2613
				2614	bio->bi_private = rbio;
				2615	bio->bi_end_io = raid56_parity_scrub_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	2616	bio_set_op_attrs(bio, REQ_OP_READ, 0);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2617
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2618	btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2619
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	2620	submit_bio(bio);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2621	}
				2622	/* the actual write will happen once the reads are done */
				2623	return;
				2624
				2625	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2626	rbio_orig_end_io(rbio, -EIO);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2627	return;
				2628
				2629	finish:
				2630	validate_rbio_for_parity_scrub(rbio);
				2631	}
				2632
				2633	static void scrub_parity_work(struct btrfs_work *work)
				2634	{
				2635	struct btrfs_raid_bio *rbio;
				2636
				2637	rbio = container_of(work, struct btrfs_raid_bio, work);
				2638	raid56_parity_scrub_stripe(rbio);
				2639	}
				2640
				2641	static void async_scrub_parity(struct btrfs_raid_bio *rbio)
				2642	{
				2643	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
				2644	scrub_parity_work, NULL, NULL);
				2645
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2646	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2647	}
				2648
				2649	void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
				2650	{
				2651	if (!lock_stripe_add(rbio))
				2652	async_scrub_parity(rbio);
				2653	}
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2654
				2655	/* The following code is used for dev replace of a missing RAID 5/6 device. */
				2656
				2657	struct btrfs_raid_bio *
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2658	raid56_alloc_missing_rbio(struct btrfs_fs_info fs_info, struct bio bio,
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2659	struct btrfs_bio *bbio, u64 length)
				2660	{
				2661	struct btrfs_raid_bio *rbio;
				2662
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2663	rbio = alloc_rbio(fs_info, bbio, length);
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2664	if (IS_ERR(rbio))
				2665	return NULL;
				2666
				2667	rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
				2668	bio_list_add(&rbio->bio_list, bio);
				2669	/*
				2670	* This is a special bio which is used to hold the completion handler
				2671	* and make the scrub rbio is similar to the other types
				2672	*/
				2673	ASSERT(!bio->bi_iter.bi_size);
				2674
				2675	rbio->faila = find_logical_bio_stripe(rbio, bio);
				2676	if (rbio->faila == -1) {
				2677	BUG();
				2678	kfree(rbio);
				2679	return NULL;
				2680	}
				2681
Qu Wenruo	ae6529c	2017-03-29 09:33:21 +0800	[diff] [blame]	2682	/*
				2683	* When we get bbio, we have already increased bio_counter, record it
				2684	* so we can free it at rbio_orig_end_io()
				2685	*/
				2686	rbio->generic_bio_cnt = 1;
				2687
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2688	return rbio;
				2689	}
				2690
				2691	static void missing_raid56_work(struct btrfs_work *work)
				2692	{
				2693	struct btrfs_raid_bio *rbio;
				2694
				2695	rbio = container_of(work, struct btrfs_raid_bio, work);
				2696	__raid56_parity_recover(rbio);
				2697	}
				2698
				2699	static void async_missing_raid56(struct btrfs_raid_bio *rbio)
				2700	{
				2701	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
				2702	missing_raid56_work, NULL, NULL);
				2703
				2704	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
				2705	}
				2706
				2707	void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
				2708	{
				2709	if (!lock_stripe_add(rbio))
				2710	async_missing_raid56(rbio);
				2711	}