[PATCH] md: Fixed refcounting/locking when attempting read error correction in raid10
We need to hold a reference to rdevs while reading and writing to attempt to
correct read errors. This reference must be taken under an rcu lock.
Signed-off-by: Neil Brown <neilb@suse.de>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ddc1dfc..1440935 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1407,36 +1407,45 @@
if (s > (PAGE_SIZE>>9))
s = PAGE_SIZE >> 9;
+ rcu_read_lock();
do {
int d = r10_bio->devs[sl].devnum;
- rdev = conf->mirrors[d].rdev;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
- test_bit(In_sync, &rdev->flags) &&
- sync_page_io(rdev->bdev,
- r10_bio->devs[sl].addr +
- sect + rdev->data_offset,
- s<<9,
- conf->tmppage, READ))
- success = 1;
- else {
- sl++;
- if (sl == conf->copies)
- sl = 0;
+ test_bit(In_sync, &rdev->flags)) {
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
+ success = sync_page_io(rdev->bdev,
+ r10_bio->devs[sl].addr +
+ sect + rdev->data_offset,
+ s<<9,
+ conf->tmppage, READ);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
+ if (success)
+ break;
}
+ sl++;
+ if (sl == conf->copies)
+ sl = 0;
} while (!success && sl != r10_bio->read_slot);
+ rcu_read_unlock();
if (success) {
int start = sl;
/* write it back and re-read */
+ rcu_read_lock();
while (sl != r10_bio->read_slot) {
int d;
if (sl==0)
sl = conf->copies;
sl--;
d = r10_bio->devs[sl].devnum;
- rdev = conf->mirrors[d].rdev;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
@@ -1444,6 +1453,8 @@
s<<9, conf->tmppage, WRITE) == 0)
/* Well, this device is dead */
md_error(mddev, rdev);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
}
}
sl = start;
@@ -1453,17 +1464,22 @@
sl = conf->copies;
sl--;
d = r10_bio->devs[sl].devnum;
- rdev = conf->mirrors[d].rdev;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
if (sync_page_io(rdev->bdev,
r10_bio->devs[sl].addr +
sect + rdev->data_offset,
s<<9, conf->tmppage, READ) == 0)
/* Well, this device is dead */
md_error(mddev, rdev);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
}
}
+ rcu_read_unlock();
} else {
/* Cannot read from anywhere -- bye bye array */
md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);