block: add a bi_error field to struct bio
Currently we have two different ways to signal an I/O error on a BIO:
(1) by clearing the BIO_UPTODATE flag
(2) by returning a Linux errno value to the bi_end_io callback
The first one has the drawback of only communicating a single possible
error (-EIO), and the second one has the drawback of not beeing persistent
when bios are queued up, and are not passed along from child to parent
bio in the ever more popular chaining scenario. Having both mechanisms
available has the additional drawback of utterly confusing driver authors
and introducing bugs where various I/O submitters only deal with one of
them, and the others have to add boilerplate code to deal with both kinds
of error returns.
So add a new bi_error field to store an errno value directly in struct
bio and remove the existing mechanisms to clean all this up.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 940f2f3..929e9a2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -101,7 +101,7 @@
static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
int *skipped);
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
-static void end_reshape_write(struct bio *bio, int error);
+static void end_reshape_write(struct bio *bio);
static void end_reshape(struct r10conf *conf);
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -307,9 +307,9 @@
} else
done = 1;
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = -EIO;
if (done) {
- bio_endio(bio, 0);
+ bio_endio(bio);
/*
* Wake up any possible resync thread that waits for the device
* to go idle.
@@ -358,9 +358,9 @@
return r10_bio->devs[slot].devnum;
}
-static void raid10_end_read_request(struct bio *bio, int error)
+static void raid10_end_read_request(struct bio *bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ int uptodate = !bio->bi_error;
struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
struct md_rdev *rdev;
@@ -438,9 +438,8 @@
}
}
-static void raid10_end_write_request(struct bio *bio, int error)
+static void raid10_end_write_request(struct bio *bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private;
int dev;
int dec_rdev = 1;
@@ -460,7 +459,7 @@
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (!uptodate) {
+ if (bio->bi_error) {
if (repl)
/* Never record new bad blocks to replacement,
* just fail it.
@@ -957,7 +956,7 @@
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */
- bio_endio(bio, 0);
+ bio_endio(bio);
else
generic_make_request(bio);
bio = next;
@@ -1133,7 +1132,7 @@
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */
- bio_endio(bio, 0);
+ bio_endio(bio);
else
generic_make_request(bio);
bio = next;
@@ -1916,7 +1915,7 @@
return err;
}
-static void end_sync_read(struct bio *bio, int error)
+static void end_sync_read(struct bio *bio)
{
struct r10bio *r10_bio = bio->bi_private;
struct r10conf *conf = r10_bio->mddev->private;
@@ -1928,7 +1927,7 @@
} else
d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
- if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ if (!bio->bi_error)
set_bit(R10BIO_Uptodate, &r10_bio->state);
else
/* The write handler will notice the lack of
@@ -1977,9 +1976,8 @@
}
}
-static void end_sync_write(struct bio *bio, int error)
+static void end_sync_write(struct bio *bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private;
struct mddev *mddev = r10_bio->mddev;
struct r10conf *conf = mddev->private;
@@ -1996,7 +1994,7 @@
else
rdev = conf->mirrors[d].rdev;
- if (!uptodate) {
+ if (bio->bi_error) {
if (repl)
md_error(mddev, rdev);
else {
@@ -2044,7 +2042,7 @@
/* find the first device with a block */
for (i=0; i<conf->copies; i++)
- if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
+ if (!r10_bio->devs[i].bio->bi_error)
break;
if (i == conf->copies)
@@ -2064,7 +2062,7 @@
continue;
if (i == first)
continue;
- if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+ if (!r10_bio->devs[i].bio->bi_error) {
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
* All vec entries are PAGE_SIZE;
@@ -2706,8 +2704,7 @@
rdev = conf->mirrors[dev].rdev;
if (r10_bio->devs[m].bio == NULL)
continue;
- if (test_bit(BIO_UPTODATE,
- &r10_bio->devs[m].bio->bi_flags)) {
+ if (!r10_bio->devs[m].bio->bi_error) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2722,8 +2719,8 @@
rdev = conf->mirrors[dev].replacement;
if (r10_bio->devs[m].repl_bio == NULL)
continue;
- if (test_bit(BIO_UPTODATE,
- &r10_bio->devs[m].repl_bio->bi_flags)) {
+
+ if (!r10_bio->devs[m].repl_bio->bi_error) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2748,8 +2745,7 @@
r10_bio->devs[m].addr,
r10_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev);
- } else if (bio != NULL &&
- !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ } else if (bio != NULL && bio->bi_error) {
if (!narrow_write_error(r10_bio, m)) {
md_error(conf->mddev, rdev);
set_bit(R10BIO_Degraded,
@@ -3263,7 +3259,7 @@
bio = r10_bio->devs[i].bio;
bio_reset(bio);
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = -EIO;
if (conf->mirrors[d].rdev == NULL ||
test_bit(Faulty, &conf->mirrors[d].rdev->flags))
continue;
@@ -3300,7 +3296,7 @@
/* Need to set up for writing to the replacement */
bio = r10_bio->devs[i].repl_bio;
bio_reset(bio);
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = -EIO;
sector = r10_bio->devs[i].addr;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
@@ -3377,7 +3373,7 @@
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(bio->bi_bdev, nr_sectors);
- set_bit(BIO_UPTODATE, &bio->bi_flags);
+ bio->bi_error = 0;
generic_make_request(bio);
}
}
@@ -4380,7 +4376,7 @@
read_bio->bi_end_io = end_sync_read;
read_bio->bi_rw = READ;
read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
- __set_bit(BIO_UPTODATE, &read_bio->bi_flags);
+ read_bio->bi_error = 0;
read_bio->bi_vcnt = 0;
read_bio->bi_iter.bi_size = 0;
r10_bio->master_bio = read_bio;
@@ -4601,9 +4597,8 @@
return 0;
}
-static void end_reshape_write(struct bio *bio, int error)
+static void end_reshape_write(struct bio *bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private;
struct mddev *mddev = r10_bio->mddev;
struct r10conf *conf = mddev->private;
@@ -4620,7 +4615,7 @@
rdev = conf->mirrors[d].rdev;
}
- if (!uptodate) {
+ if (bio->bi_error) {
/* FIXME should record badblock */
md_error(mddev, rdev);
}