block: use normal I/O path for discard requests
prepare_discard_fn() was being called in a place where memory allocation
was effectively impossible. This makes it inappropriate for all but
the most trivial translations of Linux's DISCARD operation to the block
command set. Additionally adding a payload there makes the ownership
of the bio backing unclear as it's now allocated by the device driver
and not the submitter as usual.
It is replaced with QUEUE_FLAG_DISCARD which is used to indicate whether
the queue supports discard operations or not. blkdev_issue_discard now
allocates a one-page, sector-length payload which is the right thing
for the common ATA and SCSI implementations.
The mtd implementation of prepare_discard_fn() is replaced with simply
checking for the request being a discard.
Largely based on a previous patch from Matthew Wilcox <matthew@wil.cx>
which did the prepare_discard_fn but not the different payload allocation
yet.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6593ab3..21f5025 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -350,6 +350,7 @@
if (bio->bi_private)
complete(bio->bi_private);
+ __free_page(bio_page(bio));
bio_put(bio);
}
@@ -372,26 +373,44 @@
struct request_queue *q = bdev_get_queue(bdev);
int type = flags & DISCARD_FL_BARRIER ?
DISCARD_BARRIER : DISCARD_NOBARRIER;
+ struct bio *bio;
+ struct page *page;
int ret = 0;
if (!q)
return -ENXIO;
- if (!q->prepare_discard_fn)
+ if (!blk_queue_discard(q))
return -EOPNOTSUPP;
while (nr_sects && !ret) {
- struct bio *bio = bio_alloc(gfp_mask, 0);
- if (!bio)
- return -ENOMEM;
+ unsigned int sector_size = q->limits.logical_block_size;
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio)
+ goto out;
+ bio->bi_sector = sector;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
if (flags & DISCARD_FL_WAIT)
bio->bi_private = &wait;
- bio->bi_sector = sector;
+ /*
+ * Add a zeroed one-sector payload as that's what
+ * our current implementations need. If we'll ever need
+ * more the interface will need revisiting.
+ */
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ goto out_free_bio;
+ if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+ goto out_free_page;
+ /*
+ * And override the bio size - the way discard works we
+ * touch many more blocks on disk than the actual payload
+ * length.
+ */
if (nr_sects > queue_max_hw_sectors(q)) {
bio->bi_size = queue_max_hw_sectors(q) << 9;
nr_sects -= queue_max_hw_sectors(q);
@@ -414,5 +433,11 @@
bio_put(bio);
}
return ret;
+out_free_page:
+ __free_page(page);
+out_free_bio:
+ bio_put(bio);
+out:
+ return -ENOMEM;
}
EXPORT_SYMBOL(blkdev_issue_discard);