dm cache: promotion optimisation for writes
If a write block triggers promotion and covers a whole block we can
avoid a copy.
Introduce dm_{hook,unhook}_bio to simplify saving and restoring bio
fields (bi_private is now used by overwrite). Switch writethrough
support over to using these helpers too.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 4b56406..655994f 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -61,6 +61,34 @@
/*----------------------------------------------------------------*/
+/*
+ * There are a couple of places where we let a bio run, but want to do some
+ * work before calling its endio function. We do this by temporarily
+ * changing the endio fn.
+ */
+struct dm_hook_info {
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+};
+
+static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
+ bio_end_io_t *bi_end_io, void *bi_private)
+{
+ h->bi_end_io = bio->bi_end_io;
+ h->bi_private = bio->bi_private;
+
+ bio->bi_end_io = bi_end_io;
+ bio->bi_private = bi_private;
+}
+
+static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
+{
+ bio->bi_end_io = h->bi_end_io;
+ bio->bi_private = h->bi_private;
+}
+
+/*----------------------------------------------------------------*/
+
#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
@@ -214,7 +242,7 @@
*/
struct cache *cache;
dm_cblock_t cblock;
- bio_end_io_t *saved_bi_end_io;
+ struct dm_hook_info hook_info;
struct dm_bio_details bio_details;
};
@@ -231,6 +259,7 @@
bool writeback:1;
bool demote:1;
bool promote:1;
+ bool requeue_holder:1;
struct dm_bio_prison_cell *old_ocell;
struct dm_bio_prison_cell *new_ocell;
@@ -666,7 +695,8 @@
static void writethrough_endio(struct bio *bio, int err)
{
struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
- bio->bi_end_io = pb->saved_bi_end_io;
+
+ dm_unhook_bio(&pb->hook_info, bio);
if (err) {
bio_endio(bio, err);
@@ -697,9 +727,8 @@
pb->cache = cache;
pb->cblock = cblock;
- pb->saved_bi_end_io = bio->bi_end_io;
+ dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
dm_bio_record(&pb->bio_details, bio);
- bio->bi_end_io = writethrough_endio;
remap_to_origin_clear_discard(pb->cache, bio, oblock);
}
@@ -841,7 +870,12 @@
cleanup_migration(mg);
} else {
- cell_defer(cache, mg->new_ocell, true);
+ if (mg->requeue_holder)
+ cell_defer(cache, mg->new_ocell, true);
+ else {
+ bio_endio(mg->new_ocell->holder, 0);
+ cell_defer(cache, mg->new_ocell, false);
+ }
clear_dirty(cache, mg->new_oblock, mg->cblock);
cleanup_migration(mg);
}
@@ -892,6 +926,42 @@
}
}
+static void overwrite_endio(struct bio *bio, int err)
+{
+ struct dm_cache_migration *mg = bio->bi_private;
+ struct cache *cache = mg->cache;
+ size_t pb_data_size = get_per_bio_data_size(cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+ unsigned long flags;
+
+ if (err)
+ mg->err = true;
+
+ spin_lock_irqsave(&cache->lock, flags);
+ list_add_tail(&mg->list, &cache->completed_migrations);
+ dm_unhook_bio(&pb->hook_info, bio);
+ mg->requeue_holder = false;
+ spin_unlock_irqrestore(&cache->lock, flags);
+
+ wake_worker(cache);
+}
+
+static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
+{
+ size_t pb_data_size = get_per_bio_data_size(mg->cache);
+ struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+
+ dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
+ remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
+ generic_make_request(bio);
+}
+
+static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
+{
+ return (bio_data_dir(bio) == WRITE) &&
+ (bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
+}
+
static void avoid_copy(struct dm_cache_migration *mg)
{
atomic_inc(&mg->cache->stats.copies_avoided);
@@ -906,9 +976,17 @@
if (mg->writeback || mg->demote)
avoid = !is_dirty(cache, mg->cblock) ||
is_discarded_oblock(cache, mg->old_oblock);
- else
+ else {
+ struct bio *bio = mg->new_ocell->holder;
+
avoid = is_discarded_oblock(cache, mg->new_oblock);
+ if (!avoid && bio_writes_complete_block(cache, bio)) {
+ issue_overwrite(mg, bio);
+ return;
+ }
+ }
+
avoid ? avoid_copy(mg) : issue_copy_real(mg);
}
@@ -998,6 +1076,7 @@
mg->writeback = false;
mg->demote = false;
mg->promote = true;
+ mg->requeue_holder = true;
mg->cache = cache;
mg->new_oblock = oblock;
mg->cblock = cblock;
@@ -1019,6 +1098,7 @@
mg->writeback = true;
mg->demote = false;
mg->promote = false;
+ mg->requeue_holder = true;
mg->cache = cache;
mg->old_oblock = oblock;
mg->cblock = cblock;
@@ -1042,6 +1122,7 @@
mg->writeback = false;
mg->demote = true;
mg->promote = true;
+ mg->requeue_holder = true;
mg->cache = cache;
mg->old_oblock = old_oblock;
mg->new_oblock = new_oblock;