dm: allow active and inactive tables to share dm_devs
Until this change, when loading a new DM table, DM core would re-open
all of the devices in the DM table. Now, DM core will avoid redundant
device opens (and closes when destroying the old table) if the old
table already has a device open using the same mode. This is achieved
by managing reference counts on the table_devices that DM core now
stores in the mapped_device structure (rather than in the dm_table
structure). So a mapped_device's active and inactive dm_tables' dm_dev
lists now just point to the dm_devs stored in the mapped_device's
table_devices list.
This improvement in DM core's device reference counting has the
side-effect of fixing a long-standing limitation of the multipath
target: a DM multipath table couldn't include any paths that were unusable
(failed). For example: if all paths have failed and you add a new,
working, path to the table; you can't use it since the table load would
fail due to it still containing failed paths. Now a re-load of a
multipath table can include failed devices and when those devices become
active again they can be used instantly.
The device list code in dm.c isn't a straight copy/paste from the code in
dm-table.c, but it's very close (aside from some variable renames). One
subtle difference is that find_table_device for the tables_devices list
will only match devices with the same name and mode. This is because we
don't want to upgrade a device's mode in the active table when an
inactive table is loaded.
Access to the mapped_device structure's tables_devices list requires a
mutex (tables_devices_lock), so that tables cannot be created and
destroyed concurrently.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 56a2c74..58f3927 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -142,6 +142,9 @@
*/
struct dm_table *map;
+ struct list_head table_devices;
+ struct mutex table_devices_lock;
+
unsigned long flags;
struct request_queue *queue;
@@ -212,6 +215,12 @@
struct bio_set *bs;
};
+struct table_device {
+ struct list_head list;
+ atomic_t count;
+ struct dm_dev dm_dev;
+};
+
#define RESERVED_BIO_BASED_IOS 16
#define RESERVED_REQUEST_BASED_IOS 256
#define RESERVED_MAX_IOS 1024
@@ -670,6 +679,120 @@
}
/*
+ * Open a table device so we can use it as a map destination.
+ */
+static int open_table_device(struct table_device *td, dev_t dev,
+ struct mapped_device *md)
+{
+ static char *_claim_ptr = "I belong to device-mapper";
+ struct block_device *bdev;
+
+ int r;
+
+ BUG_ON(td->dm_dev.bdev);
+
+ bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
+
+ r = bd_link_disk_holder(bdev, dm_disk(md));
+ if (r) {
+ blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
+ return r;
+ }
+
+ td->dm_dev.bdev = bdev;
+ return 0;
+}
+
+/*
+ * Close a table device that we've been using.
+ */
+static void close_table_device(struct table_device *td, struct mapped_device *md)
+{
+ if (!td->dm_dev.bdev)
+ return;
+
+ bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
+ blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
+ td->dm_dev.bdev = NULL;
+}
+
+static struct table_device *find_table_device(struct list_head *l, dev_t dev,
+ fmode_t mode) {
+ struct table_device *td;
+
+ list_for_each_entry(td, l, list)
+ if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
+ return td;
+
+ return NULL;
+}
+
+int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
+ struct dm_dev **result) {
+ int r;
+ struct table_device *td;
+
+ mutex_lock(&md->table_devices_lock);
+ td = find_table_device(&md->table_devices, dev, mode);
+ if (!td) {
+ td = kmalloc(sizeof(*td), GFP_KERNEL);
+ if (!td) {
+ mutex_unlock(&md->table_devices_lock);
+ return -ENOMEM;
+ }
+
+ td->dm_dev.mode = mode;
+ td->dm_dev.bdev = NULL;
+
+ if ((r = open_table_device(td, dev, md))) {
+ mutex_unlock(&md->table_devices_lock);
+ kfree(td);
+ return r;
+ }
+
+ format_dev_t(td->dm_dev.name, dev);
+
+ atomic_set(&td->count, 0);
+ list_add(&td->list, &md->table_devices);
+ }
+ atomic_inc(&td->count);
+ mutex_unlock(&md->table_devices_lock);
+
+ *result = &td->dm_dev;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_get_table_device);
+
+void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
+{
+ struct table_device *td = container_of(d, struct table_device, dm_dev);
+
+ mutex_lock(&md->table_devices_lock);
+ if (atomic_dec_and_test(&td->count)) {
+ close_table_device(td, md);
+ list_del(&td->list);
+ kfree(td);
+ }
+ mutex_unlock(&md->table_devices_lock);
+}
+EXPORT_SYMBOL(dm_put_table_device);
+
+static void free_table_devices(struct list_head *devices)
+{
+ struct list_head *tmp, *next;
+
+ list_for_each_safe(tmp, next, devices) {
+ struct table_device *td = list_entry(tmp, struct table_device, list);
+
+ DMWARN("dm_destroy: %s still exists with %d references",
+ td->dm_dev.name, atomic_read(&td->count));
+ kfree(td);
+ }
+}
+
+/*
* Get the geometry associated with a dm device
*/
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
@@ -1944,12 +2067,14 @@
md->type = DM_TYPE_NONE;
mutex_init(&md->suspend_lock);
mutex_init(&md->type_lock);
+ mutex_init(&md->table_devices_lock);
spin_lock_init(&md->deferred_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
atomic_set(&md->event_nr, 0);
atomic_set(&md->uevent_seq, 0);
INIT_LIST_HEAD(&md->uevent_list);
+ INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
md->queue = blk_alloc_queue(GFP_KERNEL);
@@ -2035,6 +2160,7 @@
blk_integrity_unregister(md->disk);
del_gendisk(md->disk);
cleanup_srcu_struct(&md->io_barrier);
+ free_table_devices(&md->table_devices);
free_minor(minor);
spin_lock(&_minor_lock);