/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched/signal.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/uio.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
#include <linux/pr.h>
#include <linux/refcount.h>

#define DM_MSG_PREFIX "core"

/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_IDR(_minor_idr);

static DEFINE_SPINLOCK(_minor_lock);

static void do_deferred_remove(struct work_struct *w);

static DECLARE_WORK(deferred_remove_work, do_deferred_remove);

static struct workqueue_struct *deferred_remove_workqueue;

atomic_t dm_global_event_nr = ATOMIC_INIT(0);
DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);

void dm_issue_global_event(void)
{
	atomic_inc(&dm_global_event_nr);
	wake_up(&dm_global_eventq);
}

/*
 * One of these is allocated (on-stack) per original bio.
 */
struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	unsigned sector_count;
};

/*
 * One of these is allocated per clone bio.
 */
#define DM_TIO_MAGIC 7282014
struct dm_target_io {
	unsigned magic;
	struct dm_io *io;
	struct dm_target *ti;
	unsigned target_bio_nr;
	unsigned *len_ptr;
	bool inside_dm_io;
	struct bio clone;
};

/*
 * One of these is allocated per original bio.
 * It contains the first clone used for that original.
 */
#define DM_IO_MAGIC 5191977
struct dm_io {
	unsigned magic;
	struct mapped_device *md;
	blk_status_t status;
	atomic_t io_count;
	struct bio *orig_bio;
	unsigned long start_time;
	spinlock_t endio_lock;
	struct dm_stats_aux stats_aux;
	/* last member of dm_target_io is 'struct bio' */
	struct dm_target_io tio;
};

void *dm_per_bio_data(struct bio *bio, size_t data_size)
{
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	if (!tio->inside_dm_io)
		return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
	return (char *)bio - offsetof(struct dm_target_io, clone) - offsetof(struct dm_io, tio) - data_size;
}
EXPORT_SYMBOL_GPL(dm_per_bio_data);

struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
{
	struct dm_io *io = (struct dm_io *)((char *)data + data_size);
	if (io->magic == DM_IO_MAGIC)
		return (struct bio *)((char *)io + offsetof(struct dm_io, tio) + offsetof(struct dm_target_io, clone));
	BUG_ON(io->magic != DM_TIO_MAGIC);
	return (struct bio *)((char *)io + offsetof(struct dm_target_io, clone));
}
EXPORT_SYMBOL_GPL(dm_bio_from_per_bio_data);

unsigned dm_bio_get_target_bio_nr(const struct bio *bio)
{
	return container_of(bio, struct dm_target_io, clone)->target_bio_nr;
}
EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
130
Jeff Mahoneyba61fdd2006-06-26 00:27:21 -0700131#define MINOR_ALLOCED ((void *)-1)
132
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133/*
134 * Bits for the md->flags field.
135 */
Alasdair G Kergon1eb787e2009-04-09 00:27:14 +0100136#define DMF_BLOCK_IO_FOR_SUSPEND 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137#define DMF_SUSPENDED 1
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -0800138#define DMF_FROZEN 2
Jeff Mahoneyfba9f902006-06-26 00:27:23 -0700139#define DMF_FREEING 3
Alasdair G Kergon5c6bd752006-06-26 00:27:34 -0700140#define DMF_DELETING 4
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800141#define DMF_NOFLUSH_SUSPENDING 5
Kent Overstreet8ae12662015-04-27 23:48:34 -0700142#define DMF_DEFERRED_REMOVE 6
143#define DMF_SUSPENDED_INTERNALLY 7
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144
Mike Snitzer115485e2016-02-22 12:16:21 -0500145#define DM_NUMA_NODE NUMA_NO_NODE
Mike Snitzer115485e2016-02-22 12:16:21 -0500146static int dm_numa_node = DM_NUMA_NODE;
Mike Snitzerfaad87d2016-01-28 16:52:56 -0500147
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +0100148/*
149 * For mempools pre-allocation at the table loading time.
150 */
151struct dm_md_mempools {
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +0100152 struct bio_set *bs;
Mike Snitzer64f52b02017-12-11 23:17:47 -0500153 struct bio_set *io_bs;
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +0100154};
155
Benjamin Marzinski86f11522014-08-13 13:53:43 -0500156struct table_device {
157 struct list_head list;
Elena Reshetovab0b4d7c2017-10-20 10:37:39 +0300158 refcount_t count;
Benjamin Marzinski86f11522014-08-13 13:53:43 -0500159 struct dm_dev dm_dev;
160};
161
Kiyoshi Ueda8fbf26a2009-01-06 03:05:06 +0000162static struct kmem_cache *_rq_tio_cache;
Mike Snitzer1ae49ea2014-12-05 17:11:05 -0500163static struct kmem_cache *_rq_cache;
Kent Overstreet94818742012-09-07 13:44:01 -0700164
Mike Snitzerf4790822013-09-12 18:06:12 -0400165/*
Mike Snitzere8603132013-09-12 18:06:12 -0400166 * Bio-based DM's mempools' reserved IOs set by the user.
167 */
Mike Snitzer4cc96132016-05-12 16:28:10 -0400168#define RESERVED_BIO_BASED_IOS 16
Mike Snitzere8603132013-09-12 18:06:12 -0400169static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
170
Mike Snitzer115485e2016-02-22 12:16:21 -0500171static int __dm_get_module_param_int(int *module_param, int min, int max)
172{
Mark Rutland6aa7de02017-10-23 14:07:29 -0700173 int param = READ_ONCE(*module_param);
Mike Snitzer115485e2016-02-22 12:16:21 -0500174 int modified_param = 0;
175 bool modified = true;
176
177 if (param < min)
178 modified_param = min;
179 else if (param > max)
180 modified_param = max;
181 else
182 modified = false;
183
184 if (modified) {
185 (void)cmpxchg(module_param, param, modified_param);
186 param = modified_param;
187 }
188
189 return param;
190}
191
Mike Snitzer4cc96132016-05-12 16:28:10 -0400192unsigned __dm_get_module_param(unsigned *module_param,
193 unsigned def, unsigned max)
Mike Snitzerf4790822013-09-12 18:06:12 -0400194{
Mark Rutland6aa7de02017-10-23 14:07:29 -0700195 unsigned param = READ_ONCE(*module_param);
Mike Snitzer09c2d532015-02-27 22:25:26 -0500196 unsigned modified_param = 0;
Mike Snitzerf4790822013-09-12 18:06:12 -0400197
Mike Snitzer09c2d532015-02-27 22:25:26 -0500198 if (!param)
199 modified_param = def;
200 else if (param > max)
201 modified_param = max;
Mike Snitzerf4790822013-09-12 18:06:12 -0400202
Mike Snitzer09c2d532015-02-27 22:25:26 -0500203 if (modified_param) {
204 (void)cmpxchg(module_param, param, modified_param);
205 param = modified_param;
Mike Snitzerf4790822013-09-12 18:06:12 -0400206 }
207
Mike Snitzer09c2d532015-02-27 22:25:26 -0500208 return param;
Mike Snitzerf4790822013-09-12 18:06:12 -0400209}
210
Mike Snitzere8603132013-09-12 18:06:12 -0400211unsigned dm_get_reserved_bio_based_ios(void)
212{
Mike Snitzer09c2d532015-02-27 22:25:26 -0500213 return __dm_get_module_param(&reserved_bio_based_ios,
Mike Snitzer4cc96132016-05-12 16:28:10 -0400214 RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS);
Mike Snitzere8603132013-09-12 18:06:12 -0400215}
216EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
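
/*
 * Illustrative note (not from the original source): because of the
 * clamping above, a value of 0 in the reserved_bio_based_ios module
 * parameter (declared later in this file) reads back as the default,
 * oversized values are capped, and the corrected value is written back
 * with cmpxchg(), e.g.:
 *
 *	reserved_bio_based_ios = 0;
 *	dm_get_reserved_bio_based_ios();	// returns RESERVED_BIO_BASED_IOS (16)
 *	reserved_bio_based_ios = 2 * DM_RESERVED_MAX_IOS;
 *	dm_get_reserved_bio_based_ios();	// returns DM_RESERVED_MAX_IOS
 */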
217
Mike Snitzer115485e2016-02-22 12:16:21 -0500218static unsigned dm_get_numa_node(void)
219{
220 return __dm_get_module_param_int(&dm_numa_node,
221 DM_NUMA_NODE, num_online_nodes() - 1);
222}
223
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224static int __init local_init(void)
225{
Kiyoshi Ueda51157b42008-10-21 17:45:08 +0100226 int r = -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227
Kiyoshi Ueda8fbf26a2009-01-06 03:05:06 +0000228 _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
229 if (!_rq_tio_cache)
Mike Snitzerdde1e1e2017-12-11 23:28:13 -0500230 return r;
Kiyoshi Ueda8fbf26a2009-01-06 03:05:06 +0000231
Mike Snitzereca7ee62016-02-20 13:45:38 -0500232 _rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request),
Mike Snitzer1ae49ea2014-12-05 17:11:05 -0500233 __alignof__(struct request), 0, NULL);
234 if (!_rq_cache)
235 goto out_free_rq_tio_cache;
236
Mike Anderson51e5b2b2007-10-19 22:48:00 +0100237 r = dm_uevent_init();
Kiyoshi Ueda51157b42008-10-21 17:45:08 +0100238 if (r)
Mike Snitzer1ae49ea2014-12-05 17:11:05 -0500239 goto out_free_rq_cache;
Mike Anderson51e5b2b2007-10-19 22:48:00 +0100240
Mikulas Patockaacfe0ad2014-06-14 13:44:31 -0400241 deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
242 if (!deferred_remove_workqueue) {
243 r = -ENOMEM;
244 goto out_uevent_exit;
245 }
246
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 _major = major;
248 r = register_blkdev(_major, _name);
Kiyoshi Ueda51157b42008-10-21 17:45:08 +0100249 if (r < 0)
Mikulas Patockaacfe0ad2014-06-14 13:44:31 -0400250 goto out_free_workqueue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251
252 if (!_major)
253 _major = r;
254
255 return 0;
Kiyoshi Ueda51157b42008-10-21 17:45:08 +0100256
Mikulas Patockaacfe0ad2014-06-14 13:44:31 -0400257out_free_workqueue:
258 destroy_workqueue(deferred_remove_workqueue);
Kiyoshi Ueda51157b42008-10-21 17:45:08 +0100259out_uevent_exit:
260 dm_uevent_exit();
Mike Snitzer1ae49ea2014-12-05 17:11:05 -0500261out_free_rq_cache:
262 kmem_cache_destroy(_rq_cache);
Kiyoshi Ueda8fbf26a2009-01-06 03:05:06 +0000263out_free_rq_tio_cache:
264 kmem_cache_destroy(_rq_tio_cache);
Kiyoshi Ueda51157b42008-10-21 17:45:08 +0100265
266 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267}
268
269static void local_exit(void)
270{
Mikulas Patocka2c140a22013-11-01 18:27:41 -0400271 flush_scheduled_work();
Mikulas Patockaacfe0ad2014-06-14 13:44:31 -0400272 destroy_workqueue(deferred_remove_workqueue);
Mikulas Patocka2c140a22013-11-01 18:27:41 -0400273
Mike Snitzer1ae49ea2014-12-05 17:11:05 -0500274 kmem_cache_destroy(_rq_cache);
Kiyoshi Ueda8fbf26a2009-01-06 03:05:06 +0000275 kmem_cache_destroy(_rq_tio_cache);
Akinobu Mita00d59402007-07-17 04:03:46 -0700276 unregister_blkdev(_major, _name);
Mike Anderson51e5b2b2007-10-19 22:48:00 +0100277 dm_uevent_exit();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278
279 _major = 0;
280
281 DMINFO("cleaned up");
282}
283
Alasdair G Kergonb9249e52008-02-08 02:09:51 +0000284static int (*_inits[])(void) __initdata = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 local_init,
286 dm_target_init,
287 dm_linear_init,
288 dm_stripe_init,
Mikulas Patocka952b3552009-12-10 23:51:57 +0000289 dm_io_init,
Mikulas Patocka945fa4d2008-04-24 21:43:49 +0100290 dm_kcopyd_init,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 dm_interface_init,
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -0400292 dm_statistics_init,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293};
294
Alasdair G Kergonb9249e52008-02-08 02:09:51 +0000295static void (*_exits[])(void) = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 local_exit,
297 dm_target_exit,
298 dm_linear_exit,
299 dm_stripe_exit,
Mikulas Patocka952b3552009-12-10 23:51:57 +0000300 dm_io_exit,
Mikulas Patocka945fa4d2008-04-24 21:43:49 +0100301 dm_kcopyd_exit,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 dm_interface_exit,
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -0400303 dm_statistics_exit,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304};
305
306static int __init dm_init(void)
307{
308 const int count = ARRAY_SIZE(_inits);
309
310 int r, i;
311
312 for (i = 0; i < count; i++) {
313 r = _inits[i]();
314 if (r)
315 goto bad;
316 }
317
318 return 0;
319
320 bad:
321 while (i--)
322 _exits[i]();
323
324 return r;
325}
326
327static void __exit dm_exit(void)
328{
329 int i = ARRAY_SIZE(_exits);
330
331 while (i--)
332 _exits[i]();
Alasdair G Kergond15b7742011-08-02 12:32:01 +0100333
334 /*
335 * Should be empty by this point.
336 */
Alasdair G Kergond15b7742011-08-02 12:32:01 +0100337 idr_destroy(&_minor_idr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338}
339
340/*
341 * Block device functions
342 */
Mike Anderson432a2122009-12-10 23:52:20 +0000343int dm_deleting_md(struct mapped_device *md)
344{
345 return test_bit(DMF_DELETING, &md->flags);
346}
347
static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);
out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = disk->private_data;
	if (WARN_ON(!md))
		goto out;

	if (atomic_dec_and_test(&md->open_count) &&
	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
		queue_work(deferred_remove_workqueue, &deferred_remove_work);

	dm_put(md);
out:
	spin_unlock(&_minor_lock);
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md)) {
		r = -EBUSY;
		if (mark_deferred)
			set_bit(DMF_DEFERRED_REMOVE, &md->flags);
	} else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
		r = -EEXIST;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

int dm_cancel_deferred_remove(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (test_bit(DMF_DELETING, &md->flags))
		r = -EBUSY;
	else
		clear_bit(DMF_DEFERRED_REMOVE, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static void do_deferred_remove(struct work_struct *w)
{
	dm_deferred_remove();
}

sector_t dm_get_size(struct mapped_device *md)
{
	return get_capacity(md->disk);
}

struct request_queue *dm_get_md_queue(struct mapped_device *md)
{
	return md->queue;
}

struct dm_stats *dm_get_stats(struct mapped_device *md)
{
	return &md->stats;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
				  struct block_device **bdev,
				  fmode_t *mode)
{
	struct dm_target *tgt;
	struct dm_table *map;
	int srcu_idx, r;

retry:
	r = -ENOTTY;
	map = dm_get_live_table(md, &srcu_idx);
	if (!map || !dm_table_get_size(map))
		goto out;

	/* We only support devices that have a single target */
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	tgt = dm_table_get_target(map, 0);
	if (!tgt->type->prepare_ioctl)
		goto out;

	if (dm_suspended_md(md)) {
		r = -EAGAIN;
		goto out;
	}

	r = tgt->type->prepare_ioctl(tgt, bdev, mode);
	if (r < 0)
		goto out;

	bdgrab(*bdev);
	dm_put_live_table(md, srcu_idx);
	return r;

out:
	dm_put_live_table(md, srcu_idx);
	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
		msleep(10);
		goto retry;
	}
	return r;
}

static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	int r;

	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
	if (r < 0)
		return r;

	if (r > 0) {
		/*
		 * Target determined this ioctl is being issued against a
		 * subset of the parent bdev; require extra privileges.
		 */
		if (!capable(CAP_SYS_RAWIO)) {
			DMWARN_LIMIT(
	"%s: sending ioctl %x to DM device without required privilege.",
				current->comm, cmd);
			r = -ENOIOCTLCMD;
			goto out;
		}
	}

	r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
	bdput(bdev);
	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	struct dm_io *io;
	struct dm_target_io *tio;
	struct bio *clone;

	clone = bio_alloc_bioset(GFP_NOIO, 0, md->io_bs);
	if (!clone)
		return NULL;

	tio = container_of(clone, struct dm_target_io, clone);
	tio->inside_dm_io = true;
	tio->io = NULL;

	io = container_of(tio, struct dm_io, tio);
	io->magic = DM_IO_MAGIC;

	return io;
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	bio_put(&io->tio.clone);
}

static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti,
				      unsigned target_bio_nr, gfp_t gfp_mask)
{
	struct dm_target_io *tio;

	if (!ci->io->tio.io) {
		/* the dm_target_io embedded in ci->io is available */
		tio = &ci->io->tio;
	} else {
		struct bio *clone = bio_alloc_bioset(gfp_mask, 0, ci->md->bs);
		if (!clone)
			return NULL;

		tio = container_of(clone, struct dm_target_io, clone);
		tio->inside_dm_io = false;
	}

	tio->magic = DM_TIO_MAGIC;
	tio->io = ci->io;
	tio->ti = ti;
	tio->target_bio_nr = target_bio_nr;

	return tio;
}

static void free_tio(struct dm_target_io *tio)
{
	if (tio->inside_dm_io)
		return;
	bio_put(&tio->clone);
}

int md_in_flight(struct mapped_device *md)
{
	return atomic_read(&md->pending[READ]) +
	       atomic_read(&md->pending[WRITE]);
}

static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->orig_bio;
	int cpu;
	int rw = bio_data_dir(bio);

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(md->queue, cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	atomic_set(&dm_disk(md)->part0.in_flight[rw],
		   atomic_inc_return(&md->pending[rw]));

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio_data_dir(bio),
				    bio->bi_iter.bi_sector, bio_sectors(bio),
				    false, 0, &io->stats_aux);
}

static void end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->orig_bio;
	unsigned long duration = jiffies - io->start_time;
	int pending;
	int rw = bio_data_dir(bio);

	generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time);

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio_data_dir(bio),
				    bio->bi_iter.bi_sector, bio_sectors(bio),
				    true, duration, &io->stats_aux);

	/*
	 * After this is decremented the bio must not be touched if it is
	 * a flush.
	 */
	pending = atomic_dec_return(&md->pending[rw]);
	atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
	pending += atomic_read(&md->pending[rw^0x1]);

	/* nudge anyone waiting on suspend queue */
	if (!pending)
		wake_up(&md->wait);
}

/*
 * Add the bio to the list of deferred io.
 */
static void queue_io(struct mapped_device *md, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&md->deferred_lock, flags);
	bio_list_add(&md->deferred, bio);
	spin_unlock_irqrestore(&md->deferred_lock, flags);
	queue_work(md->wq, &md->work);
}

/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_put_live_table() when finished.
 */
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
{
	*srcu_idx = srcu_read_lock(&md->io_barrier);

	return srcu_dereference(md->map, &md->io_barrier);
}

void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
{
	srcu_read_unlock(&md->io_barrier, srcu_idx);
}

void dm_sync_table(struct mapped_device *md)
{
	synchronize_srcu(&md->io_barrier);
	synchronize_rcu_expedited();
}

/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
	rcu_read_lock();
	return rcu_dereference(md->map);
}

static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
	rcu_read_unlock();
}

/*
 * Open a table device so we can use it as a map destination.
 */
static int open_table_device(struct table_device *td, dev_t dev,
			     struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *bdev;

	int r;

	BUG_ON(td->dm_dev.bdev);

	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	r = bd_link_disk_holder(bdev, dm_disk(md));
	if (r) {
		blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
		return r;
	}

	td->dm_dev.bdev = bdev;
	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	return 0;
}

/*
 * Close a table device that we've been using.
 */
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
	if (!td->dm_dev.bdev)
		return;

	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
	put_dax(td->dm_dev.dax_dev);
	td->dm_dev.bdev = NULL;
	td->dm_dev.dax_dev = NULL;
}

static struct table_device *find_table_device(struct list_head *l, dev_t dev,
					      fmode_t mode) {
	struct table_device *td;

	list_for_each_entry(td, l, list)
		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
			return td;

	return NULL;
}

int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
			struct dm_dev **result) {
	int r;
	struct table_device *td;

	mutex_lock(&md->table_devices_lock);
	td = find_table_device(&md->table_devices, dev, mode);
	if (!td) {
		td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
		if (!td) {
			mutex_unlock(&md->table_devices_lock);
			return -ENOMEM;
		}

		td->dm_dev.mode = mode;
		td->dm_dev.bdev = NULL;

		if ((r = open_table_device(td, dev, md))) {
			mutex_unlock(&md->table_devices_lock);
			kfree(td);
			return r;
		}

		format_dev_t(td->dm_dev.name, dev);

		refcount_set(&td->count, 1);
		list_add(&td->list, &md->table_devices);
	} else {
		refcount_inc(&td->count);
	}
	mutex_unlock(&md->table_devices_lock);

	*result = &td->dm_dev;
	return 0;
}
EXPORT_SYMBOL_GPL(dm_get_table_device);

void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
{
	struct table_device *td = container_of(d, struct table_device, dm_dev);

	mutex_lock(&md->table_devices_lock);
	if (refcount_dec_and_test(&td->count)) {
		close_table_device(td, md);
		list_del(&td->list);
		kfree(td);
	}
	mutex_unlock(&md->table_devices_lock);
}
EXPORT_SYMBOL(dm_put_table_device);
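
/*
 * Illustrative sketch (not part of the original source): repeated
 * lookups of the same dev_t/mode pair share one table_device and only
 * bump its refcount; the underlying block device is closed when the
 * last reference is dropped.  dm_get_device() in dm-table.c is the
 * usual caller of these two helpers.
 *
 *	struct dm_dev *a, *b;
 *
 *	dm_get_table_device(md, dev, FMODE_READ | FMODE_WRITE, &a);
 *	dm_get_table_device(md, dev, FMODE_READ | FMODE_WRITE, &b);	// b == a, count is now 2
 *	dm_put_table_device(md, b);	// count drops back to 1
 *	dm_put_table_device(md, a);	// device closed, table_device freed
 */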
803
804static void free_table_devices(struct list_head *devices)
805{
806 struct list_head *tmp, *next;
807
808 list_for_each_safe(tmp, next, devices) {
809 struct table_device *td = list_entry(tmp, struct table_device, list);
810
811 DMWARN("dm_destroy: %s still exists with %d references",
Elena Reshetovab0b4d7c2017-10-20 10:37:39 +0300812 td->dm_dev.name, refcount_read(&td->count));
Benjamin Marzinski86f11522014-08-13 13:53:43 -0500813 kfree(td);
814 }
815}
816
817/*
Darrick J. Wong3ac51e72006-03-27 01:17:54 -0800818 * Get the geometry associated with a dm device
819 */
820int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
821{
822 *geo = md->geometry;
823
824 return 0;
825}
826
827/*
828 * Set the geometry of a device.
829 */
830int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
831{
832 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
833
834 if (geo->start > sz) {
835 DMWARN("Start sector is beyond the geometry limits.");
836 return -EINVAL;
837 }
838
839 md->geometry = *geo;
840
841 return 0;
842}
843
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800844static int __noflush_suspending(struct mapped_device *md)
845{
846 return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
847}
848
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849/*
850 * Decrements the number of outstanding ios that a bio has been
851 * cloned into, completing the original io if necc.
852 */
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200853static void dec_pending(struct dm_io *io, blk_status_t error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854{
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800855 unsigned long flags;
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200856 blk_status_t io_error;
Milan Brozb35f8ca2009-03-16 17:44:36 +0000857 struct bio *bio;
858 struct mapped_device *md = io->md;
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800859
860 /* Push-back supersedes any I/O errors */
Kiyoshi Uedaf88fb982009-10-16 23:18:15 +0100861 if (unlikely(error)) {
862 spin_lock_irqsave(&io->endio_lock, flags);
Mike Snitzer745dc572017-12-11 20:51:50 -0500863 if (!(io->status == BLK_STS_DM_REQUEUE && __noflush_suspending(md)))
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200864 io->status = error;
Kiyoshi Uedaf88fb982009-10-16 23:18:15 +0100865 spin_unlock_irqrestore(&io->endio_lock, flags);
866 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867
868 if (atomic_dec_and_test(&io->io_count)) {
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200869 if (io->status == BLK_STS_DM_REQUEUE) {
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800870 /*
871 * Target requested pushing back the I/O.
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800872 */
Mikulas Patocka022c2612009-04-02 19:55:39 +0100873 spin_lock_irqsave(&md->deferred_lock, flags);
Tejun Heo6a8736d2010-09-08 18:07:00 +0200874 if (__noflush_suspending(md))
Mike Snitzer745dc572017-12-11 20:51:50 -0500875 /* NOTE early return due to BLK_STS_DM_REQUEUE below */
876 bio_list_add_head(&md->deferred, io->orig_bio);
Tejun Heo6a8736d2010-09-08 18:07:00 +0200877 else
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800878 /* noflush suspend was interrupted. */
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200879 io->status = BLK_STS_IOERR;
Mikulas Patocka022c2612009-04-02 19:55:39 +0100880 spin_unlock_irqrestore(&md->deferred_lock, flags);
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800881 }
882
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200883 io_error = io->status;
Mike Snitzer745dc572017-12-11 20:51:50 -0500884 bio = io->orig_bio;
Tejun Heo6a8736d2010-09-08 18:07:00 +0200885 end_io_acct(io);
886 free_io(md, io);
Jens Axboe2056a782006-03-23 20:00:26 +0100887
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200888 if (io_error == BLK_STS_DM_REQUEUE)
Tejun Heo6a8736d2010-09-08 18:07:00 +0200889 return;
890
Jens Axboe1eff9d32016-08-05 15:35:16 -0600891 if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
Mikulas Patockaaf7e4662009-04-09 00:27:16 +0100892 /*
Tejun Heo6a8736d2010-09-08 18:07:00 +0200893 * Preflush done for flush with data, reissue
Mike Christie28a8f0d2016-06-05 14:32:25 -0500894 * without REQ_PREFLUSH.
Mikulas Patockaaf7e4662009-04-09 00:27:16 +0100895 */
Jens Axboe1eff9d32016-08-05 15:35:16 -0600896 bio->bi_opf &= ~REQ_PREFLUSH;
Tejun Heo6a8736d2010-09-08 18:07:00 +0200897 queue_io(md, bio);
Mikulas Patockaaf7e4662009-04-09 00:27:16 +0100898 } else {
Mike Snitzerb372d362010-09-08 18:07:01 +0200899 /* done with normal IO or empty flush */
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +0200900 bio->bi_status = io_error;
Christoph Hellwig4246a0b2015-07-20 15:29:37 +0200901 bio_endio(bio);
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -0800902 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903 }
904}
905
void disable_write_same(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* device doesn't really support WRITE SAME, disable it */
	limits->max_write_same_sectors = 0;
}

void disable_write_zeroes(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* device doesn't really support WRITE ZEROES, disable it */
	limits->max_write_zeroes_sectors = 0;
}

static void clone_endio(struct bio *bio)
{
	blk_status_t error = bio->bi_status;
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	struct dm_io *io = tio->io;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (unlikely(error == BLK_STS_TARGET)) {
		if (bio_op(bio) == REQ_OP_WRITE_SAME &&
		    !bio->bi_disk->queue->limits.max_write_same_sectors)
			disable_write_same(md);
		if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
		    !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
			disable_write_zeroes(md);
	}

	if (endio) {
		int r = endio(tio->ti, bio, &error);
		switch (r) {
		case DM_ENDIO_REQUEUE:
			error = BLK_STS_DM_REQUEUE;
			/*FALLTHRU*/
		case DM_ENDIO_DONE:
			break;
		case DM_ENDIO_INCOMPLETE:
			/* The target will handle the io */
			return;
		default:
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	free_tio(tio);
	dec_pending(io, error);
}

/*
 * Return maximum size of I/O possible at the supplied sector up to the current
 * target boundary.
 */
static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
{
	sector_t target_offset = dm_target_offset(ti, sector);

	return ti->len - target_offset;
}

static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{
	sector_t len = max_io_len_target_boundary(sector, ti);
	sector_t offset, max_len;

	/*
	 * Does the target need to split even further?
	 */
	if (ti->max_io_len) {
		offset = dm_target_offset(ti, sector);
		if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
			max_len = sector_div(offset, ti->max_io_len);
		else
			max_len = offset & (ti->max_io_len - 1);
		max_len = ti->max_io_len - max_len;

		if (len > max_len)
			len = max_len;
	}

	return len;
}

int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
	if (len > UINT_MAX) {
		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
		      (unsigned long long)len, UINT_MAX);
		ti->error = "Maximum size of target IO is too large";
		return -EINVAL;
	}

	ti->max_io_len = (uint32_t) len;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
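
/*
 * Illustrative sketch (not from the original source): a chunked target
 * typically calls dm_set_target_max_io_len() from its ctr with its
 * chunk size, so that max_io_len() above never hands it a bio that
 * crosses a chunk boundary.  "example_ctr" and "chunk_sectors" are
 * made-up names.
 *
 *	static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
 *	{
 *		sector_t chunk_sectors = 256;	// e.g. 128KiB chunks
 *		int r = dm_set_target_max_io_len(ti, chunk_sectors);
 *
 *		if (r)
 *			return r;	// ti->error has already been set
 *		...
 *		return 0;
 *	}
 */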

static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
						sector_t sector, int *srcu_idx)
{
	struct dm_table *map;
	struct dm_target *ti;

	map = dm_get_live_table(md, srcu_idx);
	if (!map)
		return NULL;

	ti = dm_table_find_target(map, sector);
	if (!dm_target_is_valid(ti))
		return NULL;

	return ti;
}

static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				 long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	long len, ret = -EIO;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);

	if (!ti)
		goto out;
	if (!ti->type->direct_access)
		goto out;
	len = max_io_len(sector, ti) / PAGE_SECTORS;
	if (len < 1)
		goto out;
	nr_pages = min(len, nr_pages);
	if (ti->type->direct_access)
		ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);

 out:
	dm_put_live_table(md, srcu_idx);

	return ret;
}

static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
				    void *addr, size_t bytes, struct iov_iter *i)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	long ret = 0;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);

	if (!ti)
		goto out;
	if (!ti->type->dax_copy_from_iter) {
		ret = copy_from_iter(addr, bytes, i);
		goto out;
	}
	ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
 out:
	dm_put_live_table(md, srcu_idx);

	return ret;
}

/*
 * A target may call dm_accept_partial_bio only from the map routine. It is
 * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
 *
 * dm_accept_partial_bio informs the dm that the target only wants to process
 * additional n_sectors sectors of the bio and the rest of the data should be
 * sent in a next bio.
 *
 * A diagram that explains the arithmetics:
 * +--------------------+---------------+-------+
 * |         1          |       2       |   3   |
 * +--------------------+---------------+-------+
 *
 * <-------------- *tio->len_ptr --------------->
 *                      <------- bi_size ------->
 *                      <-- n_sectors -->
 *
 * Region 1 was already iterated over with bio_advance or similar function.
 *	(it may be empty if the target doesn't use bio_advance)
 * Region 2 is the remaining bio size that the target wants to process.
 *	(it may be empty if region 1 is non-empty, although there is no reason
 *	 to make it empty)
 * The target requires that region 3 is to be sent in the next bio.
 *
 * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
 * the partially processed part (the sum of regions 1+2) must be the same for all
 * copies of the bio.
 */
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
	BUG_ON(bio->bi_opf & REQ_PREFLUSH);
	BUG_ON(bi_size > *tio->len_ptr);
	BUG_ON(n_sectors > bi_size);
	*tio->len_ptr -= bi_size - n_sectors;
	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
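
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * ->map method that only processes up to a 64-sector chunk per bio,
 * using the region 1/2/3 arithmetic described above; dm core then sends
 * the remainder (region 3) to the target in a subsequent bio.
 *
 *	static int example_map(struct dm_target *ti, struct bio *bio)
 *	{
 *		// sectors left in the 64-sector chunk containing bi_sector
 *		unsigned n_sectors = 64 - (bio->bi_iter.bi_sector & 63);
 *
 *		if (bio_sectors(bio) > n_sectors)
 *			dm_accept_partial_bio(bio, n_sectors);
 *
 *		// remap the (possibly shortened) bio and return as usual
 *		return DM_MAPIO_REMAPPED;
 *	}
 */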

/*
 * The zone descriptors obtained with a zone report indicate
 * zone positions within the target device. The zone descriptors
 * must be remapped to match their position within the dm device.
 * A target may call dm_remap_zone_report after completion of a
 * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained
 * from the target device mapping to the dm device.
 */
void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
{
#ifdef CONFIG_BLK_DEV_ZONED
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	struct bio *report_bio = tio->io->orig_bio;
	struct blk_zone_report_hdr *hdr = NULL;
	struct blk_zone *zone;
	unsigned int nr_rep = 0;
	unsigned int ofst;
	struct bio_vec bvec;
	struct bvec_iter iter;
	void *addr;

	if (bio->bi_status)
		return;

	/*
	 * Remap the start sector of the reported zones. For sequential zones,
	 * also remap the write pointer position.
	 */
	bio_for_each_segment(bvec, report_bio, iter) {
		addr = kmap_atomic(bvec.bv_page);

		/* Remember the report header in the first page */
		if (!hdr) {
			hdr = addr;
			ofst = sizeof(struct blk_zone_report_hdr);
		} else
			ofst = 0;

		/* Set zones start sector */
		while (hdr->nr_zones && ofst < bvec.bv_len) {
			zone = addr + ofst;
			if (zone->start >= start + ti->len) {
				hdr->nr_zones = 0;
				break;
			}
			zone->start = zone->start + ti->begin - start;
			if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
				if (zone->cond == BLK_ZONE_COND_FULL)
					zone->wp = zone->start + zone->len;
				else if (zone->cond == BLK_ZONE_COND_EMPTY)
					zone->wp = zone->start;
				else
					zone->wp = zone->wp + ti->begin - start;
			}
			ofst += sizeof(struct blk_zone);
			hdr->nr_zones--;
			nr_rep++;
		}

		if (addr != hdr)
			kunmap_atomic(addr);

		if (!hdr->nr_zones)
			break;
	}

	if (hdr) {
		hdr->nr_zones = nr_rep;
		kunmap_atomic(hdr);
	}

	bio_advance(report_bio, report_bio->bi_iter.bi_size);

#else /* !CONFIG_BLK_DEV_ZONED */
	bio->bi_status = BLK_STS_NOTSUPP;
#endif
}
EXPORT_SYMBOL_GPL(dm_remap_zone_report);
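
/*
 * Illustrative sketch (not from the original source), in the spirit of
 * what a simple remapping target such as dm-linear does: call
 * dm_remap_zone_report() from the target's ->end_io once the zone
 * report has completed on the underlying device.  "example_end_io" and
 * "ec->start" (the target's offset into the underlying device) are
 * made-up names.
 *
 *	static int example_end_io(struct dm_target *ti, struct bio *bio,
 *				  blk_status_t *error)
 *	{
 *		struct example_c *ec = ti->private;
 *
 *		if (!*error && bio_op(bio) == REQ_OP_ZONE_REPORT)
 *			dm_remap_zone_report(ti, bio, ec->start);
 *
 *		return DM_ENDIO_DONE;
 *	}
 */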
1196
Alasdair G Kergonbd2a49b2013-03-01 22:45:46 +00001197static void __map_bio(struct dm_target_io *tio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198{
1199 int r;
Jens Axboe2056a782006-03-23 20:00:26 +01001200 sector_t sector;
Mikulas Patockadba14162012-10-12 21:02:15 +01001201 struct bio *clone = &tio->clone;
Mike Snitzer64f52b02017-12-11 23:17:47 -05001202 struct dm_io *io = tio->io;
Alasdair G Kergonbd2a49b2013-03-01 22:45:46 +00001203 struct dm_target *ti = tio->ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 clone->bi_end_io = clone_endio;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206
1207 /*
1208 * Map the clone. If r == 0 we don't need to do
1209 * anything, the target has assumed ownership of
1210 * this io.
1211 */
Mike Snitzer64f52b02017-12-11 23:17:47 -05001212 atomic_inc(&io->io_count);
Kent Overstreet4f024f32013-10-11 15:44:27 -07001213 sector = clone->bi_iter.bi_sector;
Mikulas Patockad67a5f42017-02-15 11:26:10 -05001214
Mikulas Patocka7de3ee52012-12-21 20:23:41 +00001215 r = ti->type->map(ti, clone);
Christoph Hellwig846785e2017-06-03 09:38:02 +02001216 switch (r) {
1217 case DM_MAPIO_SUBMITTED:
1218 break;
1219 case DM_MAPIO_REMAPPED:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 /* the bio has been remapped so dispatch it */
Christoph Hellwig74d46992017-08-23 19:10:32 +02001221 trace_block_bio_remap(clone->bi_disk->queue, clone,
Mike Snitzer64f52b02017-12-11 23:17:47 -05001222 bio_dev(io->orig_bio), sector);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 generic_make_request(clone);
Christoph Hellwig846785e2017-06-03 09:38:02 +02001224 break;
1225 case DM_MAPIO_KILL:
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02001226 free_tio(tio);
Mike Snitzer64f52b02017-12-11 23:17:47 -05001227 dec_pending(io, BLK_STS_IOERR);
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02001228 break;
Christoph Hellwig846785e2017-06-03 09:38:02 +02001229 case DM_MAPIO_REQUEUE:
Mike Snitzercfae7522016-04-11 12:05:38 -04001230 free_tio(tio);
Mike Snitzer64f52b02017-12-11 23:17:47 -05001231 dec_pending(io, BLK_STS_DM_REQUEUE);
Christoph Hellwig846785e2017-06-03 09:38:02 +02001232 break;
1233 default:
Kiyoshi Ueda45cbcd72006-12-08 02:41:05 -08001234 DMWARN("unimplemented target map return value: %d", r);
1235 BUG();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 }
1237}
1238
Mikulas Patockae0d66092014-03-14 18:40:39 -04001239static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
Alasdair G Kergonbd2a49b2013-03-01 22:45:46 +00001240{
Kent Overstreet4f024f32013-10-11 15:44:27 -07001241 bio->bi_iter.bi_sector = sector;
1242 bio->bi_iter.bi_size = to_bytes(len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243}
1244
1245/*
1246 * Creates a bio that consists of range of complete bvecs.
1247 */
Mike Snitzerc80914e2016-03-02 12:33:03 -05001248static int clone_bio(struct dm_target_io *tio, struct bio *bio,
1249 sector_t sector, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250{
Mikulas Patockadba14162012-10-12 21:02:15 +01001251 struct bio *clone = &tio->clone;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252
Kent Overstreet1c3b13e2013-10-29 17:17:49 -07001253 __bio_clone_fast(clone, bio);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254
Mikulas Patockae2460f22017-04-18 16:51:48 -04001255 if (unlikely(bio_integrity(bio) != NULL)) {
1256 int r;
1257
1258 if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
1259 !dm_target_passes_integrity(tio->ti->type))) {
1260 DMWARN("%s: the target %s doesn't support integrity data.",
1261 dm_device_name(tio->io->md),
1262 tio->ti->type->name);
1263 return -EIO;
1264 }
1265
1266 r = bio_integrity_clone(clone, bio, GFP_NOIO);
Mike Snitzerc80914e2016-03-02 12:33:03 -05001267 if (r < 0)
1268 return r;
1269 }
Kent Overstreet1c3b13e2013-10-29 17:17:49 -07001270
Damien Le Moal264c8692017-05-08 16:40:47 -07001271 if (bio_op(bio) != REQ_OP_ZONE_REPORT)
1272 bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
Kent Overstreet1c3b13e2013-10-29 17:17:49 -07001273 clone->bi_iter.bi_size = to_bytes(len);
1274
Mikulas Patockae2460f22017-04-18 16:51:48 -04001275 if (unlikely(bio_integrity(bio) != NULL))
Dmitry Monakhovfbd08e72017-06-29 11:31:10 -07001276 bio_integrity_trim(clone);
Mike Snitzerc80914e2016-03-02 12:33:03 -05001277
1278 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279}
1280
Mike Snitzer318716d2017-11-22 14:56:12 -05001281static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
1282 struct dm_target *ti, unsigned num_bios)
Alasdair G Kergon9015df22009-06-22 10:12:21 +01001283{
Mike Snitzer318716d2017-11-22 14:56:12 -05001284 struct dm_target_io *tio;
1285 int try;
1286
1287 if (!num_bios)
1288 return;
1289
1290 if (num_bios == 1) {
1291 tio = alloc_tio(ci, ti, 0, GFP_NOIO);
1292 bio_list_add(blist, &tio->clone);
1293 return;
1294 }
1295
1296 for (try = 0; try < 2; try++) {
1297 int bio_nr;
1298 struct bio *bio;
1299
1300 if (try)
1301 mutex_lock(&ci->md->table_devices_lock);
1302 for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
1303 tio = alloc_tio(ci, ti, bio_nr, try ? GFP_NOIO : GFP_NOWAIT);
1304 if (!tio)
1305 break;
1306
1307 bio_list_add(blist, &tio->clone);
1308 }
1309 if (try)
1310 mutex_unlock(&ci->md->table_devices_lock);
1311 if (bio_nr == num_bios)
1312 return;
1313
1314 while ((bio = bio_list_pop(blist))) {
1315 tio = container_of(bio, struct dm_target_io, clone);
1316 free_tio(tio);
1317 }
1318 }
1319}
1320
1321static void __clone_and_map_simple_bio(struct clone_info *ci,
1322 struct dm_target_io *tio, unsigned *len)
1323{
Mikulas Patockadba14162012-10-12 21:02:15 +01001324 struct bio *clone = &tio->clone;
Alasdair G Kergon9015df22009-06-22 10:12:21 +01001325
Mikulas Patocka1dd40c32014-03-14 18:41:24 -04001326 tio->len_ptr = len;
1327
Junichi Nomura99778272014-10-03 11:55:16 +00001328 __bio_clone_fast(clone, ci->bio);
Alasdair G Kergonbd2a49b2013-03-01 22:45:46 +00001329 if (len)
Mikulas Patocka1dd40c32014-03-14 18:41:24 -04001330 bio_setup_sector(clone, ci->sector, *len);
Mikulas Patockaf9ab94c2009-06-22 10:12:20 +01001331
Alasdair G Kergonbd2a49b2013-03-01 22:45:46 +00001332 __map_bio(tio);
Mikulas Patockaf9ab94c2009-06-22 10:12:20 +01001333}
1334
Alasdair G Kergon14fe5942013-03-01 22:45:47 +00001335static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
Mikulas Patocka1dd40c32014-03-14 18:41:24 -04001336 unsigned num_bios, unsigned *len)
Mike Snitzer06a426c2010-08-12 04:14:09 +01001337{
Mike Snitzer318716d2017-11-22 14:56:12 -05001338 struct bio_list blist = BIO_EMPTY_LIST;
1339 struct bio *bio;
1340 struct dm_target_io *tio;
Mike Snitzer06a426c2010-08-12 04:14:09 +01001341
Mike Snitzer318716d2017-11-22 14:56:12 -05001342 alloc_multiple_bios(&blist, ci, ti, num_bios);
1343
1344 while ((bio = bio_list_pop(&blist))) {
1345 tio = container_of(bio, struct dm_target_io, clone);
1346 __clone_and_map_simple_bio(ci, tio, len);
1347 }
Mike Snitzer06a426c2010-08-12 04:14:09 +01001348}
1349
Alasdair G Kergon14fe5942013-03-01 22:45:47 +00001350static int __send_empty_flush(struct clone_info *ci)
Mikulas Patockaf9ab94c2009-06-22 10:12:20 +01001351{
Mike Snitzer06a426c2010-08-12 04:14:09 +01001352 unsigned target_nr = 0;
Mikulas Patockaf9ab94c2009-06-22 10:12:20 +01001353 struct dm_target *ti;
1354
Mike Snitzerb372d362010-09-08 18:07:01 +02001355 BUG_ON(bio_has_data(ci->bio));
Mikulas Patockaf9ab94c2009-06-22 10:12:20 +01001356 while ((ti = dm_table_get_target(ci->map, target_nr++)))
Mikulas Patocka1dd40c32014-03-14 18:41:24 -04001357 __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
Mikulas Patockaf9ab94c2009-06-22 10:12:20 +01001358
Mikulas Patockaf9ab94c2009-06-22 10:12:20 +01001359 return 0;
1360}
1361
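/*
 * Worked example (illustrative, not extra driver logic): for a table with
 * two targets, each advertising num_flush_bios = 1, one incoming
 * REQ_PREFLUSH bio becomes two empty flush clones above, one per target,
 * all accounted against the same dm_io.
 */
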
Mike Snitzerc80914e2016-03-02 12:33:03 -05001362static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
NeilBrownf31c21e2017-11-22 14:25:18 +11001363 sector_t sector, unsigned *len)
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001364{
Mikulas Patockadba14162012-10-12 21:02:15 +01001365 struct bio *bio = ci->bio;
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001366 struct dm_target_io *tio;
NeilBrownf31c21e2017-11-22 14:25:18 +11001367 int r;
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001368
Mike Snitzer318716d2017-11-22 14:56:12 -05001369 tio = alloc_tio(ci, ti, 0, GFP_NOIO);
NeilBrownf31c21e2017-11-22 14:25:18 +11001370 tio->len_ptr = len;
1371 r = clone_bio(tio, bio, sector, *len);
1372 if (r < 0) {
1373 free_tio(tio);
1374 return r;
Alasdair G Kergonb0d8ed42013-03-01 22:45:49 +00001375 }
NeilBrownf31c21e2017-11-22 14:25:18 +11001376 __map_bio(tio);
Mike Snitzerc80914e2016-03-02 12:33:03 -05001377
NeilBrownf31c21e2017-11-22 14:25:18 +11001378 return 0;
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001379}
1380
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +00001381typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
Mike Snitzer23508a92012-12-21 20:23:37 +00001382
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +00001383static unsigned get_num_discard_bios(struct dm_target *ti)
Mike Snitzer23508a92012-12-21 20:23:37 +00001384{
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +00001385 return ti->num_discard_bios;
Mike Snitzer23508a92012-12-21 20:23:37 +00001386}
1387
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +00001388static unsigned get_num_write_same_bios(struct dm_target *ti)
Mike Snitzer23508a92012-12-21 20:23:37 +00001389{
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +00001390 return ti->num_write_same_bios;
Mike Snitzer23508a92012-12-21 20:23:37 +00001391}
1392
Christoph Hellwigac62d622017-04-05 19:21:05 +02001393static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
1394{
1395 return ti->num_write_zeroes_bios;
1396}
1397
Mike Snitzer23508a92012-12-21 20:23:37 +00001398typedef bool (*is_split_required_fn)(struct dm_target *ti);
1399
1400static bool is_split_required_for_discard(struct dm_target *ti)
1401{
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +00001402 return ti->split_discard_bios;
Mike Snitzer23508a92012-12-21 20:23:37 +00001403}
1404
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001405static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
Alasdair G Kergon14fe5942013-03-01 22:45:47 +00001406 get_num_bios_fn get_num_bios,
1407 is_split_required_fn is_split_required)
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001408{
Mikulas Patockae0d66092014-03-14 18:40:39 -04001409 unsigned len;
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +00001410 unsigned num_bios;
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001411
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001412 /*
1413 * Even though the device advertised support for this type of
1414 * request, that does not mean every target supports it, and
1415 * reconfiguration might also have changed that since the
1416 * check was performed.
1417 */
1418 num_bios = get_num_bios ? get_num_bios(ti) : 0;
1419 if (!num_bios)
1420 return -EOPNOTSUPP;
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001421
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001422 if (is_split_required && !is_split_required(ti))
1423 len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
1424 else
1425 len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001426
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001427 __send_duplicate_bios(ci, ti, num_bios, &len);
Mike Snitzer06a426c2010-08-12 04:14:09 +01001428
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001429 ci->sector += len;
1430 ci->sector_count -= len;
Mike Snitzer5ae89a82010-08-12 04:14:08 +01001431
1432 return 0;
1433}
1434
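/*
 * For reference, a hedged sketch (hypothetical target, not code from this
 * file) of where the per-target counters consulted above come from: a
 * target's .ctr hook advertises how many duplicate bios it wants for
 * flushes and for each kind of extent-changing request, e.g.
 *
 *	static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
 *	{
 *		ti->num_flush_bios = 1;
 *		ti->num_discard_bios = 1;
 *		ti->num_write_same_bios = 1;
 *		ti->num_write_zeroes_bios = 1;
 *		ti->split_discard_bios = false;
 *		return 0;
 *	}
 */
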
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001435static int __send_discard(struct clone_info *ci, struct dm_target *ti)
Mike Snitzer23508a92012-12-21 20:23:37 +00001436{
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001437 return __send_changing_extent_only(ci, ti, get_num_discard_bios,
Alasdair G Kergon14fe5942013-03-01 22:45:47 +00001438 is_split_required_for_discard);
Mike Snitzer23508a92012-12-21 20:23:37 +00001439}
1440
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001441static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
Mike Snitzer23508a92012-12-21 20:23:37 +00001442{
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001443 return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL);
Mike Snitzer23508a92012-12-21 20:23:37 +00001444}
1445
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001446static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
Christoph Hellwigac62d622017-04-05 19:21:05 +02001447{
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001448 return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL);
Christoph Hellwigac62d622017-04-05 19:21:05 +02001449}
1450
Alasdair G Kergone4c93812013-03-01 22:45:47 +00001451/*
Alasdair G Kergone4c93812013-03-01 22:45:47 +00001452 * Select the correct strategy for processing a non-flush bio.
1453 */
Alasdair G Kergon14fe5942013-03-01 22:45:47 +00001454static int __split_and_process_non_flush(struct clone_info *ci)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455{
Mikulas Patockadba14162012-10-12 21:02:15 +01001456 struct bio *bio = ci->bio;
Jun'ichi Nomura512875b2007-12-13 14:15:25 +00001457 struct dm_target *ti;
Kent Overstreet1c3b13e2013-10-29 17:17:49 -07001458 unsigned len;
Mike Snitzerc80914e2016-03-02 12:33:03 -05001459 int r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460
Jun'ichi Nomura512875b2007-12-13 14:15:25 +00001461 ti = dm_table_find_target(ci->map, ci->sector);
1462 if (!dm_target_is_valid(ti))
1463 return -EIO;
1464
Mike Snitzer3d7f4562017-12-08 15:02:11 -05001465 if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
1466 return __send_discard(ci, ti);
1467 else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
1468 return __send_write_same(ci, ti);
1469 else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
1470 return __send_write_zeroes(ci, ti);
1471
Damien Le Moal264c8692017-05-08 16:40:47 -07001472 if (bio_op(bio) == REQ_OP_ZONE_REPORT)
1473 len = ci->sector_count;
1474 else
1475 len = min_t(sector_t, max_io_len(ci->sector, ti),
1476 ci->sector_count);
Jun'ichi Nomura512875b2007-12-13 14:15:25 +00001477
Mike Snitzerc80914e2016-03-02 12:33:03 -05001478 r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
1479 if (r < 0)
1480 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481
Kent Overstreet1c3b13e2013-10-29 17:17:49 -07001482 ci->sector += len;
1483 ci->sector_count -= len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484
Kent Overstreet1c3b13e2013-10-29 17:17:49 -07001485 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486}
1487
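/*
 * Worked example (illustrative): with a first target mapping sectors
 * [0, 1000) and a second target following it, a 64-sector write arriving
 * at sector 990 is clamped by max_io_len() to a 10-sector clone for the
 * first target; the remaining 54 sectors stay in ci->sector_count and are
 * picked up by the caller's loop (or split off, see below).
 */
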
1488/*
Alasdair G Kergon14fe5942013-03-01 22:45:47 +00001489 * Entry point to split a bio into clones and submit them to the targets.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001490 */
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001491static void __split_and_process_bio(struct mapped_device *md,
1492 struct dm_table *map, struct bio *bio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493{
1494 struct clone_info ci;
Jun'ichi Nomura512875b2007-12-13 14:15:25 +00001495 int error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001497 if (unlikely(!map)) {
Tejun Heo6a8736d2010-09-08 18:07:00 +02001498 bio_io_error(bio);
Mikulas Patockaf0b9a452009-04-02 19:55:38 +01001499 return;
1500 }
Mikulas Patocka692d0eb2009-04-09 00:27:13 +01001501
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001502 ci.map = map;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 ci.md = md;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504 ci.io = alloc_io(md);
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02001505 ci.io->status = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 atomic_set(&ci.io->io_count, 1);
Mike Snitzer745dc572017-12-11 20:51:50 -05001507 ci.io->orig_bio = bio;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 ci.io->md = md;
Kiyoshi Uedaf88fb982009-10-16 23:18:15 +01001509 spin_lock_init(&ci.io->endio_lock);
Kent Overstreet4f024f32013-10-11 15:44:27 -07001510 ci.sector = bio->bi_iter.bi_sector;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511
Jun'ichi "Nick" Nomura3eaf8402006-02-01 03:04:53 -08001512 start_io_acct(ci.io);
Alasdair G Kergonbd2a49b2013-03-01 22:45:46 +00001513
Jens Axboe1eff9d32016-08-05 15:35:16 -06001514 if (bio->bi_opf & REQ_PREFLUSH) {
Mike Snitzerb372d362010-09-08 18:07:01 +02001515 ci.bio = &ci.md->flush_bio;
1516 ci.sector_count = 0;
Alasdair G Kergon14fe5942013-03-01 22:45:47 +00001517 error = __send_empty_flush(&ci);
Mike Snitzerb372d362010-09-08 18:07:01 +02001518 /* dec_pending submits any data associated with flush */
Damien Le Moala4aa5e52017-05-08 16:40:46 -07001519 } else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
1520 ci.bio = bio;
1521 ci.sector_count = 0;
1522 error = __split_and_process_non_flush(&ci);
Mike Snitzerb372d362010-09-08 18:07:01 +02001523 } else {
Tejun Heo6a8736d2010-09-08 18:07:00 +02001524 ci.bio = bio;
Tejun Heod87f4c12010-09-03 11:56:19 +02001525 ci.sector_count = bio_sectors(bio);
NeilBrown18a25da2017-09-06 09:43:28 +10001526 while (ci.sector_count && !error) {
Alasdair G Kergon14fe5942013-03-01 22:45:47 +00001527 error = __split_and_process_non_flush(&ci);
NeilBrown18a25da2017-09-06 09:43:28 +10001528 if (current->bio_list && ci.sector_count && !error) {
				/*
				 * The remainder must be passed to
				 * generic_make_request() so that it gets
				 * handled *after* the bios already submitted
				 * have been completely processed.
				 * We take a clone of the original to store
				 * in ci.io->orig_bio, for end_io_acct() and
				 * dec_pending() to use when completing the
				 * I/O.
				 * As this path is not used for
				 * REQ_OP_ZONE_REPORT, the use of io->orig_bio
				 * in dm_remap_zone_report() is not affected
				 * by this reassignment.
				 */
1540 struct bio *b = bio_clone_bioset(bio, GFP_NOIO,
1541 md->queue->bio_split);
Mike Snitzer745dc572017-12-11 20:51:50 -05001542 ci.io->orig_bio = b;
NeilBrown18a25da2017-09-06 09:43:28 +10001543 bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9);
1544 bio_chain(b, bio);
1545 generic_make_request(bio);
1546 break;
1547 }
1548 }
Tejun Heod87f4c12010-09-03 11:56:19 +02001549 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550
1551 /* drop the extra reference count */
Bart Van Assche54385bf2017-08-09 11:32:10 -07001552 dec_pending(ci.io, errno_to_blk_status(error));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554
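/*
 * Note on the remainder path above: ci.io->orig_bio is repointed at the
 * clone b, which covers only the front portion actually mapped here, and b
 * is chained to the original bio so that the original's completion also
 * waits for that front portion.  The original bio, advanced past the mapped
 * sectors, is then resubmitted with generic_make_request(); since it lands
 * back on current->bio_list, it is processed only after the clones already
 * submitted, which avoids deadlocking on bios parked on that list.
 */
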
1555/*
NeilBrown18a25da2017-09-06 09:43:28 +10001556 * The request function that remaps the bio to one target and
1557 * splits off any remainder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 */
Jens Axboedece1632015-11-05 10:41:16 -07001559static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560{
Kevin Corry12f03a42006-02-01 03:04:52 -08001561 int rw = bio_data_dir(bio);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 struct mapped_device *md = q->queuedata;
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001563 int srcu_idx;
1564 struct dm_table *map;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001566 map = dm_get_live_table(md, &srcu_idx);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567
Jens Axboed62e26b2017-06-30 21:55:08 -06001568 generic_start_io_acct(q, rw, bio_sectors(bio), &dm_disk(md)->part0);
Kevin Corry12f03a42006-02-01 03:04:52 -08001569
Tejun Heo6a8736d2010-09-08 18:07:00 +02001570 /* if we're suspended, we have to queue this io for later */
1571 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001572 dm_put_live_table(md, srcu_idx);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573
Jens Axboe1eff9d32016-08-05 15:35:16 -06001574 if (!(bio->bi_opf & REQ_RAHEAD))
Tejun Heo6a8736d2010-09-08 18:07:00 +02001575 queue_io(md, bio);
1576 else
Alasdair G Kergon54d9a1b2009-04-09 00:27:14 +01001577 bio_io_error(bio);
Jens Axboedece1632015-11-05 10:41:16 -07001578 return BLK_QC_T_NONE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579 }
1580
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001581 __split_and_process_bio(md, map, bio);
1582 dm_put_live_table(md, srcu_idx);
Jens Axboedece1632015-11-05 10:41:16 -07001583 return BLK_QC_T_NONE;
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01001584}
1585
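/*
 * While DMF_BLOCK_IO_FOR_SUSPEND is set, dm_make_request() above parks
 * regular bios on md->deferred via queue_io() so that dm_wq_work() can
 * resubmit them after resume; readahead bios are simply failed, since the
 * caller can harmlessly retry them later.
 */
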
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586static int dm_any_congested(void *congested_data, int bdi_bits)
1587{
Chandra Seetharaman8a57dfc2008-11-13 23:39:14 +00001588 int r = bdi_bits;
1589 struct mapped_device *md = congested_data;
1590 struct dm_table *map;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591
Alasdair G Kergon1eb787e2009-04-09 00:27:14 +01001592 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
Mike Snitzere522c032016-02-02 22:35:06 -05001593 if (dm_request_based(md)) {
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01001594 /*
Mike Snitzere522c032016-02-02 22:35:06 -05001595 * With request-based DM we only need to check the
1596 * top-level queue for congestion.
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01001597 */
Jan Karadc3b17c2017-02-02 15:56:50 +01001598 r = md->queue->backing_dev_info->wb.state & bdi_bits;
Mike Snitzere522c032016-02-02 22:35:06 -05001599 } else {
1600 map = dm_get_live_table_fast(md);
1601 if (map)
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01001602 r = dm_table_any_congested(map, bdi_bits);
Mike Snitzere522c032016-02-02 22:35:06 -05001603 dm_put_live_table_fast(md);
Chandra Seetharaman8a57dfc2008-11-13 23:39:14 +00001604 }
1605 }
1606
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 return r;
1608}
1609
1610/*-----------------------------------------------------------------
1611 * An IDR is used to keep track of allocated minor numbers.
1612 *---------------------------------------------------------------*/
Alasdair G Kergon2b06cff2006-06-26 00:27:32 -07001613static void free_minor(int minor)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614{
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001615 spin_lock(&_minor_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 idr_remove(&_minor_idr, minor);
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001617 spin_unlock(&_minor_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618}
1619
1620/*
1621 * See if the device with a specific minor # is free.
1622 */
Frederik Deweerdtcf13ab82008-04-24 22:10:59 +01001623static int specific_minor(int minor)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624{
Tejun Heoc9d76be2013-02-27 17:04:26 -08001625 int r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626
1627 if (minor >= (1 << MINORBITS))
1628 return -EINVAL;
1629
Tejun Heoc9d76be2013-02-27 17:04:26 -08001630 idr_preload(GFP_KERNEL);
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001631 spin_lock(&_minor_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632
Tejun Heoc9d76be2013-02-27 17:04:26 -08001633 r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001635 spin_unlock(&_minor_lock);
Tejun Heoc9d76be2013-02-27 17:04:26 -08001636 idr_preload_end();
1637 if (r < 0)
1638 return r == -ENOSPC ? -EBUSY : r;
1639 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640}
1641
Frederik Deweerdtcf13ab82008-04-24 22:10:59 +01001642static int next_free_minor(int *minor)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643{
Tejun Heoc9d76be2013-02-27 17:04:26 -08001644 int r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645
Tejun Heoc9d76be2013-02-27 17:04:26 -08001646 idr_preload(GFP_KERNEL);
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001647 spin_lock(&_minor_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648
Tejun Heoc9d76be2013-02-27 17:04:26 -08001649 r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001651 spin_unlock(&_minor_lock);
Tejun Heoc9d76be2013-02-27 17:04:26 -08001652 idr_preload_end();
1653 if (r < 0)
1654 return r;
1655 *minor = r;
1656 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657}
1658
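/*
 * Both helpers above follow the standard idr_preload() pattern: memory is
 * preallocated with GFP_KERNEL while sleeping is still allowed, and the
 * actual idr_alloc() then runs with GFP_NOWAIT under the _minor_lock
 * spinlock, where it must not sleep.
 */
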
Alexey Dobriyan83d5cde2009-09-21 17:01:13 -07001659static const struct block_device_operations dm_blk_dops;
Dan Williamsf26c5712017-04-12 12:35:44 -07001660static const struct dax_operations dm_dax_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661
Mikulas Patocka53d59142009-04-02 19:55:37 +01001662static void dm_wq_work(struct work_struct *work);
1663
Mike Snitzer4cc96132016-05-12 16:28:10 -04001664void dm_init_md_queue(struct mapped_device *md)
Mike Snitzer4a0b4dd2010-08-12 04:14:02 +01001665{
1666 /*
Mikulas Patockaad5f4982015-10-27 19:06:55 -04001667 * Initialize data that will only be used by a non-blk-mq DM queue
1668 * - must do so here (in alloc_dev callchain) before queue is used
1669 */
1670 md->queue->queuedata = md;
Jan Karadc3b17c2017-02-02 15:56:50 +01001671 md->queue->backing_dev_info->congested_data = md;
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05001672}
Mike Snitzer4a0b4dd2010-08-12 04:14:02 +01001673
Mike Snitzer4cc96132016-05-12 16:28:10 -04001674void dm_init_normal_md_queue(struct mapped_device *md)
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05001675{
Mike Snitzer17e149b2015-03-11 15:01:09 -04001676 md->use_blk_mq = false;
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05001677 dm_init_md_queue(md);
1678
1679 /*
1680 * Initialize aspects of queue that aren't relevant for blk-mq
1681 */
Jan Karadc3b17c2017-02-02 15:56:50 +01001682 md->queue->backing_dev_info->congested_fn = dm_any_congested;
Mike Snitzer4a0b4dd2010-08-12 04:14:02 +01001683}
1684
Mike Snitzer0f209722015-04-28 11:50:29 -04001685static void cleanup_mapped_device(struct mapped_device *md)
1686{
Mike Snitzer0f209722015-04-28 11:50:29 -04001687 if (md->wq)
1688 destroy_workqueue(md->wq);
1689 if (md->kworker_task)
1690 kthread_stop(md->kworker_task);
Mike Snitzer0f209722015-04-28 11:50:29 -04001691 if (md->bs)
1692 bioset_free(md->bs);
Mike Snitzer64f52b02017-12-11 23:17:47 -05001693 if (md->io_bs)
1694 bioset_free(md->io_bs);
Mike Snitzer0f209722015-04-28 11:50:29 -04001695
Dan Williamsf26c5712017-04-12 12:35:44 -07001696 if (md->dax_dev) {
1697 kill_dax(md->dax_dev);
1698 put_dax(md->dax_dev);
1699 md->dax_dev = NULL;
1700 }
1701
Mike Snitzer0f209722015-04-28 11:50:29 -04001702 if (md->disk) {
1703 spin_lock(&_minor_lock);
1704 md->disk->private_data = NULL;
1705 spin_unlock(&_minor_lock);
Mike Snitzer0f209722015-04-28 11:50:29 -04001706 del_gendisk(md->disk);
1707 put_disk(md->disk);
1708 }
1709
1710 if (md->queue)
1711 blk_cleanup_queue(md->queue);
1712
Tahsin Erdogand09960b2016-10-10 05:35:19 -07001713 cleanup_srcu_struct(&md->io_barrier);
1714
Mike Snitzer0f209722015-04-28 11:50:29 -04001715 if (md->bdev) {
1716 bdput(md->bdev);
1717 md->bdev = NULL;
1718 }
Mike Snitzer4cc96132016-05-12 16:28:10 -04001719
1720 dm_mq_cleanup_mapped_device(md);
Mike Snitzer0f209722015-04-28 11:50:29 -04001721}
1722
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723/*
1724 * Allocate and initialise a blank device with a given minor.
1725 */
Alasdair G Kergon2b06cff2006-06-26 00:27:32 -07001726static struct mapped_device *alloc_dev(int minor)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727{
Mike Snitzer115485e2016-02-22 12:16:21 -05001728 int r, numa_node_id = dm_get_numa_node();
Dan Williamsf26c5712017-04-12 12:35:44 -07001729 struct dax_device *dax_dev;
Mike Snitzer115485e2016-02-22 12:16:21 -05001730 struct mapped_device *md;
Jeff Mahoneyba61fdd2006-06-26 00:27:21 -07001731 void *old_md;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732
Mikulas Patocka856eb092017-10-31 19:33:02 -04001733 md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 if (!md) {
1735 DMWARN("unable to allocate device, out of memory.");
1736 return NULL;
1737 }
1738
Jeff Mahoney10da4f72006-06-26 00:27:25 -07001739 if (!try_module_get(THIS_MODULE))
Milan Broz6ed7ade2008-02-08 02:10:19 +00001740 goto bad_module_get;
Jeff Mahoney10da4f72006-06-26 00:27:25 -07001741
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742 /* get a minor number for the dev */
Alasdair G Kergon2b06cff2006-06-26 00:27:32 -07001743 if (minor == DM_ANY_MINOR)
Frederik Deweerdtcf13ab82008-04-24 22:10:59 +01001744 r = next_free_minor(&minor);
Alasdair G Kergon2b06cff2006-06-26 00:27:32 -07001745 else
Frederik Deweerdtcf13ab82008-04-24 22:10:59 +01001746 r = specific_minor(minor);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 if (r < 0)
Milan Broz6ed7ade2008-02-08 02:10:19 +00001748 goto bad_minor;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001750 r = init_srcu_struct(&md->io_barrier);
1751 if (r < 0)
1752 goto bad_io_barrier;
1753
Mike Snitzer115485e2016-02-22 12:16:21 -05001754 md->numa_node_id = numa_node_id;
Mike Snitzer4cc96132016-05-12 16:28:10 -04001755 md->use_blk_mq = dm_use_blk_mq_default();
Mike Snitzer591ddcf2016-01-31 12:05:42 -05001756 md->init_tio_pdu = false;
Mike Snitzera5664da2010-08-12 04:14:01 +01001757 md->type = DM_TYPE_NONE;
Daniel Walkere61290a2008-02-08 02:10:08 +00001758 mutex_init(&md->suspend_lock);
Mike Snitzera5664da2010-08-12 04:14:01 +01001759 mutex_init(&md->type_lock);
Benjamin Marzinski86f11522014-08-13 13:53:43 -05001760 mutex_init(&md->table_devices_lock);
Mikulas Patocka022c2612009-04-02 19:55:39 +01001761 spin_lock_init(&md->deferred_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 atomic_set(&md->holders, 1);
Alasdair G Kergon5c6bd752006-06-26 00:27:34 -07001763 atomic_set(&md->open_count, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 atomic_set(&md->event_nr, 0);
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001765 atomic_set(&md->uevent_seq, 0);
1766 INIT_LIST_HEAD(&md->uevent_list);
Benjamin Marzinski86f11522014-08-13 13:53:43 -05001767 INIT_LIST_HEAD(&md->table_devices);
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001768 spin_lock_init(&md->uevent_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769
Mike Snitzer115485e2016-02-22 12:16:21 -05001770 md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771 if (!md->queue)
Mike Snitzer0f209722015-04-28 11:50:29 -04001772 goto bad;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773
Mike Snitzer4a0b4dd2010-08-12 04:14:02 +01001774 dm_init_md_queue(md);
Stefan Bader9faf4002006-10-03 01:15:41 -07001775
Mike Snitzer115485e2016-02-22 12:16:21 -05001776 md->disk = alloc_disk_node(1, numa_node_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 if (!md->disk)
Mike Snitzer0f209722015-04-28 11:50:29 -04001778 goto bad;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779
Nikanth Karthikesan316d3152009-10-06 20:16:55 +02001780 atomic_set(&md->pending[0], 0);
1781 atomic_set(&md->pending[1], 0);
Jeff Mahoneyf0b04112006-06-26 00:27:25 -07001782 init_waitqueue_head(&md->wait);
Mikulas Patocka53d59142009-04-02 19:55:37 +01001783 INIT_WORK(&md->work, dm_wq_work);
Jeff Mahoneyf0b04112006-06-26 00:27:25 -07001784 init_waitqueue_head(&md->eventq);
Mikulas Patocka2995fa72014-01-13 19:37:54 -05001785 init_completion(&md->kobj_holder.completion);
Keith Busch2eb6e1e2014-10-17 17:46:36 -06001786 md->kworker_task = NULL;
Jeff Mahoneyf0b04112006-06-26 00:27:25 -07001787
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 md->disk->major = _major;
1789 md->disk->first_minor = minor;
1790 md->disk->fops = &dm_blk_dops;
1791 md->disk->queue = md->queue;
1792 md->disk->private_data = md;
1793 sprintf(md->disk->disk_name, "dm-%d", minor);
Dan Williamsf26c5712017-04-12 12:35:44 -07001794
1795 dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
1796 if (!dax_dev)
1797 goto bad;
1798 md->dax_dev = dax_dev;
1799
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 add_disk(md->disk);
Mike Anderson7e51f252006-03-27 01:17:52 -08001801 format_dev_t(md->name, MKDEV(_major, minor));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802
Tejun Heo670368a2013-07-30 08:40:21 -04001803 md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
Milan Broz304f3f62008-02-08 02:11:17 +00001804 if (!md->wq)
Mike Snitzer0f209722015-04-28 11:50:29 -04001805 goto bad;
Milan Broz304f3f62008-02-08 02:11:17 +00001806
Mikulas Patocka32a926d2009-06-22 10:12:17 +01001807 md->bdev = bdget_disk(md->disk, 0);
1808 if (!md->bdev)
Mike Snitzer0f209722015-04-28 11:50:29 -04001809 goto bad;
Mikulas Patocka32a926d2009-06-22 10:12:17 +01001810
Ming Lei3a83f462016-11-22 08:57:21 -07001811 bio_init(&md->flush_bio, NULL, 0);
Christoph Hellwig74d46992017-08-23 19:10:32 +02001812 bio_set_dev(&md->flush_bio, md->bdev);
Jan Karaff0361b2017-05-31 09:44:32 +02001813 md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
Tejun Heo6a8736d2010-09-08 18:07:00 +02001814
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04001815 dm_stats_init(&md->stats);
1816
Jeff Mahoneyba61fdd2006-06-26 00:27:21 -07001817 /* Populate the mapping, nobody knows we exist yet */
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001818 spin_lock(&_minor_lock);
Jeff Mahoneyba61fdd2006-06-26 00:27:21 -07001819 old_md = idr_replace(&_minor_idr, md, minor);
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001820 spin_unlock(&_minor_lock);
Jeff Mahoneyba61fdd2006-06-26 00:27:21 -07001821
1822 BUG_ON(old_md != MINOR_ALLOCED);
1823
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 return md;
1825
Mike Snitzer0f209722015-04-28 11:50:29 -04001826bad:
1827 cleanup_mapped_device(md);
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001828bad_io_barrier:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829 free_minor(minor);
Milan Broz6ed7ade2008-02-08 02:10:19 +00001830bad_minor:
Jeff Mahoney10da4f72006-06-26 00:27:25 -07001831 module_put(THIS_MODULE);
Milan Broz6ed7ade2008-02-08 02:10:19 +00001832bad_module_get:
Mikulas Patocka856eb092017-10-31 19:33:02 -04001833 kvfree(md);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834 return NULL;
1835}
1836
Jun'ichi Nomuraae9da832007-10-19 22:38:43 +01001837static void unlock_fs(struct mapped_device *md);
1838
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839static void free_dev(struct mapped_device *md)
1840{
Tejun Heof331c022008-09-03 09:01:48 +02001841 int minor = MINOR(disk_devt(md->disk));
Jun'ichi Nomura63d94e42006-02-24 13:04:25 -08001842
Mikulas Patocka32a926d2009-06-22 10:12:17 +01001843 unlock_fs(md);
Keith Busch2eb6e1e2014-10-17 17:46:36 -06001844
Mike Snitzer0f209722015-04-28 11:50:29 -04001845 cleanup_mapped_device(md);
Mike Snitzer0f209722015-04-28 11:50:29 -04001846
1847 free_table_devices(&md->table_devices);
1848 dm_stats_cleanup(&md->stats);
Mike Snitzer63a4f062015-03-23 17:01:43 -04001849 free_minor(minor);
1850
Jeff Mahoney10da4f72006-06-26 00:27:25 -07001851 module_put(THIS_MODULE);
Mikulas Patocka856eb092017-10-31 19:33:02 -04001852 kvfree(md);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853}
1854
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01001855static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
1856{
Mikulas Patockac0820cf2012-12-21 20:23:38 +00001857 struct dm_md_mempools *p = dm_table_get_md_mempools(t);
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01001858
Mike Snitzer0776aa02017-12-08 14:40:52 -05001859 if (dm_table_bio_based(t)) {
Mike Snitzer64f52b02017-12-11 23:17:47 -05001860 /*
1861 * The md may already have mempools that need changing.
1862 * If so, reload bioset because front_pad may have changed
1863 * because a different table was loaded.
1864 */
Mike Snitzer0776aa02017-12-08 14:40:52 -05001865 if (md->bs) {
Jun'ichi Nomura16245bd2013-03-01 22:45:44 +00001866 bioset_free(md->bs);
Mike Snitzer0776aa02017-12-08 14:40:52 -05001867 md->bs = NULL;
Jun'ichi Nomura16245bd2013-03-01 22:45:44 +00001868 }
Mike Snitzer64f52b02017-12-11 23:17:47 -05001869 if (md->io_bs) {
1870 bioset_free(md->io_bs);
1871 md->io_bs = NULL;
1872 }
Mike Snitzer0776aa02017-12-08 14:40:52 -05001873
1874 } else if (md->bs) {
		/*
		 * There's no need to reload with request-based dm because
		 * the size of front_pad doesn't change.
		 *
		 * Note for the future: if the bioset ever does need to be
		 * reloaded here, prepped requests sitting in the queue may
		 * still refer to bios from the old bioset, so the queue
		 * would have to be walked to unprep them first.
		 */
1883 goto out;
Mikulas Patockac0820cf2012-12-21 20:23:38 +00001884 }
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01001885
Mike Snitzerdde1e1e2017-12-11 23:28:13 -05001886 BUG_ON(!p || md->bs || md->io_bs);
Mike Snitzercbc4e3c2015-04-27 16:37:50 -04001887
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01001888 md->bs = p->bs;
1889 p->bs = NULL;
Mike Snitzer64f52b02017-12-11 23:17:47 -05001890 md->io_bs = p->io_bs;
1891 p->io_bs = NULL;
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01001892out:
Mike Snitzer02233342015-03-10 23:49:26 -04001893 /* mempool bind completed, no longer need any mempools in the table */
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01001894 dm_table_free_md_mempools(t);
1895}
1896
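/*
 * Ownership note for the above: the biosets move from the dm_md_mempools
 * attached to the table into the mapped_device, and p->bs / p->io_bs are
 * cleared so that dm_table_free_md_mempools() does not free what md now
 * owns.
 */
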
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897/*
1898 * Bind a table to the device.
1899 */
1900static void event_callback(void *context)
1901{
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001902 unsigned long flags;
1903 LIST_HEAD(uevents);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 struct mapped_device *md = (struct mapped_device *) context;
1905
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001906 spin_lock_irqsave(&md->uevent_lock, flags);
1907 list_splice_init(&md->uevent_list, &uevents);
1908 spin_unlock_irqrestore(&md->uevent_lock, flags);
1909
Tejun Heoed9e1982008-08-25 19:56:05 +09001910 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001911
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912 atomic_inc(&md->event_nr);
1913 wake_up(&md->eventq);
Mikulas Patocka62e08242017-09-20 07:29:49 -04001914 dm_issue_global_event();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915}
1916
Mike Snitzerc2176492011-01-13 19:53:46 +00001917/*
1918 * Protected by md->suspend_lock obtained by dm_swap_table().
1919 */
Alasdair G Kergon4e90188be2005-07-28 21:15:59 -07001920static void __set_size(struct mapped_device *md, sector_t size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921{
Bart Van Assche1ea06542017-04-27 10:11:21 -07001922 lockdep_assert_held(&md->suspend_lock);
1923
Alasdair G Kergon4e90188be2005-07-28 21:15:59 -07001924 set_capacity(md->disk, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925
Mikulas Patockadb8fef42009-06-22 10:12:15 +01001926 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927}
1928
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00001929/*
1930 * Returns old map, which caller must destroy.
1931 */
1932static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
1933 struct queue_limits *limits)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934{
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00001935 struct dm_table *old_map;
Jens Axboe165125e2007-07-24 09:28:11 +02001936 struct request_queue *q = md->queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 sector_t size;
1938
Bart Van Assche5a8f1f82016-08-31 15:17:04 -07001939 lockdep_assert_held(&md->suspend_lock);
1940
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941 size = dm_table_get_size(t);
Darrick J. Wong3ac51e72006-03-27 01:17:54 -08001942
1943 /*
1944 * Wipe any geometry if the size of the table changed.
1945 */
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04001946 if (size != dm_get_size(md))
Darrick J. Wong3ac51e72006-03-27 01:17:54 -08001947 memset(&md->geometry, 0, sizeof(md->geometry));
1948
Mikulas Patocka32a926d2009-06-22 10:12:17 +01001949 __set_size(md, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001951 dm_table_event_callback(t, event_callback, md);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001952
	/*
	 * If the old table type wasn't request-based, the queue hasn't been
	 * stopped during suspension, so stop it now to prevent I/O from
	 * being mapped before resume.  This must be done before setting the
	 * queue restrictions, because request-based dm may start running
	 * right after they are set.
	 */
Mike Snitzer16f12262016-01-31 17:22:27 -05001960 if (dm_table_request_based(t)) {
Mike Snitzereca7ee62016-02-20 13:45:38 -05001961 dm_stop_queue(q);
Mike Snitzer16f12262016-01-31 17:22:27 -05001962 /*
1963 * Leverage the fact that request-based DM targets are
1964 * immutable singletons and establish md->immutable_target
1965 * - used to optimize both dm_request_fn and dm_mq_queue_rq
1966 */
1967 md->immutable_target = dm_table_get_immutable_target(t);
1968 }
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01001969
1970 __bind_mempools(md, t);
1971
Eric Dumazeta12f5d42014-11-23 09:34:29 -08001972 old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
Mike Snitzer1d3aa6f2016-02-22 14:14:24 -05001973 rcu_assign_pointer(md->map, (void *)t);
Alasdair G Kergon36a04562011-10-31 20:19:04 +00001974 md->immutable_target_type = dm_table_get_immutable_target_type(t);
1975
Mike Snitzer754c5fc2009-06-22 10:12:34 +01001976 dm_table_set_restrictions(t, q, limits);
Hannes Reinecke41abc4e2014-11-05 14:35:50 +01001977 if (old_map)
1978 dm_sync_table(md);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001979
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00001980 return old_map;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981}
1982
Alasdair G Kergona7940152009-12-10 23:52:23 +00001983/*
1984 * Returns unbound table for the caller to free.
1985 */
1986static struct dm_table *__unbind(struct mapped_device *md)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987{
Eric Dumazeta12f5d42014-11-23 09:34:29 -08001988 struct dm_table *map = rcu_dereference_protected(md->map, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989
1990 if (!map)
Alasdair G Kergona7940152009-12-10 23:52:23 +00001991 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992
1993 dm_table_event_callback(map, NULL, NULL);
Monam Agarwal9cdb8522014-03-23 23:58:27 +05301994 RCU_INIT_POINTER(md->map, NULL);
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01001995 dm_sync_table(md);
Alasdair G Kergona7940152009-12-10 23:52:23 +00001996
1997 return map;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998}
1999
2000/*
2001 * Constructor for a new device.
2002 */
Alasdair G Kergon2b06cff2006-06-26 00:27:32 -07002003int dm_create(int minor, struct mapped_device **result)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004{
2005 struct mapped_device *md;
2006
Alasdair G Kergon2b06cff2006-06-26 00:27:32 -07002007 md = alloc_dev(minor);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 if (!md)
2009 return -ENXIO;
2010
Milan Broz784aae72009-01-06 03:05:12 +00002011 dm_sysfs_init(md);
2012
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 *result = md;
2014 return 0;
2015}
2016
Mike Snitzera5664da2010-08-12 04:14:01 +01002017/*
2018 * Functions to manage md->type.
2019 * All are required to hold md->type_lock.
2020 */
2021void dm_lock_md_type(struct mapped_device *md)
2022{
2023 mutex_lock(&md->type_lock);
2024}
2025
2026void dm_unlock_md_type(struct mapped_device *md)
2027{
2028 mutex_unlock(&md->type_lock);
2029}
2030
Bart Van Assche7e0d5742017-04-27 10:11:23 -07002031void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
Mike Snitzera5664da2010-08-12 04:14:01 +01002032{
Mike Snitzer00c4fc32013-08-27 18:57:03 -04002033 BUG_ON(!mutex_is_locked(&md->type_lock));
Mike Snitzera5664da2010-08-12 04:14:01 +01002034 md->type = type;
2035}
2036
Bart Van Assche7e0d5742017-04-27 10:11:23 -07002037enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
Mike Snitzera5664da2010-08-12 04:14:01 +01002038{
2039 return md->type;
2040}
2041
Alasdair G Kergon36a04562011-10-31 20:19:04 +00002042struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
2043{
2044 return md->immutable_target_type;
2045}
2046
Mike Snitzer4a0b4dd2010-08-12 04:14:02 +01002047/*
Mike Snitzerf84cb8a2013-09-19 12:13:58 -04002048 * The queue_limits are only valid as long as you have a reference
2049 * count on 'md'.
2050 */
2051struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
2052{
2053 BUG_ON(!atomic_read(&md->holders));
2054 return &md->queue->limits;
2055}
2056EXPORT_SYMBOL_GPL(dm_get_queue_limits);
2057
Mike Snitzer4a0b4dd2010-08-12 04:14:02 +01002058/*
2059 * Setup the DM device's queue based on md's type
2060 */
Mike Snitzer591ddcf2016-01-31 12:05:42 -05002061int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
Mike Snitzer4a0b4dd2010-08-12 04:14:02 +01002062{
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002063 int r;
Bart Van Assche7e0d5742017-04-27 10:11:23 -07002064 enum dm_queue_mode type = dm_get_md_type(md);
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002065
Toshi Kani545ed202016-06-22 17:54:53 -06002066 switch (type) {
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002067 case DM_TYPE_REQUEST_BASED:
Christoph Hellwigeb8db832017-01-22 18:32:46 +01002068 r = dm_old_init_request_queue(md, t);
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002069 if (r) {
Mike Snitzereca7ee62016-02-20 13:45:38 -05002070 DMERR("Cannot initialize queue for request-based mapped device");
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002071 return r;
Mike Snitzerff36ab32015-02-23 17:56:37 -05002072 }
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002073 break;
2074 case DM_TYPE_MQ_REQUEST_BASED:
Mike Snitzere83068a2016-05-24 21:16:51 -04002075 r = dm_mq_init_request_queue(md, t);
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002076 if (r) {
Mike Snitzereca7ee62016-02-20 13:45:38 -05002077 DMERR("Cannot initialize queue for request-based dm-mq mapped device");
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002078 return r;
2079 }
2080 break;
2081 case DM_TYPE_BIO_BASED:
Toshi Kani545ed202016-06-22 17:54:53 -06002082 case DM_TYPE_DAX_BIO_BASED:
Mike Snitzereca7ee62016-02-20 13:45:38 -05002083 dm_init_normal_md_queue(md);
Mike Snitzerff36ab32015-02-23 17:56:37 -05002084 blk_queue_make_request(md->queue, dm_make_request);
Mike Snitzerbfebd1c2015-03-08 00:51:47 -05002085 break;
Bart Van Assche7e0d5742017-04-27 10:11:23 -07002086 case DM_TYPE_NONE:
2087 WARN_ON_ONCE(true);
2088 break;
Mike Snitzer4a0b4dd2010-08-12 04:14:02 +01002089 }
2090
2091 return 0;
2092}
2093
Mikulas Patocka2bec1f42015-02-17 14:30:53 -05002094struct mapped_device *dm_get_md(dev_t dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095{
2096 struct mapped_device *md;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097 unsigned minor = MINOR(dev);
2098
2099 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
2100 return NULL;
2101
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07002102 spin_lock(&_minor_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103
2104 md = idr_find(&_minor_idr, minor);
Mike Snitzer49de5762017-11-06 16:40:10 -05002105 if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) ||
2106 test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
2107 md = NULL;
2108 goto out;
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07002109 }
Mike Snitzer49de5762017-11-06 16:40:10 -05002110 dm_get(md);
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07002111out:
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07002112 spin_unlock(&_minor_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113
David Teigland637842c2006-01-06 00:20:00 -08002114 return md;
2115}
Alasdair G Kergon3cf2e4b2011-10-31 20:19:06 +00002116EXPORT_SYMBOL_GPL(dm_get_md);
David Teiglandd229a952006-01-06 00:20:01 -08002117
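/*
 * Minimal usage sketch (an assumption about a typical caller, not lifted
 * from one): a successful dm_get_md() takes a holder reference that must
 * be dropped with dm_put() once the caller is done with the device.
 *
 *	struct mapped_device *md = dm_get_md(dev);
 *
 *	if (md) {
 *		... use md ...
 *		dm_put(md);
 *	}
 */
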
Alasdair G Kergon9ade92a2006-03-27 01:17:53 -08002118void *dm_get_mdptr(struct mapped_device *md)
David Teigland637842c2006-01-06 00:20:00 -08002119{
Alasdair G Kergon9ade92a2006-03-27 01:17:53 -08002120 return md->interface_ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121}
2122
2123void dm_set_mdptr(struct mapped_device *md, void *ptr)
2124{
2125 md->interface_ptr = ptr;
2126}
2127
2128void dm_get(struct mapped_device *md)
2129{
2130 atomic_inc(&md->holders);
Kiyoshi Ueda3f77316d2010-08-12 04:13:56 +01002131 BUG_ON(test_bit(DMF_FREEING, &md->flags));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132}
2133
Mikulas Patocka09ee96b2015-02-26 11:41:28 -05002134int dm_hold(struct mapped_device *md)
2135{
2136 spin_lock(&_minor_lock);
2137 if (test_bit(DMF_FREEING, &md->flags)) {
2138 spin_unlock(&_minor_lock);
2139 return -EBUSY;
2140 }
2141 dm_get(md);
2142 spin_unlock(&_minor_lock);
2143 return 0;
2144}
2145EXPORT_SYMBOL_GPL(dm_hold);
2146
Alasdair G Kergon72d94862006-06-26 00:27:35 -07002147const char *dm_device_name(struct mapped_device *md)
2148{
2149 return md->name;
2150}
2151EXPORT_SYMBOL_GPL(dm_device_name);
2152
Kiyoshi Ueda3f77316d2010-08-12 04:13:56 +01002153static void __dm_destroy(struct mapped_device *md, bool wait)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154{
Bart Van Assche3b785fb2016-08-31 15:17:49 -07002155 struct request_queue *q = dm_get_md_queue(md);
Mike Anderson1134e5a2006-03-27 01:17:54 -08002156 struct dm_table *map;
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002157 int srcu_idx;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158
Kiyoshi Ueda3f77316d2010-08-12 04:13:56 +01002159 might_sleep();
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07002160
Mike Snitzer63a4f062015-03-23 17:01:43 -04002161 spin_lock(&_minor_lock);
Kiyoshi Ueda3f77316d2010-08-12 04:13:56 +01002162 idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
2163 set_bit(DMF_FREEING, &md->flags);
2164 spin_unlock(&_minor_lock);
2165
Bart Van Assche2e91c362016-11-18 14:26:47 -08002166 blk_set_queue_dying(q);
Bart Van Assche3b785fb2016-08-31 15:17:49 -07002167
Mike Snitzer02233342015-03-10 23:49:26 -04002168 if (dm_request_based(md) && md->kworker_task)
Petr Mladek39891442016-10-11 13:55:20 -07002169 kthread_flush_worker(&md->kworker);
Keith Busch2eb6e1e2014-10-17 17:46:36 -06002170
Mikulas Patockaab7c7bb2015-02-27 14:04:27 -05002171 /*
2172 * Take suspend_lock so that presuspend and postsuspend methods
2173 * do not race with internal suspend.
2174 */
2175 mutex_lock(&md->suspend_lock);
Junichi Nomura2a708cf2015-10-01 08:31:51 +00002176 map = dm_get_live_table(md, &srcu_idx);
Kiyoshi Ueda3f77316d2010-08-12 04:13:56 +01002177 if (!dm_suspended_md(md)) {
2178 dm_table_presuspend_targets(map);
2179 dm_table_postsuspend_targets(map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 }
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002181 /* dm_put_live_table must be before msleep, otherwise deadlock is possible */
2182 dm_put_live_table(md, srcu_idx);
Junichi Nomura2a708cf2015-10-01 08:31:51 +00002183 mutex_unlock(&md->suspend_lock);
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002184
	/*
	 * Rare, but there may still be I/O requests in flight that have yet
	 * to complete.  Wait for all references to disappear; no one may
	 * increment the reference count of the mapped_device once its state
	 * becomes DMF_FREEING.
	 */
2191 if (wait)
2192 while (atomic_read(&md->holders))
2193 msleep(1);
2194 else if (atomic_read(&md->holders))
2195 DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
2196 dm_device_name(md), atomic_read(&md->holders));
2197
2198 dm_sysfs_exit(md);
Kiyoshi Ueda3f77316d2010-08-12 04:13:56 +01002199 dm_table_destroy(__unbind(md));
2200 free_dev(md);
2201}
2202
2203void dm_destroy(struct mapped_device *md)
2204{
2205 __dm_destroy(md, true);
2206}
2207
2208void dm_destroy_immediate(struct mapped_device *md)
2209{
2210 __dm_destroy(md, false);
2211}
2212
2213void dm_put(struct mapped_device *md)
2214{
2215 atomic_dec(&md->holders);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216}
Edward Goggin79eb8852007-05-09 02:32:56 -07002217EXPORT_SYMBOL_GPL(dm_put);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218
Bart Van Asscheb48633f2016-08-31 15:16:02 -07002219static int dm_wait_for_completion(struct mapped_device *md, long task_state)
Milan Broz46125c12008-02-08 02:10:30 +00002220{
2221 int r = 0;
Bart Van Assche9f4c3f82016-08-31 15:16:43 -07002222 DEFINE_WAIT(wait);
Milan Broz46125c12008-02-08 02:10:30 +00002223
2224 while (1) {
Bart Van Assche9f4c3f82016-08-31 15:16:43 -07002225 prepare_to_wait(&md->wait, &wait, task_state);
Milan Broz46125c12008-02-08 02:10:30 +00002226
Kiyoshi Uedab4324fe2009-12-10 23:52:16 +00002227 if (!md_in_flight(md))
Milan Broz46125c12008-02-08 02:10:30 +00002228 break;
2229
Bart Van Asschee3fabdf2016-08-31 15:16:22 -07002230 if (signal_pending_state(task_state, current)) {
Milan Broz46125c12008-02-08 02:10:30 +00002231 r = -EINTR;
2232 break;
2233 }
2234
2235 io_schedule();
2236 }
Bart Van Assche9f4c3f82016-08-31 15:16:43 -07002237 finish_wait(&md->wait, &wait);
Mikulas Patockab44ebeb2009-04-02 19:55:39 +01002238
Milan Broz46125c12008-02-08 02:10:30 +00002239 return r;
2240}
2241
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242/*
2243 * Process the deferred bios
2244 */
Mikulas Patockaef208582009-04-02 19:55:38 +01002245static void dm_wq_work(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246{
Mikulas Patockaef208582009-04-02 19:55:38 +01002247 struct mapped_device *md = container_of(work, struct mapped_device,
2248 work);
Milan Broz6d6f10d2008-02-08 02:10:22 +00002249 struct bio *c;
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002250 int srcu_idx;
2251 struct dm_table *map;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002253 map = dm_get_live_table(md, &srcu_idx);
Mikulas Patockaef208582009-04-02 19:55:38 +01002254
Mikulas Patocka3b00b202009-04-09 00:27:15 +01002255 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
Alasdair G Kergondf12ee92009-04-09 00:27:13 +01002256 spin_lock_irq(&md->deferred_lock);
2257 c = bio_list_pop(&md->deferred);
2258 spin_unlock_irq(&md->deferred_lock);
Mikulas Patocka022c2612009-04-02 19:55:39 +01002259
Tejun Heo6a8736d2010-09-08 18:07:00 +02002260 if (!c)
Alasdair G Kergondf12ee92009-04-09 00:27:13 +01002261 break;
Alasdair G Kergondf12ee92009-04-09 00:27:13 +01002262
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002263 if (dm_request_based(md))
2264 generic_make_request(c);
Tejun Heo6a8736d2010-09-08 18:07:00 +02002265 else
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002266 __split_and_process_bio(md, map, c);
Mikulas Patocka022c2612009-04-02 19:55:39 +01002267 }
Milan Broz73d410c2008-02-08 02:10:25 +00002268
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002269 dm_put_live_table(md, srcu_idx);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270}
2271
Mikulas Patocka9a1fb462009-04-02 19:55:36 +01002272static void dm_queue_flush(struct mapped_device *md)
Milan Broz304f3f62008-02-08 02:11:17 +00002273{
Mikulas Patocka3b00b202009-04-09 00:27:15 +01002274 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
Peter Zijlstra4e857c52014-03-17 18:06:10 +01002275 smp_mb__after_atomic();
Mikulas Patocka53d59142009-04-02 19:55:37 +01002276 queue_work(md->wq, &md->work);
Milan Broz304f3f62008-02-08 02:11:17 +00002277}
2278
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279/*
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00002280 * Swap in a new table, returning the old one for the caller to destroy.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281 */
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00002282struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283{
Mike Christie87eb5b22013-03-01 22:45:48 +00002284 struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
Mike Snitzer754c5fc2009-06-22 10:12:34 +01002285 struct queue_limits limits;
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00002286 int r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287
Daniel Walkere61290a2008-02-08 02:10:08 +00002288 mutex_lock(&md->suspend_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002289
2290 /* device must be suspended */
Kiyoshi Ueda4f186f82009-12-10 23:52:26 +00002291 if (!dm_suspended_md(md))
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07002292 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293
	/*
	 * If the new table has no data devices, retain the existing limits.
	 * This helps multipath with queue_if_no_path: if all paths disappear
	 * and some later reappear, the new I/O queued in the meantime is
	 * still governed by these limits.
	 */
2300 if (dm_table_has_no_data_devices(table)) {
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002301 live_map = dm_get_live_table_fast(md);
Mike Snitzer3ae70652012-09-26 23:45:45 +01002302 if (live_map)
2303 limits = md->queue->limits;
Mikulas Patocka83d5e5b2013-07-10 23:41:18 +01002304 dm_put_live_table_fast(md);
Mike Snitzer3ae70652012-09-26 23:45:45 +01002305 }
2306
Mike Christie87eb5b22013-03-01 22:45:48 +00002307 if (!live_map) {
2308 r = dm_calculate_queue_limits(table, &limits);
2309 if (r) {
2310 map = ERR_PTR(r);
2311 goto out;
2312 }
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00002313 }
Mike Snitzer754c5fc2009-06-22 10:12:34 +01002314
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00002315 map = __bind(md, table, &limits);
Mikulas Patocka62e08242017-09-20 07:29:49 -04002316 dm_issue_global_event();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07002318out:
Daniel Walkere61290a2008-02-08 02:10:08 +00002319 mutex_unlock(&md->suspend_lock);
Alasdair G Kergon042d2a92009-12-10 23:52:24 +00002320 return map;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321}
2322
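/*
 * Hedged sketch of the expected calling sequence (assumed to mirror what
 * the ioctl layer does; not a definitive recipe): a live table swap is
 * bracketed by a suspend and a resume, and the returned old table belongs
 * to the caller.  dm_table_destroy() tolerates a NULL old map.
 *
 *	r = dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
 *	if (!r) {
 *		struct dm_table *old = dm_swap_table(md, new_table);
 *
 *		if (!IS_ERR(old))
 *			dm_table_destroy(old);
 *		dm_resume(md);
 *	}
 */
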
2323/*
2324 * Functions to lock and unlock any filesystem running on the
2325 * device.
2326 */
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002327static int lock_fs(struct mapped_device *md)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328{
Alasdair G Kergone39e2e92006-01-06 00:20:05 -08002329 int r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330
2331 WARN_ON(md->frozen_sb);
Alasdair G Kergondfbe03f2005-05-05 16:16:04 -07002332
Mikulas Patockadb8fef42009-06-22 10:12:15 +01002333 md->frozen_sb = freeze_bdev(md->bdev);
Alasdair G Kergondfbe03f2005-05-05 16:16:04 -07002334 if (IS_ERR(md->frozen_sb)) {
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07002335 r = PTR_ERR(md->frozen_sb);
Alasdair G Kergone39e2e92006-01-06 00:20:05 -08002336 md->frozen_sb = NULL;
2337 return r;
Alasdair G Kergondfbe03f2005-05-05 16:16:04 -07002338 }
2339
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -08002340 set_bit(DMF_FROZEN, &md->flags);
2341
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 return 0;
2343}
2344
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002345static void unlock_fs(struct mapped_device *md)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346{
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -08002347 if (!test_bit(DMF_FROZEN, &md->flags))
2348 return;
2349
Mikulas Patockadb8fef42009-06-22 10:12:15 +01002350 thaw_bdev(md->bdev, md->frozen_sb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351 md->frozen_sb = NULL;
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -08002352 clear_bit(DMF_FROZEN, &md->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353}
2354
2355/*
Bart Van Asscheb48633f2016-08-31 15:16:02 -07002356 * @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG
2357 * @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE
2358 * @dmf_suspended_flag: DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY
2359 *
Mike Snitzerffcc3932014-10-28 18:34:52 -04002360 * If __dm_suspend returns 0, the device is completely quiescent
2361 * now. There is no request-processing activity. All new requests
2362 * are being added to md->deferred list.
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01002363 */
Mike Snitzerffcc3932014-10-28 18:34:52 -04002364static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
Bart Van Asscheb48633f2016-08-31 15:16:02 -07002365 unsigned suspend_flags, long task_state,
Mike Snitzereaf9a732016-08-02 13:07:20 -04002366 int dmf_suspended_flag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367{
Mike Snitzerffcc3932014-10-28 18:34:52 -04002368 bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
2369 bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
2370 int r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371
Bart Van Assche5a8f1f82016-08-31 15:17:04 -07002372 lockdep_assert_held(&md->suspend_lock);
2373
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08002374 /*
2375 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
2376 * This flag is cleared before dm_suspend returns.
2377 */
2378 if (noflush)
2379 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
Bart Van Assche86331f32017-04-27 10:11:26 -07002380 else
2381 pr_debug("%s: suspending with flush\n", dm_device_name(md));
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08002382
Mike Snitzerd67ee212014-10-28 20:13:31 -04002383 /*
2384 * This gets reverted if there's an error later and the targets
2385 * provide the .presuspend_undo hook.
2386 */
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07002387 dm_table_presuspend_targets(map);
2388
Mikulas Patocka32a926d2009-06-22 10:12:17 +01002389 /*
Kiyoshi Ueda9f518b22009-12-10 23:52:16 +00002390 * Flush I/O to the device.
2391 * Any I/O submitted after lock_fs() may not be flushed.
2392 * noflush takes precedence over do_lockfs.
2393 * (lock_fs() flushes I/Os and waits for them to complete.)
Mikulas Patocka32a926d2009-06-22 10:12:17 +01002394 */
2395 if (!noflush && do_lockfs) {
2396 r = lock_fs(md);
Mike Snitzerd67ee212014-10-28 20:13:31 -04002397 if (r) {
2398 dm_table_presuspend_undo_targets(map);
Mike Snitzerffcc3932014-10-28 18:34:52 -04002399 return r;
Mike Snitzerd67ee212014-10-28 20:13:31 -04002400 }
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -08002401 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402
	/*
	 * Here we must make sure that no processes are submitting requests
	 * to target drivers, i.e. that no one is executing
	 * __split_and_process_bio, which is called from dm_make_request and
	 * dm_wq_work.
	 *
	 * To keep new callers out of __split_and_process_bio via
	 * dm_make_request, and to quiesce the dm_wq_work thread, we set
	 * DMF_BLOCK_IO_FOR_SUSPEND so that incoming bios are deferred, wait
	 * out current readers with synchronize_srcu(&md->io_barrier) and
	 * later call flush_workqueue(md->wq).
	 */
Alasdair G Kergon1eb787e2009-04-09 00:27:14 +01002415 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
Hannes Reinecke41abc4e2014-11-05 14:35:50 +01002416 if (map)
2417 synchronize_srcu(&md->io_barrier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418
Kiyoshi Uedad0bcb872009-12-10 23:52:18 +00002419 /*
Tejun Heo29e40132010-09-08 18:07:00 +02002420 * Stop md->queue before flushing md->wq in case request-based
2421 * dm defers requests to md->wq from md->queue.
Kiyoshi Uedad0bcb872009-12-10 23:52:18 +00002422 */
Keith Busch2eb6e1e2014-10-17 17:46:36 -06002423 if (dm_request_based(md)) {
Mike Snitzereca7ee62016-02-20 13:45:38 -05002424 dm_stop_queue(md->queue);
Mike Snitzer02233342015-03-10 23:49:26 -04002425 if (md->kworker_task)
Petr Mladek39891442016-10-11 13:55:20 -07002426 kthread_flush_worker(&md->kworker);
Keith Busch2eb6e1e2014-10-17 17:46:36 -06002427 }
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01002428
Kiyoshi Uedad0bcb872009-12-10 23:52:18 +00002429 flush_workqueue(md->wq);
2430
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431 /*
Mikulas Patocka3b00b202009-04-09 00:27:15 +01002432 * At this point no more requests are entering target request routines.
2433 * We call dm_wait_for_completion to wait for all existing requests
2434 * to finish.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435 */
Bart Van Asscheb48633f2016-08-31 15:16:02 -07002436 r = dm_wait_for_completion(md, task_state);
Mike Snitzereaf9a732016-08-02 13:07:20 -04002437 if (!r)
2438 set_bit(dmf_suspended_flag, &md->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439
Milan Broz6d6f10d2008-02-08 02:10:22 +00002440 if (noflush)
Mikulas Patocka022c2612009-04-02 19:55:39 +01002441 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
Hannes Reinecke41abc4e2014-11-05 14:35:50 +01002442 if (map)
2443 synchronize_srcu(&md->io_barrier);
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08002444
Linus Torvalds1da177e2005-04-16 15:20:36 -07002445	/* were we interrupted? */
Milan Broz46125c12008-02-08 02:10:30 +00002446 if (r < 0) {
Mikulas Patocka9a1fb462009-04-02 19:55:36 +01002447 dm_queue_flush(md);
Milan Broz73d410c2008-02-08 02:10:25 +00002448
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01002449 if (dm_request_based(md))
Mike Snitzereca7ee62016-02-20 13:45:38 -05002450 dm_start_queue(md->queue);
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01002451
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002452 unlock_fs(md);
Mike Snitzerd67ee212014-10-28 20:13:31 -04002453 dm_table_presuspend_undo_targets(map);
Mike Snitzerffcc3932014-10-28 18:34:52 -04002454 /* pushback list is already flushed, so skip flush */
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002455 }
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002456
Mike Snitzerffcc3932014-10-28 18:34:52 -04002457 return r;
2458}
2459
2460/*
2461 * We need to be able to change a mapping table under a mounted
2462 * filesystem. For example we might want to move some data in
2463 * the background. Before the table can be swapped with
 2464	 * dm_swap_table, dm_suspend must be called to flush any in
2465 * flight bios and ensure that any further io gets deferred.
2466 */
2467/*
2468 * Suspend mechanism in request-based dm.
2469 *
2470 * 1. Flush all I/Os by lock_fs() if needed.
2471 * 2. Stop dispatching any I/O by stopping the request_queue.
2472 * 3. Wait for all in-flight I/Os to be completed or requeued.
2473 *
2474 * To abort suspend, start the request_queue.
2475 */
2476int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2477{
2478 struct dm_table *map = NULL;
2479 int r = 0;
2480
2481retry:
2482 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
2483
2484 if (dm_suspended_md(md)) {
2485 r = -EINVAL;
2486 goto out_unlock;
2487 }
2488
2489 if (dm_suspended_internally_md(md)) {
2490 /* already internally suspended, wait for internal resume */
2491 mutex_unlock(&md->suspend_lock);
2492 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
2493 if (r)
2494 return r;
2495 goto retry;
2496 }
2497
Eric Dumazeta12f5d42014-11-23 09:34:29 -08002498 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
Mike Snitzerffcc3932014-10-28 18:34:52 -04002499
Mike Snitzereaf9a732016-08-02 13:07:20 -04002500 r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
Mike Snitzerffcc3932014-10-28 18:34:52 -04002501 if (r)
2502 goto out_unlock;
Mikulas Patocka3b00b202009-04-09 00:27:15 +01002503
Kiyoshi Ueda4d4471c2009-12-10 23:52:26 +00002504 dm_table_postsuspend_targets(map);
2505
Alasdair G Kergond2874832006-11-08 17:44:43 -08002506out_unlock:
Daniel Walkere61290a2008-02-08 02:10:08 +00002507 mutex_unlock(&md->suspend_lock);
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07002508 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509}
2510
Mike Snitzerffcc3932014-10-28 18:34:52 -04002511static int __dm_resume(struct mapped_device *md, struct dm_table *map)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002512{
Mike Snitzerffcc3932014-10-28 18:34:52 -04002513 if (map) {
2514 int r = dm_table_resume_targets(map);
2515 if (r)
2516 return r;
2517 }
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002518
Mikulas Patocka9a1fb462009-04-02 19:55:36 +01002519 dm_queue_flush(md);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002520
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01002521 /*
2522 * Flushing deferred I/Os must be done after targets are resumed
2523 * so that mapping of targets can work correctly.
2524 * Request-based dm is queueing the deferred I/Os in its request_queue.
2525 */
2526 if (dm_request_based(md))
Mike Snitzereca7ee62016-02-20 13:45:38 -05002527 dm_start_queue(md->queue);
Kiyoshi Uedacec47e32009-06-22 10:12:35 +01002528
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002529 unlock_fs(md);
2530
Mike Snitzerffcc3932014-10-28 18:34:52 -04002531 return 0;
2532}
2533
2534int dm_resume(struct mapped_device *md)
2535{
Minfei Huang8dc23652016-09-06 16:00:29 +08002536 int r;
Mike Snitzerffcc3932014-10-28 18:34:52 -04002537 struct dm_table *map = NULL;
2538
2539retry:
Minfei Huang8dc23652016-09-06 16:00:29 +08002540 r = -EINVAL;
Mike Snitzerffcc3932014-10-28 18:34:52 -04002541 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
2542
2543 if (!dm_suspended_md(md))
2544 goto out;
2545
2546 if (dm_suspended_internally_md(md)) {
2547 /* already internally suspended, wait for internal resume */
2548 mutex_unlock(&md->suspend_lock);
2549 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
2550 if (r)
2551 return r;
2552 goto retry;
2553 }
2554
Eric Dumazeta12f5d42014-11-23 09:34:29 -08002555 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
Mike Snitzerffcc3932014-10-28 18:34:52 -04002556 if (!map || !dm_table_get_size(map))
2557 goto out;
2558
2559 r = __dm_resume(md, map);
2560 if (r)
2561 goto out;
2562
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002563 clear_bit(DMF_SUSPENDED, &md->flags);
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07002564out:
Daniel Walkere61290a2008-02-08 02:10:08 +00002565 mutex_unlock(&md->suspend_lock);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07002566
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07002567 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568}
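/*
 * Illustrative sketch, not code from this file: a typical kernel-side
 * caller pairs dm_suspend() and dm_resume() around a table change.
 * DM_SUSPEND_LOCKFS_FLAG is assumed to come from dm.h and to request the
 * lock_fs() step described above; error handling is abbreviated.
 *
 *	int r = dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
 *	if (r)
 *		return r;		// e.g. -EINVAL if already suspended
 *
 *	// ... swap in or modify the table here ...
 *
 *	r = dm_resume(md);		// flushes deferred I/O, unlocks the fs
 */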
2569
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04002570/*
2571 * Internal suspend/resume works like userspace-driven suspend. It waits
2572 * until all bios finish and prevents issuing new bios to the target drivers.
2573 * It may be used only from the kernel.
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04002574 */
2575
Mike Snitzerffcc3932014-10-28 18:34:52 -04002576static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
2577{
2578 struct dm_table *map = NULL;
2579
Bart Van Assche1ea06542017-04-27 10:11:21 -07002580 lockdep_assert_held(&md->suspend_lock);
2581
Mikulas Patocka96b26c82015-01-08 18:52:26 -05002582 if (md->internal_suspend_count++)
Mike Snitzerffcc3932014-10-28 18:34:52 -04002583 return; /* nested internal suspend */
2584
2585 if (dm_suspended_md(md)) {
2586 set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2587 return; /* nest suspend */
 2588		return; /* nested suspend */
2589
Eric Dumazeta12f5d42014-11-23 09:34:29 -08002590 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
Mike Snitzerffcc3932014-10-28 18:34:52 -04002591
2592 /*
2593 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
2594 * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend
2595 * would require changing .presuspend to return an error -- avoid this
2596 * until there is a need for more elaborate variants of internal suspend.
2597 */
Mike Snitzereaf9a732016-08-02 13:07:20 -04002598 (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
2599 DMF_SUSPENDED_INTERNALLY);
Mike Snitzerffcc3932014-10-28 18:34:52 -04002600
2601 dm_table_postsuspend_targets(map);
2602}
2603
2604static void __dm_internal_resume(struct mapped_device *md)
2605{
Mikulas Patocka96b26c82015-01-08 18:52:26 -05002606 BUG_ON(!md->internal_suspend_count);
2607
2608 if (--md->internal_suspend_count)
Mike Snitzerffcc3932014-10-28 18:34:52 -04002609 return; /* resume from nested internal suspend */
2610
2611 if (dm_suspended_md(md))
2612 goto done; /* resume from nested suspend */
2613
2614 /*
2615 * NOTE: existing callers don't need to call dm_table_resume_targets
2616 * (which may fail -- so best to avoid it for now by passing NULL map)
2617 */
2618 (void) __dm_resume(md, NULL);
2619
2620done:
2621 clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2622 smp_mb__after_atomic();
2623 wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
2624}
2625
2626void dm_internal_suspend_noflush(struct mapped_device *md)
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04002627{
2628 mutex_lock(&md->suspend_lock);
Mike Snitzerffcc3932014-10-28 18:34:52 -04002629 __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
2630 mutex_unlock(&md->suspend_lock);
2631}
2632EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
2633
2634void dm_internal_resume(struct mapped_device *md)
2635{
2636 mutex_lock(&md->suspend_lock);
2637 __dm_internal_resume(md);
2638 mutex_unlock(&md->suspend_lock);
2639}
2640EXPORT_SYMBOL_GPL(dm_internal_resume);
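/*
 * Illustrative sketch (an assumed kernel-side caller, not code from this
 * file): the noflush internal suspend nests by way of
 * md->internal_suspend_count, so it is safe even if the device is already
 * suspended by userspace.
 *
 *	struct mapped_device *md = dm_table_get_md(ti->table);
 *
 *	dm_internal_suspend_noflush(md);
 *	// ... perform the operation that needs quiesced I/O ...
 *	dm_internal_resume(md);
 */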
2641
2642/*
2643 * Fast variants of internal suspend/resume hold md->suspend_lock,
2644 * which prevents interaction with userspace-driven suspend.
2645 */
2646
2647void dm_internal_suspend_fast(struct mapped_device *md)
2648{
2649 mutex_lock(&md->suspend_lock);
2650 if (dm_suspended_md(md) || dm_suspended_internally_md(md))
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04002651 return;
2652
2653 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2654 synchronize_srcu(&md->io_barrier);
2655 flush_workqueue(md->wq);
2656 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2657}
Mikulas Patockab735fed2015-02-26 11:40:35 -05002658EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04002659
Mike Snitzerffcc3932014-10-28 18:34:52 -04002660void dm_internal_resume_fast(struct mapped_device *md)
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04002661{
Mike Snitzerffcc3932014-10-28 18:34:52 -04002662 if (dm_suspended_md(md) || dm_suspended_internally_md(md))
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04002663 goto done;
2664
2665 dm_queue_flush(md);
2666
2667done:
2668 mutex_unlock(&md->suspend_lock);
2669}
Mikulas Patockab735fed2015-02-26 11:40:35 -05002670EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
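/*
 * Illustrative sketch: unlike the pair above, the fast variants bracket a
 * critical section: dm_internal_suspend_fast() returns with
 * md->suspend_lock still held and dm_internal_resume_fast() releases it,
 * so the two calls must be paired in the same code path.
 *
 *	dm_internal_suspend_fast(md);	// takes md->suspend_lock
 *	// ... inspect or copy device state while I/O is quiesced ...
 *	dm_internal_resume_fast(md);	// drops md->suspend_lock
 */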
Mikulas Patockafd2ed4d2013-08-16 10:54:23 -04002671
Linus Torvalds1da177e2005-04-16 15:20:36 -07002672/*-----------------------------------------------------------------
2673 * Event notification.
2674 *---------------------------------------------------------------*/
Peter Rajnoha3abf85b2010-03-06 02:32:31 +00002675int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
Milan Broz60935eb2009-06-22 10:12:30 +01002676 unsigned cookie)
Alasdair G Kergon69267a32007-12-13 14:15:57 +00002677{
Milan Broz60935eb2009-06-22 10:12:30 +01002678 char udev_cookie[DM_COOKIE_LENGTH];
2679 char *envp[] = { udev_cookie, NULL };
2680
2681 if (!cookie)
Peter Rajnoha3abf85b2010-03-06 02:32:31 +00002682 return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
Milan Broz60935eb2009-06-22 10:12:30 +01002683 else {
2684 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
2685 DM_COOKIE_ENV_VAR_NAME, cookie);
Peter Rajnoha3abf85b2010-03-06 02:32:31 +00002686 return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
2687 action, envp);
Milan Broz60935eb2009-06-22 10:12:30 +01002688 }
Alasdair G Kergon69267a32007-12-13 14:15:57 +00002689}
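/*
 * Illustrative sketch: with a cookie the uevent carries exactly one extra
 * environment variable of the form "DM_COOKIE=<decimal value>", which
 * udev rules and libdevmapper use to match the event to the ioctl that
 * triggered it.  A hypothetical caller (names assumed):
 *
 *	dm_kobject_uevent(md, KOBJ_CHANGE, cookie);
 *	// emits e.g. DM_COOKIE=1234 alongside the CHANGE uevent
 */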
2690
Mike Anderson7a8c3d32007-10-19 22:48:01 +01002691uint32_t dm_next_uevent_seq(struct mapped_device *md)
2692{
2693 return atomic_add_return(1, &md->uevent_seq);
2694}
2695
Linus Torvalds1da177e2005-04-16 15:20:36 -07002696uint32_t dm_get_event_nr(struct mapped_device *md)
2697{
2698 return atomic_read(&md->event_nr);
2699}
2700
2701int dm_wait_event(struct mapped_device *md, int event_nr)
2702{
2703 return wait_event_interruptible(md->eventq,
2704 (event_nr != atomic_read(&md->event_nr)));
2705}
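/*
 * Illustrative sketch: dm_get_event_nr() and dm_wait_event() form a
 * "snapshot the counter, then block until it changes" protocol, roughly
 * what the DM_DEV_WAIT ioctl path does.  Variable names are assumptions.
 *
 *	int event_nr = dm_get_event_nr(md);	// snapshot current counter
 *	// ... report current status to the caller ...
 *	int r = dm_wait_event(md, event_nr);	// sleeps until it changes
 *	// r is -ERESTARTSYS if interrupted by a signal
 */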
2706
Mike Anderson7a8c3d32007-10-19 22:48:01 +01002707void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
2708{
2709 unsigned long flags;
2710
2711 spin_lock_irqsave(&md->uevent_lock, flags);
2712 list_add(elist, &md->uevent_list);
2713 spin_unlock_irqrestore(&md->uevent_lock, flags);
2714}
2715
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716/*
2717 * The gendisk is only valid as long as you have a reference
2718 * count on 'md'.
2719 */
2720struct gendisk *dm_disk(struct mapped_device *md)
2721{
2722 return md->disk;
2723}
Sami Tolvanen65ff5b72015-03-18 15:52:14 +00002724EXPORT_SYMBOL_GPL(dm_disk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002725
Milan Broz784aae72009-01-06 03:05:12 +00002726struct kobject *dm_kobject(struct mapped_device *md)
2727{
Mikulas Patocka2995fa72014-01-13 19:37:54 -05002728 return &md->kobj_holder.kobj;
Milan Broz784aae72009-01-06 03:05:12 +00002729}
2730
Milan Broz784aae72009-01-06 03:05:12 +00002731struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
2732{
2733 struct mapped_device *md;
2734
Mikulas Patocka2995fa72014-01-13 19:37:54 -05002735 md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
Milan Broz784aae72009-01-06 03:05:12 +00002736
Hou Taob9a41d22017-11-01 15:42:36 +08002737 spin_lock(&_minor_lock);
2738 if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
2739 md = NULL;
2740 goto out;
2741 }
Milan Broz784aae72009-01-06 03:05:12 +00002742 dm_get(md);
Hou Taob9a41d22017-11-01 15:42:36 +08002743out:
2744 spin_unlock(&_minor_lock);
2745
Milan Broz784aae72009-01-06 03:05:12 +00002746 return md;
2747}
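/*
 * Illustrative sketch (assumed sysfs attribute handler, not code from
 * this file): the kobject is resolved back to its mapped_device and the
 * reference taken above must be dropped with dm_put() from dm.h.
 *
 *	struct mapped_device *md = dm_get_from_kobject(kobj);
 *
 *	if (!md)
 *		return -ENODEV;	// device is being freed or deleted
 *	// ... format the attribute from md ...
 *	dm_put(md);
 */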
2748
Kiyoshi Ueda4f186f82009-12-10 23:52:26 +00002749int dm_suspended_md(struct mapped_device *md)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002750{
2751 return test_bit(DMF_SUSPENDED, &md->flags);
2752}
2753
Mike Snitzerffcc3932014-10-28 18:34:52 -04002754int dm_suspended_internally_md(struct mapped_device *md)
2755{
2756 return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2757}
2758
Mikulas Patocka2c140a22013-11-01 18:27:41 -04002759int dm_test_deferred_remove_flag(struct mapped_device *md)
2760{
2761 return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
2762}
2763
Kiyoshi Ueda64dbce52009-12-10 23:52:27 +00002764int dm_suspended(struct dm_target *ti)
2765{
Kiyoshi Uedaecdb2e22010-03-06 02:29:52 +00002766 return dm_suspended_md(dm_table_get_md(ti->table));
Kiyoshi Ueda64dbce52009-12-10 23:52:27 +00002767}
2768EXPORT_SYMBOL_GPL(dm_suspended);
2769
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08002770int dm_noflush_suspending(struct dm_target *ti)
2771{
Kiyoshi Uedaecdb2e22010-03-06 02:29:52 +00002772 return __noflush_suspending(dm_table_get_md(ti->table));
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08002773}
2774EXPORT_SYMBOL_GPL(dm_noflush_suspending);
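/*
 * Illustrative sketch: targets such as multipath consult
 * dm_noflush_suspending() on their error paths so that, during a noflush
 * suspend, failed I/O is held back and retried after resume instead of
 * being completed with an error.  The surrounding logic below is an
 * assumption, not code from any particular target.
 *
 *	if (error && dm_noflush_suspending(ti)) {
 *		// suspending with noflush: do not fail the I/O
 *		return DM_ENDIO_REQUEUE;
 *	}
 */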
2775
Bart Van Assche7e0d5742017-04-27 10:11:23 -07002776struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
Mike Snitzer0776aa02017-12-08 14:40:52 -05002777 unsigned integrity, unsigned per_io_data_size,
2778 unsigned min_pool_size)
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002779{
Mike Snitzer115485e2016-02-22 12:16:21 -05002780 struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
Mike Snitzer78d8e582015-06-26 10:01:13 -04002781 unsigned int pool_size = 0;
Mike Snitzer64f52b02017-12-11 23:17:47 -05002782 unsigned int front_pad, io_front_pad;
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002783
2784 if (!pools)
Mike Snitzer4e6e36c2015-06-26 09:42:57 -04002785 return NULL;
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002786
Mike Snitzer78d8e582015-06-26 10:01:13 -04002787 switch (type) {
2788 case DM_TYPE_BIO_BASED:
Toshi Kani545ed202016-06-22 17:54:53 -06002789 case DM_TYPE_DAX_BIO_BASED:
Mike Snitzer0776aa02017-12-08 14:40:52 -05002790 pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
Mike Snitzer30187e12016-01-31 13:28:26 -05002791 front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
Mike Snitzer64f52b02017-12-11 23:17:47 -05002792 io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio);
2793 pools->io_bs = bioset_create(pool_size, io_front_pad, 0);
2794 if (!pools->io_bs)
2795 goto out;
2796 if (integrity && bioset_integrity_create(pools->io_bs, pool_size))
2797 goto out;
Mike Snitzer78d8e582015-06-26 10:01:13 -04002798 break;
2799 case DM_TYPE_REQUEST_BASED:
Mike Snitzer78d8e582015-06-26 10:01:13 -04002800 case DM_TYPE_MQ_REQUEST_BASED:
Mike Snitzer0776aa02017-12-08 14:40:52 -05002801 pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
Mike Snitzer78d8e582015-06-26 10:01:13 -04002802 front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
Mike Snitzer591ddcf2016-01-31 12:05:42 -05002803 /* per_io_data_size is used for blk-mq pdu at queue allocation */
Mike Snitzer78d8e582015-06-26 10:01:13 -04002804 break;
2805 default:
2806 BUG();
2807 }
2808
Mike Snitzer4a3f54d2017-11-22 15:37:43 -05002809 pools->bs = bioset_create(pool_size, front_pad, 0);
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002810 if (!pools->bs)
Jun'ichi Nomura5f015202013-03-01 22:45:48 +00002811 goto out;
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002812
Martin K. Petersena91a2782011-03-17 11:11:05 +01002813 if (integrity && bioset_integrity_create(pools->bs, pool_size))
Jun'ichi Nomura5f015202013-03-01 22:45:48 +00002814 goto out;
Martin K. Petersena91a2782011-03-17 11:11:05 +01002815
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002816 return pools;
Mike Snitzer78d8e582015-06-26 10:01:13 -04002817
Jun'ichi Nomura5f015202013-03-01 22:45:48 +00002818out:
2819 dm_free_md_mempools(pools);
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002820
Mike Snitzer4e6e36c2015-06-26 09:42:57 -04002821 return NULL;
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002822}
2823
2824void dm_free_md_mempools(struct dm_md_mempools *pools)
2825{
2826 if (!pools)
2827 return;
2828
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002829 if (pools->bs)
2830 bioset_free(pools->bs);
Mike Snitzer64f52b02017-12-11 23:17:47 -05002831 if (pools->io_bs)
2832 bioset_free(pools->io_bs);
Kiyoshi Uedae6ee8c02009-06-22 10:12:36 +01002833
2834 kfree(pools);
2835}
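/*
 * Illustrative sketch: table-load code sizes the pools from the table
 * type and the largest per-bio data any target asked for.  The variable
 * names are assumptions; the two helpers and DM_TYPE_BIO_BASED are the
 * real interfaces used above.
 *
 *	pools = dm_alloc_md_mempools(md, DM_TYPE_BIO_BASED, integrity,
 *				     per_io_data_size, min_pool_size);
 *	if (!pools)
 *		return -ENOMEM;
 *	// ... attach pools to the table; on a later error ...
 *	dm_free_md_mempools(pools);
 */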
2836
Christoph Hellwig9c72bad2016-07-08 21:23:51 +09002837struct dm_pr {
2838 u64 old_key;
2839 u64 new_key;
2840 u32 flags;
2841 bool fail_early;
2842};
2843
2844static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
2845 void *data)
2846{
2847 struct mapped_device *md = bdev->bd_disk->private_data;
2848 struct dm_table *table;
2849 struct dm_target *ti;
2850 int ret = -ENOTTY, srcu_idx;
2851
2852 table = dm_get_live_table(md, &srcu_idx);
2853 if (!table || !dm_table_get_size(table))
2854 goto out;
2855
2856 /* We only support devices that have a single target */
2857 if (dm_table_get_num_targets(table) != 1)
2858 goto out;
2859 ti = dm_table_get_target(table, 0);
2860
2861 ret = -EINVAL;
2862 if (!ti->type->iterate_devices)
2863 goto out;
2864
2865 ret = ti->type->iterate_devices(ti, fn, data);
2866out:
2867 dm_put_live_table(md, srcu_idx);
2868 return ret;
2869}
2870
2871/*
2872 * For register / unregister we need to manually call out to every path.
2873 */
2874static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
2875 sector_t start, sector_t len, void *data)
2876{
2877 struct dm_pr *pr = data;
2878 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
2879
2880 if (!ops || !ops->pr_register)
2881 return -EOPNOTSUPP;
2882 return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
2883}
2884
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002885static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
Mike Snitzer956a4022016-02-18 16:13:51 -05002886 u32 flags)
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002887{
Christoph Hellwig9c72bad2016-07-08 21:23:51 +09002888 struct dm_pr pr = {
2889 .old_key = old_key,
2890 .new_key = new_key,
2891 .flags = flags,
2892 .fail_early = true,
2893 };
2894 int ret;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002895
Christoph Hellwig9c72bad2016-07-08 21:23:51 +09002896 ret = dm_call_pr(bdev, __dm_pr_register, &pr);
2897 if (ret && new_key) {
2898 /* unregister all paths if we failed to register any path */
2899 pr.old_key = new_key;
2900 pr.new_key = 0;
2901 pr.flags = 0;
2902 pr.fail_early = false;
2903 dm_call_pr(bdev, __dm_pr_register, &pr);
2904 }
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002905
Christoph Hellwig9c72bad2016-07-08 21:23:51 +09002906 return ret;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002907}
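/*
 * Illustrative sketch (hypothetical userspace caller, not part of this
 * file): the register path above is reached through the generic
 * persistent-reservation ioctl on the dm block device.  Field and ioctl
 * names are assumed to match include/uapi/linux/pr.h.
 *
 *	struct pr_registration reg = {
 *		.new_key = 0xabcd,	// register this key on every path
 *	};
 *	ioctl(fd, IOC_PR_REGISTER, &reg);
 *
 * If any underlying path fails to register, dm_pr_register() rolls back
 * by unregistering the key from the paths that did succeed, as above.
 */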
2908
2909static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
Mike Snitzer956a4022016-02-18 16:13:51 -05002910 u32 flags)
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002911{
2912 struct mapped_device *md = bdev->bd_disk->private_data;
2913 const struct pr_ops *ops;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002914 fmode_t mode;
Mike Snitzer956a4022016-02-18 16:13:51 -05002915 int r;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002916
Mike Snitzer956a4022016-02-18 16:13:51 -05002917 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002918 if (r < 0)
2919 return r;
2920
2921 ops = bdev->bd_disk->fops->pr_ops;
2922 if (ops && ops->pr_reserve)
2923 r = ops->pr_reserve(bdev, key, type, flags);
2924 else
2925 r = -EOPNOTSUPP;
2926
Mike Snitzer956a4022016-02-18 16:13:51 -05002927 bdput(bdev);
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002928 return r;
2929}
2930
2931static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
2932{
2933 struct mapped_device *md = bdev->bd_disk->private_data;
2934 const struct pr_ops *ops;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002935 fmode_t mode;
Mike Snitzer956a4022016-02-18 16:13:51 -05002936 int r;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002937
Mike Snitzer956a4022016-02-18 16:13:51 -05002938 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002939 if (r < 0)
2940 return r;
2941
2942 ops = bdev->bd_disk->fops->pr_ops;
2943 if (ops && ops->pr_release)
2944 r = ops->pr_release(bdev, key, type);
2945 else
2946 r = -EOPNOTSUPP;
2947
Mike Snitzer956a4022016-02-18 16:13:51 -05002948 bdput(bdev);
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002949 return r;
2950}
2951
2952static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
Mike Snitzer956a4022016-02-18 16:13:51 -05002953 enum pr_type type, bool abort)
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002954{
2955 struct mapped_device *md = bdev->bd_disk->private_data;
2956 const struct pr_ops *ops;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002957 fmode_t mode;
Mike Snitzer956a4022016-02-18 16:13:51 -05002958 int r;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002959
Mike Snitzer956a4022016-02-18 16:13:51 -05002960 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002961 if (r < 0)
2962 return r;
2963
2964 ops = bdev->bd_disk->fops->pr_ops;
2965 if (ops && ops->pr_preempt)
2966 r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
2967 else
2968 r = -EOPNOTSUPP;
2969
Mike Snitzer956a4022016-02-18 16:13:51 -05002970 bdput(bdev);
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002971 return r;
2972}
2973
2974static int dm_pr_clear(struct block_device *bdev, u64 key)
2975{
2976 struct mapped_device *md = bdev->bd_disk->private_data;
2977 const struct pr_ops *ops;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002978 fmode_t mode;
Mike Snitzer956a4022016-02-18 16:13:51 -05002979 int r;
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002980
Mike Snitzer956a4022016-02-18 16:13:51 -05002981 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002982 if (r < 0)
2983 return r;
2984
2985 ops = bdev->bd_disk->fops->pr_ops;
2986 if (ops && ops->pr_clear)
2987 r = ops->pr_clear(bdev, key);
2988 else
2989 r = -EOPNOTSUPP;
2990
Mike Snitzer956a4022016-02-18 16:13:51 -05002991 bdput(bdev);
Christoph Hellwig71cdb692015-10-15 14:10:51 +02002992 return r;
2993}
2994
2995static const struct pr_ops dm_pr_ops = {
2996 .pr_register = dm_pr_register,
2997 .pr_reserve = dm_pr_reserve,
2998 .pr_release = dm_pr_release,
2999 .pr_preempt = dm_pr_preempt,
3000 .pr_clear = dm_pr_clear,
3001};
3002
Alexey Dobriyan83d5cde2009-09-21 17:01:13 -07003003static const struct block_device_operations dm_blk_dops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003004 .open = dm_blk_open,
3005 .release = dm_blk_close,
Milan Brozaa129a22006-10-03 01:15:15 -07003006 .ioctl = dm_blk_ioctl,
Darrick J. Wong3ac51e72006-03-27 01:17:54 -08003007 .getgeo = dm_blk_getgeo,
Christoph Hellwig71cdb692015-10-15 14:10:51 +02003008 .pr_ops = &dm_pr_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003009 .owner = THIS_MODULE
3010};
3011
Dan Williamsf26c5712017-04-12 12:35:44 -07003012static const struct dax_operations dm_dax_ops = {
3013 .direct_access = dm_dax_direct_access,
Dan Williams7e026c82017-05-29 12:57:56 -07003014 .copy_from_iter = dm_dax_copy_from_iter,
Dan Williamsf26c5712017-04-12 12:35:44 -07003015};
3016
Linus Torvalds1da177e2005-04-16 15:20:36 -07003017/*
3018 * module hooks
3019 */
3020module_init(dm_init);
3021module_exit(dm_exit);
3022
3023module_param(major, uint, 0);
3024MODULE_PARM_DESC(major, "The major number of the device mapper");
Mike Snitzerf4790822013-09-12 18:06:12 -04003025
Mike Snitzere8603132013-09-12 18:06:12 -04003026module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
3027MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
3028
Mike Snitzer115485e2016-02-22 12:16:21 -05003029module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
3030MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
3031
Linus Torvalds1da177e2005-04-16 15:20:36 -07003032MODULE_DESCRIPTION(DM_NAME " driver");
3033MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
3034MODULE_LICENSE("GPL");