blob: 52baf8a5b0f47d8a130c9baf48cc9eacdcb4b05c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright (C) 2003 Sistina Software Limited.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This file is released under the GPL.
6 */
7
Mikulas Patocka586e80e2008-10-21 17:44:59 +01008#include <linux/device-mapper.h>
9
Mike Snitzerf4790822013-09-12 18:06:12 -040010#include "dm.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070011#include "dm-path-selector.h"
Mike Andersonb15546f2007-10-19 22:48:02 +010012#include "dm-uevent.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070013
Mike Snitzere5863d92014-12-17 21:08:12 -050014#include <linux/blkdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <linux/ctype.h>
16#include <linux/init.h>
17#include <linux/mempool.h>
18#include <linux/module.h>
19#include <linux/pagemap.h>
20#include <linux/slab.h>
21#include <linux/time.h>
22#include <linux/workqueue.h>
Mikulas Patocka35991652012-06-03 00:29:58 +010023#include <linux/delay.h>
Chandra Seetharamancfae5c92008-05-01 14:50:11 -070024#include <scsi/scsi_dh.h>
Arun Sharma600634972011-07-26 16:09:06 -070025#include <linux/atomic.h>
Mike Snitzer78ce23b2016-01-31 17:38:28 -050026#include <linux/blk-mq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027
#define DM_MSG_PREFIX "multipath"

/* Delay (msecs) used for a delayed pg_init retry when none was configured. */
#define DM_PG_INIT_DELAY_MSECS 2000

/* Sentinel in pg_init_delay_msecs: fall back to DM_PG_INIT_DELAY_MSECS. */
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned int)-1)
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32/* Path properties */
33struct pgpath {
34 struct list_head list;
35
36 struct priority_group *pg; /* Owning PG */
37 unsigned fail_count; /* Cumulative failure count */
38
Josef "Jeff" Sipekc922d5f2006-12-08 02:36:33 -080039 struct dm_path path;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +000040 struct delayed_work activate_path;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -050041
42 bool is_active:1; /* Path status */
Linus Torvalds1da177e2005-04-16 15:20:36 -070043};
44
45#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
46
47/*
48 * Paths are grouped into Priority Groups and numbered from 1 upwards.
49 * Each has a path selector which controls which path gets used.
50 */
51struct priority_group {
52 struct list_head list;
53
54 struct multipath *m; /* Owning multipath instance */
55 struct path_selector ps;
56
57 unsigned pg_num; /* Reference number */
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 unsigned nr_pgpaths; /* Number of paths in PG */
59 struct list_head pgpaths;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -050060
61 bool bypassed:1; /* Temporarily bypass this PG? */
Linus Torvalds1da177e2005-04-16 15:20:36 -070062};
63
64/* Multipath context */
65struct multipath {
66 struct list_head list;
67 struct dm_target *ti;
68
Chandra Seetharamancfae5c92008-05-01 14:50:11 -070069 const char *hw_handler_name;
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -070070 char *hw_handler_params;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +000071
Mike Snitzer1fbdd2b2012-06-03 00:29:43 +010072 spinlock_t lock;
73
Linus Torvalds1da177e2005-04-16 15:20:36 -070074 unsigned nr_priority_groups;
75 struct list_head priority_groups;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +000076
77 wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
78
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 struct pgpath *current_pgpath;
80 struct priority_group *current_pg;
81 struct priority_group *next_pg; /* Switch to this PG if set */
Linus Torvalds1da177e2005-04-16 15:20:36 -070082
Mike Snitzer518257b2016-03-17 16:32:10 -040083 unsigned long flags; /* Multipath state flags */
Mike Snitzer1fbdd2b2012-06-03 00:29:43 +010084
Dave Wysochanskic9e45582007-10-19 22:47:53 +010085 unsigned pg_init_retries; /* Number of times to retry pg_init */
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +000086 unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
Linus Torvalds1da177e2005-04-16 15:20:36 -070087
Mike Snitzer91e968a2016-03-17 17:10:15 -040088 atomic_t nr_valid_paths; /* Total number of usable paths */
89 atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
90 atomic_t pg_init_count; /* Number of times pg_init called */
91
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 /*
Alasdair G Kergon028867a2007-07-12 17:26:32 +010093 * We must use a mempool of dm_mpath_io structs so that we
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 * can resubmit bios on error.
95 */
96 mempool_t *mpio_pool;
Mike Anderson6380f262009-12-10 23:52:21 +000097
98 struct mutex work_mutex;
Mike Snitzer20800cb2016-03-17 17:13:10 -040099 struct work_struct trigger_event;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100};
101
102/*
103 * Context information attached to each bio we process.
104 */
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100105struct dm_mpath_io {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106 struct pgpath *pgpath;
Kiyoshi Ueda02ab8232009-06-22 10:12:27 +0100107 size_t nr_bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108};
109
typedef int (*action_fn) (struct pgpath *pgpath);

/* Slab cache backing each multipath's mpio_pool. */
static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd;
/* Workqueue on which per-path activate_path work is queued. */
static struct workqueue_struct *kmpath_handlerd;

static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
/*-----------------------------------------------
 * Multipath state flags: bit numbers within
 * multipath->flags, for set_bit()/test_bit().
 *-----------------------------------------------*/

#define MPATHF_QUEUE_IO				0	/* Must we queue all I/O? */
#define MPATHF_QUEUE_IF_NO_PATH			1	/* Queue I/O if last path fails? */
#define MPATHF_SAVED_QUEUE_IF_NO_PATH		2	/* Saved state during suspension */
#define MPATHF_RETAIN_ATTACHED_HW_HANDLER	3	/* If there's already a hw_handler present, don't change it. */
#define MPATHF_PG_INIT_DISABLED			4	/* pg_init is not currently allowed */
#define MPATHF_PG_INIT_REQUIRED			5	/* pg_init needs calling? */
#define MPATHF_PG_INIT_DELAY_RETRY		6	/* Delay pg_init retry? */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/
133
134static struct pgpath *alloc_pgpath(void)
135{
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700136 struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137
Mike Anderson224cb3e2008-08-29 09:36:09 +0200138 if (pgpath) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -0500139 pgpath->is_active = true;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000140 INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
Mike Anderson224cb3e2008-08-29 09:36:09 +0200141 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142
143 return pgpath;
144}
145
/* Release the memory of a pgpath obtained from alloc_pgpath(). */
static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}
150
151static struct priority_group *alloc_priority_group(void)
152{
153 struct priority_group *pg;
154
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700155 pg = kzalloc(sizeof(*pg), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700157 if (pg)
158 INIT_LIST_HEAD(&pg->pgpaths);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
160 return pg;
161}
162
163static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
164{
165 struct pgpath *pgpath, *tmp;
166
167 list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
168 list_del(&pgpath->list);
169 dm_put_device(ti, pgpath->path.dev);
170 free_pgpath(pgpath);
171 }
172}
173
174static void free_priority_group(struct priority_group *pg,
175 struct dm_target *ti)
176{
177 struct path_selector *ps = &pg->ps;
178
179 if (ps->type) {
180 ps->type->destroy(ps);
181 dm_put_path_selector(ps->type);
182 }
183
184 free_pgpaths(&pg->pgpaths, ti);
185 kfree(pg);
186}
187
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500188static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189{
190 struct multipath *m;
191
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700192 m = kzalloc(sizeof(*m), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 if (m) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 INIT_LIST_HEAD(&m->priority_groups);
195 spin_lock_init(&m->lock);
Mike Snitzer518257b2016-03-17 16:32:10 -0400196 set_bit(MPATHF_QUEUE_IO, &m->flags);
Mike Snitzer91e968a2016-03-17 17:10:15 -0400197 atomic_set(&m->nr_valid_paths, 0);
198 atomic_set(&m->pg_init_in_progress, 0);
199 atomic_set(&m->pg_init_count, 0);
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000200 m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
David Howellsc4028952006-11-22 14:57:56 +0000201 INIT_WORK(&m->trigger_event, trigger_event);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000202 init_waitqueue_head(&m->pg_init_wait);
Mike Anderson6380f262009-12-10 23:52:21 +0000203 mutex_init(&m->work_mutex);
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500204
205 m->mpio_pool = NULL;
206 if (!use_blk_mq) {
207 unsigned min_ios = dm_get_reserved_rq_based_ios();
208
209 m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
210 if (!m->mpio_pool) {
211 kfree(m);
212 return NULL;
213 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214 }
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500215
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700216 m->ti = ti;
217 ti->private = m;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218 }
219
220 return m;
221}
222
223static void free_multipath(struct multipath *m)
224{
225 struct priority_group *pg, *tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226
227 list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
228 list_del(&pg->list);
229 free_priority_group(pg, m->ti);
230 }
231
Chandra Seetharamancfae5c92008-05-01 14:50:11 -0700232 kfree(m->hw_handler_name);
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700233 kfree(m->hw_handler_params);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 mempool_destroy(m->mpio_pool);
235 kfree(m);
236}
237
Mike Snitzer2eff1922016-02-03 09:13:14 -0500238static struct dm_mpath_io *get_mpio(union map_info *info)
239{
240 return info->ptr;
241}
242
243static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info)
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100244{
245 struct dm_mpath_io *mpio;
246
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500247 if (!m->mpio_pool) {
248 /* Use blk-mq pdu memory requested via per_io_data_size */
Mike Snitzer2eff1922016-02-03 09:13:14 -0500249 mpio = get_mpio(info);
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500250 memset(mpio, 0, sizeof(*mpio));
251 return mpio;
252 }
253
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100254 mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
255 if (!mpio)
Mike Snitzer2eff1922016-02-03 09:13:14 -0500256 return NULL;
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100257
258 memset(mpio, 0, sizeof(*mpio));
259 info->ptr = mpio;
260
Mike Snitzer2eff1922016-02-03 09:13:14 -0500261 return mpio;
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100262}
263
Mike Snitzer2eff1922016-02-03 09:13:14 -0500264static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100265{
Mike Snitzer2eff1922016-02-03 09:13:14 -0500266 /* Only needed for non blk-mq (.request_fn) multipath */
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500267 if (m->mpio_pool) {
268 struct dm_mpath_io *mpio = info->ptr;
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100269
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500270 info->ptr = NULL;
271 mempool_free(mpio, m->mpio_pool);
272 }
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100273}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274
/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/
278
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +0100279static int __pg_init_all_paths(struct multipath *m)
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000280{
281 struct pgpath *pgpath;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000282 unsigned long pg_init_delay = 0;
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000283
Mike Snitzer91e968a2016-03-17 17:10:15 -0400284 if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +0100285 return 0;
Hannes Reinecke17f4ff42014-02-28 15:33:42 +0100286
Mike Snitzer91e968a2016-03-17 17:10:15 -0400287 atomic_inc(&m->pg_init_count);
Mike Snitzer518257b2016-03-17 16:32:10 -0400288 clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +0100289
290 /* Check here to reset pg_init_required */
291 if (!m->current_pg)
292 return 0;
293
Mike Snitzer518257b2016-03-17 16:32:10 -0400294 if (test_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags))
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000295 pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
296 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000297 list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
298 /* Skip failed paths */
299 if (!pgpath->is_active)
300 continue;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000301 if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
302 pg_init_delay))
Mike Snitzer91e968a2016-03-17 17:10:15 -0400303 atomic_inc(&m->pg_init_in_progress);
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000304 }
Mike Snitzer91e968a2016-03-17 17:10:15 -0400305 return atomic_read(&m->pg_init_in_progress);
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000306}
307
Mike Snitzer2da16102016-03-17 18:38:17 -0400308static int pg_init_all_paths(struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309{
Mike Snitzer2da16102016-03-17 18:38:17 -0400310 int r;
311 unsigned long flags;
312
313 spin_lock_irqsave(&m->lock, flags);
314 r = __pg_init_all_paths(m);
315 spin_unlock_irqrestore(&m->lock, flags);
316
317 return r;
318}
319
320static void __switch_pg(struct multipath *m, struct priority_group *pg)
321{
322 m->current_pg = pg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323
324 /* Must we initialise the PG first, and queue I/O till it's ready? */
Chandra Seetharamancfae5c92008-05-01 14:50:11 -0700325 if (m->hw_handler_name) {
Mike Snitzer518257b2016-03-17 16:32:10 -0400326 set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
327 set_bit(MPATHF_QUEUE_IO, &m->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328 } else {
Mike Snitzer518257b2016-03-17 16:32:10 -0400329 clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
330 clear_bit(MPATHF_QUEUE_IO, &m->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 }
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100332
Mike Snitzer91e968a2016-03-17 17:10:15 -0400333 atomic_set(&m->pg_init_count, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
Mike Snitzer2da16102016-03-17 18:38:17 -0400336static struct pgpath *choose_path_in_pg(struct multipath *m,
337 struct priority_group *pg,
338 size_t nr_bytes)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339{
Mike Snitzer2da16102016-03-17 18:38:17 -0400340 unsigned long flags;
Josef "Jeff" Sipekc922d5f2006-12-08 02:36:33 -0800341 struct dm_path *path;
Mike Snitzer2da16102016-03-17 18:38:17 -0400342 struct pgpath *pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343
Mike Snitzer90a43232016-02-17 21:29:17 -0500344 path = pg->ps.type->select_path(&pg->ps, nr_bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 if (!path)
Mike Snitzer2da16102016-03-17 18:38:17 -0400346 return ERR_PTR(-ENXIO);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347
Mike Snitzer2da16102016-03-17 18:38:17 -0400348 pgpath = path_to_pgpath(path);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349
Mike Snitzer2da16102016-03-17 18:38:17 -0400350 if (unlikely(lockless_dereference(m->current_pg) != pg)) {
351 /* Only update current_pgpath if pg changed */
352 spin_lock_irqsave(&m->lock, flags);
353 m->current_pgpath = pgpath;
354 __switch_pg(m, pg);
355 spin_unlock_irqrestore(&m->lock, flags);
356 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357
Mike Snitzer2da16102016-03-17 18:38:17 -0400358 return pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359}
360
Mike Snitzer2da16102016-03-17 18:38:17 -0400361static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362{
Mike Snitzer2da16102016-03-17 18:38:17 -0400363 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 struct priority_group *pg;
Mike Snitzer2da16102016-03-17 18:38:17 -0400365 struct pgpath *pgpath;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -0500366 bool bypassed = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
Mike Snitzer91e968a2016-03-17 17:10:15 -0400368 if (!atomic_read(&m->nr_valid_paths)) {
Mike Snitzer518257b2016-03-17 16:32:10 -0400369 clear_bit(MPATHF_QUEUE_IO, &m->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 goto failed;
Benjamin Marzinski1f271972014-08-13 13:53:42 -0500371 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372
373 /* Were we instructed to switch PG? */
Mike Snitzer2da16102016-03-17 18:38:17 -0400374 if (lockless_dereference(m->next_pg)) {
375 spin_lock_irqsave(&m->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 pg = m->next_pg;
Mike Snitzer2da16102016-03-17 18:38:17 -0400377 if (!pg) {
378 spin_unlock_irqrestore(&m->lock, flags);
379 goto check_current_pg;
380 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 m->next_pg = NULL;
Mike Snitzer2da16102016-03-17 18:38:17 -0400382 spin_unlock_irqrestore(&m->lock, flags);
383 pgpath = choose_path_in_pg(m, pg, nr_bytes);
384 if (!IS_ERR_OR_NULL(pgpath))
385 return pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 }
387
388 /* Don't change PG until it has no remaining paths */
Mike Snitzer2da16102016-03-17 18:38:17 -0400389check_current_pg:
390 pg = lockless_dereference(m->current_pg);
391 if (pg) {
392 pgpath = choose_path_in_pg(m, pg, nr_bytes);
393 if (!IS_ERR_OR_NULL(pgpath))
394 return pgpath;
395 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396
397 /*
398 * Loop through priority groups until we find a valid path.
399 * First time we skip PGs marked 'bypassed'.
Mike Christief220fd42012-06-03 00:29:45 +0100400 * Second time we only try the ones we skipped, but set
401 * pg_init_delay_retry so we do not hammer controllers.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402 */
403 do {
404 list_for_each_entry(pg, &m->priority_groups, list) {
405 if (pg->bypassed == bypassed)
406 continue;
Mike Snitzer2da16102016-03-17 18:38:17 -0400407 pgpath = choose_path_in_pg(m, pg, nr_bytes);
408 if (!IS_ERR_OR_NULL(pgpath)) {
Mike Christief220fd42012-06-03 00:29:45 +0100409 if (!bypassed)
Mike Snitzer518257b2016-03-17 16:32:10 -0400410 set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
Mike Snitzer2da16102016-03-17 18:38:17 -0400411 return pgpath;
Mike Christief220fd42012-06-03 00:29:45 +0100412 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 }
414 } while (bypassed--);
415
416failed:
Mike Snitzer2da16102016-03-17 18:38:17 -0400417 spin_lock_irqsave(&m->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 m->current_pgpath = NULL;
419 m->current_pg = NULL;
Mike Snitzer2da16102016-03-17 18:38:17 -0400420 spin_unlock_irqrestore(&m->lock, flags);
421
422 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423}
424
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800425/*
426 * Check whether bios must be queued in the device-mapper core rather
427 * than here in the target.
428 *
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800429 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
430 * same value then we are not between multipath_presuspend()
431 * and multipath_resume() calls and we have no need to check
432 * for the DMF_NOFLUSH_SUSPENDING flag.
433 */
Mike Snitzer2da16102016-03-17 18:38:17 -0400434static int must_push_back(struct multipath *m)
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800435{
Mike Snitzer518257b2016-03-17 16:32:10 -0400436 return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
437 ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
438 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
Hannes Reineckee8099172014-02-28 15:33:44 +0100439 dm_noflush_suspending(m->ti)));
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800440}
441
Hannes Reinecke36fcffc2014-02-28 15:33:47 +0100442/*
443 * Map cloned requests
444 */
Mike Snitzere5863d92014-12-17 21:08:12 -0500445static int __multipath_map(struct dm_target *ti, struct request *clone,
446 union map_info *map_context,
447 struct request *rq, struct request **__clone)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448{
Mike Snitzer7943bd62016-02-02 21:53:15 -0500449 struct multipath *m = ti->private;
Hannes Reineckee3bde042014-02-28 15:33:46 +0100450 int r = DM_MAPIO_REQUEUE;
Mike Snitzere5863d92014-12-17 21:08:12 -0500451 size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 struct pgpath *pgpath;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +0100453 struct block_device *bdev;
Hannes Reineckee3bde042014-02-28 15:33:46 +0100454 struct dm_mpath_io *mpio;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 /* Do we need to select a new pgpath? */
Mike Snitzer2da16102016-03-17 18:38:17 -0400457 pgpath = lockless_dereference(m->current_pgpath);
458 if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
459 pgpath = choose_pgpath(m, nr_bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100461 if (!pgpath) {
Mike Snitzer2da16102016-03-17 18:38:17 -0400462 if (!must_push_back(m))
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100463 r = -EIO; /* Failed */
Mike Snitzer2da16102016-03-17 18:38:17 -0400464 return r;
Mike Snitzer518257b2016-03-17 16:32:10 -0400465 } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
466 test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
Mike Snitzer2da16102016-03-17 18:38:17 -0400467 pg_init_all_paths(m);
468 return r;
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100469 }
Mike Snitzer6afbc012014-07-08 11:55:09 -0400470
Mike Snitzer2eff1922016-02-03 09:13:14 -0500471 mpio = set_mpio(m, map_context);
472 if (!mpio)
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100473 /* ENOMEM, requeue */
Mike Snitzer2da16102016-03-17 18:38:17 -0400474 return r;
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100475
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100476 mpio->pgpath = pgpath;
477 mpio->nr_bytes = nr_bytes;
Keith Busch2eb6e1e2014-10-17 17:46:36 -0600478
479 bdev = pgpath->path.dev->bdev;
480
Mike Snitzere5863d92014-12-17 21:08:12 -0500481 if (clone) {
Mike Snitzerc5248f72016-02-20 14:02:49 -0500482 /*
483 * Old request-based interface: allocated clone is passed in.
484 * Used by: .request_fn stacked on .request_fn path(s).
485 */
Mike Snitzere5863d92014-12-17 21:08:12 -0500486 clone->q = bdev_get_queue(bdev);
487 clone->rq_disk = bdev->bd_disk;
488 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
489 } else {
Mike Snitzereca7ee62016-02-20 13:45:38 -0500490 /*
491 * blk-mq request-based interface; used by both:
492 * .request_fn stacked on blk-mq path(s) and
493 * blk-mq stacked on blk-mq path(s).
494 */
Mike Snitzer78ce23b2016-01-31 17:38:28 -0500495 *__clone = blk_mq_alloc_request(bdev_get_queue(bdev),
496 rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
Mike Snitzer4c6dd532015-05-27 15:23:56 -0400497 if (IS_ERR(*__clone)) {
Mike Snitzere5863d92014-12-17 21:08:12 -0500498 /* ENOMEM, requeue */
Mike Snitzer2eff1922016-02-03 09:13:14 -0500499 clear_request_fn_mpio(m, map_context);
Mike Snitzere5863d92014-12-17 21:08:12 -0500500 return r;
Mike Snitzer4c6dd532015-05-27 15:23:56 -0400501 }
Mike Snitzere5863d92014-12-17 21:08:12 -0500502 (*__clone)->bio = (*__clone)->biotail = NULL;
503 (*__clone)->rq_disk = bdev->bd_disk;
504 (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
505 }
506
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100507 if (pgpath->pg->ps.type->start_io)
508 pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
509 &pgpath->path,
510 nr_bytes);
Keith Busch2eb6e1e2014-10-17 17:46:36 -0600511 return DM_MAPIO_REMAPPED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512}
513
Mike Snitzere5863d92014-12-17 21:08:12 -0500514static int multipath_map(struct dm_target *ti, struct request *clone,
515 union map_info *map_context)
516{
517 return __multipath_map(ti, clone, map_context, NULL, NULL);
518}
519
520static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
521 union map_info *map_context,
522 struct request **clone)
523{
524 return __multipath_map(ti, NULL, map_context, rq, clone);
525}
526
/* Free a clone request through blk_mq_free_request(). */
static void multipath_release_clone(struct request *clone)
{
	blk_mq_free_request(clone);
}
531
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532/*
533 * If we run out of usable paths, should we queue I/O or error it?
534 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -0500535static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
536 bool save_old_value)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537{
538 unsigned long flags;
539
540 spin_lock_irqsave(&m->lock, flags);
541
Mike Snitzer518257b2016-03-17 16:32:10 -0400542 if (save_old_value) {
543 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
544 set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
545 else
546 clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
547 } else {
548 if (queue_if_no_path)
549 set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
550 else
551 clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
552 }
553 if (queue_if_no_path)
554 set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
Alasdair G Kergon485ef692005-09-27 21:45:45 -0700555 else
Mike Snitzer518257b2016-03-17 16:32:10 -0400556 clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
557
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 spin_unlock_irqrestore(&m->lock, flags);
559
Hannes Reinecke63d832c2014-05-26 14:45:39 +0200560 if (!queue_if_no_path)
561 dm_table_run_md_queue_async(m->ti->table);
562
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 return 0;
564}
565
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566/*
567 * An event is triggered whenever a path is taken out of use.
568 * Includes path failure and PG bypass.
569 */
David Howellsc4028952006-11-22 14:57:56 +0000570static void trigger_event(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571{
David Howellsc4028952006-11-22 14:57:56 +0000572 struct multipath *m =
573 container_of(work, struct multipath, trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574
575 dm_table_event(m->ti->table);
576}
577
/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
Mike Snitzer498f0102011-08-02 12:32:04 +0100588static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 struct dm_target *ti)
590{
591 int r;
592 struct path_selector_type *pst;
593 unsigned ps_argc;
594
Mike Snitzer498f0102011-08-02 12:32:04 +0100595 static struct dm_arg _args[] = {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700596 {0, 1024, "invalid number of path selector args"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 };
598
Mike Snitzer498f0102011-08-02 12:32:04 +0100599 pst = dm_get_path_selector(dm_shift_arg(as));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 if (!pst) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700601 ti->error = "unknown path selector type";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 return -EINVAL;
603 }
604
Mike Snitzer498f0102011-08-02 12:32:04 +0100605 r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
Mikulas Patocka371b2e32008-07-21 12:00:24 +0100606 if (r) {
607 dm_put_path_selector(pst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 return -EINVAL;
Mikulas Patocka371b2e32008-07-21 12:00:24 +0100609 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
611 r = pst->create(&pg->ps, ps_argc, as->argv);
612 if (r) {
613 dm_put_path_selector(pst);
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700614 ti->error = "path selector constructor failed";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 return r;
616 }
617
618 pg->ps.type = pst;
Mike Snitzer498f0102011-08-02 12:32:04 +0100619 dm_consume_args(as, ps_argc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
621 return 0;
622}
623
Mike Snitzer498f0102011-08-02 12:32:04 +0100624static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 struct dm_target *ti)
626{
627 int r;
628 struct pgpath *p;
Hannes Reineckeae11b1b2008-07-17 17:49:02 -0700629 struct multipath *m = ti->private;
Mike Snitzera58a9352012-07-27 15:08:04 +0100630 struct request_queue *q = NULL;
631 const char *attached_handler_name;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
633 /* we need at least a path arg */
634 if (as->argc < 1) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700635 ti->error = "no device given";
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100636 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 }
638
639 p = alloc_pgpath();
640 if (!p)
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100641 return ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
Mike Snitzer498f0102011-08-02 12:32:04 +0100643 r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
Nikanth Karthikesan8215d6e2010-03-06 02:32:27 +0000644 &p->path.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 if (r) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700646 ti->error = "error getting device";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 goto bad;
648 }
649
Mike Snitzer518257b2016-03-17 16:32:10 -0400650 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) || m->hw_handler_name)
Mike Snitzera58a9352012-07-27 15:08:04 +0100651 q = bdev_get_queue(p->path.dev->bdev);
Hannes Reineckea0cf7ea2009-06-22 10:12:11 +0100652
Mike Snitzer518257b2016-03-17 16:32:10 -0400653 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
Christoph Hellwig1bab0de2015-08-27 14:16:54 +0200654retain:
Mike Snitzera58a9352012-07-27 15:08:04 +0100655 attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
656 if (attached_handler_name) {
657 /*
658 * Reset hw_handler_name to match the attached handler
659 * and clear any hw_handler_params associated with the
660 * ignored handler.
661 *
662 * NB. This modifies the table line to show the actual
663 * handler instead of the original table passed in.
664 */
665 kfree(m->hw_handler_name);
666 m->hw_handler_name = attached_handler_name;
667
668 kfree(m->hw_handler_params);
669 m->hw_handler_params = NULL;
670 }
671 }
672
673 if (m->hw_handler_name) {
Hannes Reineckea0cf7ea2009-06-22 10:12:11 +0100674 r = scsi_dh_attach(q, m->hw_handler_name);
675 if (r == -EBUSY) {
Christoph Hellwig1bab0de2015-08-27 14:16:54 +0200676 char b[BDEVNAME_SIZE];
Hannes Reineckea0cf7ea2009-06-22 10:12:11 +0100677
Christoph Hellwig1bab0de2015-08-27 14:16:54 +0200678 printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
679 bdevname(p->path.dev->bdev, b));
680 goto retain;
681 }
Hannes Reineckeae11b1b2008-07-17 17:49:02 -0700682 if (r < 0) {
Hannes Reineckea0cf7ea2009-06-22 10:12:11 +0100683 ti->error = "error attaching hardware handler";
Hannes Reineckeae11b1b2008-07-17 17:49:02 -0700684 dm_put_device(ti, p->path.dev);
685 goto bad;
686 }
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700687
688 if (m->hw_handler_params) {
689 r = scsi_dh_set_params(q, m->hw_handler_params);
690 if (r < 0) {
691 ti->error = "unable to set hardware "
692 "handler parameters";
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700693 dm_put_device(ti, p->path.dev);
694 goto bad;
695 }
696 }
Hannes Reineckeae11b1b2008-07-17 17:49:02 -0700697 }
698
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
700 if (r) {
701 dm_put_device(ti, p->path.dev);
702 goto bad;
703 }
704
705 return p;
706
707 bad:
708 free_pgpath(p);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100709 return ERR_PTR(r);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710}
711
Mike Snitzer498f0102011-08-02 12:32:04 +0100712static struct priority_group *parse_priority_group(struct dm_arg_set *as,
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700713 struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714{
Mike Snitzer498f0102011-08-02 12:32:04 +0100715 static struct dm_arg _args[] = {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700716 {1, 1024, "invalid number of paths"},
717 {0, 1024, "invalid number of selector args"}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 };
719
720 int r;
Mike Snitzer498f0102011-08-02 12:32:04 +0100721 unsigned i, nr_selector_args, nr_args;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 struct priority_group *pg;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700723 struct dm_target *ti = m->ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724
725 if (as->argc < 2) {
726 as->argc = 0;
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100727 ti->error = "not enough priority group arguments";
728 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 }
730
731 pg = alloc_priority_group();
732 if (!pg) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700733 ti->error = "couldn't allocate priority group";
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100734 return ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 }
736 pg->m = m;
737
738 r = parse_path_selector(as, pg, ti);
739 if (r)
740 goto bad;
741
742 /*
743 * read the paths
744 */
Mike Snitzer498f0102011-08-02 12:32:04 +0100745 r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 if (r)
747 goto bad;
748
Mike Snitzer498f0102011-08-02 12:32:04 +0100749 r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 if (r)
751 goto bad;
752
Mike Snitzer498f0102011-08-02 12:32:04 +0100753 nr_args = 1 + nr_selector_args;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 for (i = 0; i < pg->nr_pgpaths; i++) {
755 struct pgpath *pgpath;
Mike Snitzer498f0102011-08-02 12:32:04 +0100756 struct dm_arg_set path_args;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757
Mike Snitzer498f0102011-08-02 12:32:04 +0100758 if (as->argc < nr_args) {
Mikulas Patocka148acff2008-07-21 12:00:30 +0100759 ti->error = "not enough path parameters";
Alasdair G Kergon6bbf79a12010-08-12 04:13:49 +0100760 r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 goto bad;
Mikulas Patocka148acff2008-07-21 12:00:30 +0100762 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763
Mike Snitzer498f0102011-08-02 12:32:04 +0100764 path_args.argc = nr_args;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 path_args.argv = as->argv;
766
767 pgpath = parse_path(&path_args, &pg->ps, ti);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100768 if (IS_ERR(pgpath)) {
769 r = PTR_ERR(pgpath);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 goto bad;
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100771 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
773 pgpath->pg = pg;
774 list_add_tail(&pgpath->list, &pg->pgpaths);
Mike Snitzer498f0102011-08-02 12:32:04 +0100775 dm_consume_args(as, nr_args);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 }
777
778 return pg;
779
780 bad:
781 free_priority_group(pg, ti);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100782 return ERR_PTR(r);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783}
784
Mike Snitzer498f0102011-08-02 12:32:04 +0100785static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 unsigned hw_argc;
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700788 int ret;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700789 struct dm_target *ti = m->ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790
Mike Snitzer498f0102011-08-02 12:32:04 +0100791 static struct dm_arg _args[] = {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700792 {0, 1024, "invalid number of hardware handler args"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 };
794
Mike Snitzer498f0102011-08-02 12:32:04 +0100795 if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 return -EINVAL;
797
798 if (!hw_argc)
799 return 0;
800
Mike Snitzer498f0102011-08-02 12:32:04 +0100801 m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
Chandra Seetharaman14e98c52008-11-13 23:39:06 +0000802
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700803 if (hw_argc > 1) {
804 char *p;
805 int i, j, len = 4;
806
807 for (i = 0; i <= hw_argc - 2; i++)
808 len += strlen(as->argv[i]) + 1;
809 p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
810 if (!p) {
811 ti->error = "memory allocation failed";
812 ret = -ENOMEM;
813 goto fail;
814 }
815 j = sprintf(p, "%d", hw_argc - 1);
816 for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
817 j = sprintf(p, "%s", as->argv[i]);
818 }
Mike Snitzer498f0102011-08-02 12:32:04 +0100819 dm_consume_args(as, hw_argc - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820
821 return 0;
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700822fail:
823 kfree(m->hw_handler_name);
824 m->hw_handler_name = NULL;
825 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826}
827
Mike Snitzer498f0102011-08-02 12:32:04 +0100828static int parse_features(struct dm_arg_set *as, struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829{
830 int r;
831 unsigned argc;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700832 struct dm_target *ti = m->ti;
Mike Snitzer498f0102011-08-02 12:32:04 +0100833 const char *arg_name;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834
Mike Snitzer498f0102011-08-02 12:32:04 +0100835 static struct dm_arg _args[] = {
Mike Snitzera58a9352012-07-27 15:08:04 +0100836 {0, 6, "invalid number of feature args"},
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100837 {1, 50, "pg_init_retries must be between 1 and 50"},
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000838 {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 };
840
Mike Snitzer498f0102011-08-02 12:32:04 +0100841 r = dm_read_arg_group(_args, as, &argc, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 if (r)
843 return -EINVAL;
844
845 if (!argc)
846 return 0;
847
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100848 do {
Mike Snitzer498f0102011-08-02 12:32:04 +0100849 arg_name = dm_shift_arg(as);
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100850 argc--;
851
Mike Snitzer498f0102011-08-02 12:32:04 +0100852 if (!strcasecmp(arg_name, "queue_if_no_path")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -0500853 r = queue_if_no_path(m, true, false);
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100854 continue;
855 }
856
Mike Snitzera58a9352012-07-27 15:08:04 +0100857 if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
Mike Snitzer518257b2016-03-17 16:32:10 -0400858 set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
Mike Snitzera58a9352012-07-27 15:08:04 +0100859 continue;
860 }
861
Mike Snitzer498f0102011-08-02 12:32:04 +0100862 if (!strcasecmp(arg_name, "pg_init_retries") &&
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100863 (argc >= 1)) {
Mike Snitzer498f0102011-08-02 12:32:04 +0100864 r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100865 argc--;
866 continue;
867 }
868
Mike Snitzer498f0102011-08-02 12:32:04 +0100869 if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000870 (argc >= 1)) {
Mike Snitzer498f0102011-08-02 12:32:04 +0100871 r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000872 argc--;
873 continue;
874 }
875
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 ti->error = "Unrecognised multipath feature request";
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100877 r = -EINVAL;
878 } while (argc && !r);
879
880 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881}
882
883static int multipath_ctr(struct dm_target *ti, unsigned int argc,
884 char **argv)
885{
Mike Snitzer498f0102011-08-02 12:32:04 +0100886 /* target arguments */
887 static struct dm_arg _args[] = {
Mike Snitzera490a072011-03-24 13:54:33 +0000888 {0, 1024, "invalid number of priority groups"},
889 {0, 1024, "invalid initial priority group number"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 };
891
892 int r;
893 struct multipath *m;
Mike Snitzer498f0102011-08-02 12:32:04 +0100894 struct dm_arg_set as;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 unsigned pg_count = 0;
896 unsigned next_pg_num;
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500897 bool use_blk_mq = dm_use_blk_mq(dm_table_get_md(ti->table));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898
899 as.argc = argc;
900 as.argv = argv;
901
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500902 m = alloc_multipath(ti, use_blk_mq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903 if (!m) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700904 ti->error = "can't allocate multipath";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 return -EINVAL;
906 }
907
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700908 r = parse_features(&as, m);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 if (r)
910 goto bad;
911
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700912 r = parse_hw_handler(&as, m);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 if (r)
914 goto bad;
915
Mike Snitzer498f0102011-08-02 12:32:04 +0100916 r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 if (r)
918 goto bad;
919
Mike Snitzer498f0102011-08-02 12:32:04 +0100920 r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 if (r)
922 goto bad;
923
Mike Snitzera490a072011-03-24 13:54:33 +0000924 if ((!m->nr_priority_groups && next_pg_num) ||
925 (m->nr_priority_groups && !next_pg_num)) {
926 ti->error = "invalid initial priority group";
927 r = -EINVAL;
928 goto bad;
929 }
930
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 /* parse the priority groups */
932 while (as.argc) {
933 struct priority_group *pg;
Mike Snitzer91e968a2016-03-17 17:10:15 -0400934 unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700936 pg = parse_priority_group(&as, m);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100937 if (IS_ERR(pg)) {
938 r = PTR_ERR(pg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 goto bad;
940 }
941
Mike Snitzer91e968a2016-03-17 17:10:15 -0400942 nr_valid_paths += pg->nr_pgpaths;
943 atomic_set(&m->nr_valid_paths, nr_valid_paths);
944
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 list_add_tail(&pg->list, &m->priority_groups);
946 pg_count++;
947 pg->pg_num = pg_count;
948 if (!--next_pg_num)
949 m->next_pg = pg;
950 }
951
952 if (pg_count != m->nr_priority_groups) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700953 ti->error = "priority group count mismatch";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 r = -EINVAL;
955 goto bad;
956 }
957
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +0000958 ti->num_flush_bios = 1;
959 ti->num_discard_bios = 1;
Mike Snitzer042bcef2013-05-10 14:37:16 +0100960 ti->num_write_same_bios = 1;
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500961 if (use_blk_mq)
962 ti->per_io_data_size = sizeof(struct dm_mpath_io);
Mikulas Patocka86279212009-06-22 10:12:24 +0100963
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 return 0;
965
966 bad:
967 free_multipath(m);
968 return r;
969}
970
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000971static void multipath_wait_for_pg_init_completion(struct multipath *m)
972{
973 DECLARE_WAITQUEUE(wait, current);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000974
975 add_wait_queue(&m->pg_init_wait, &wait);
976
977 while (1) {
978 set_current_state(TASK_UNINTERRUPTIBLE);
979
Mike Snitzer91e968a2016-03-17 17:10:15 -0400980 if (!atomic_read(&m->pg_init_in_progress))
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000981 break;
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000982
983 io_schedule();
984 }
985 set_current_state(TASK_RUNNING);
986
987 remove_wait_queue(&m->pg_init_wait, &wait);
988}
989
990static void flush_multipath_work(struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991{
Mike Snitzer518257b2016-03-17 16:32:10 -0400992 set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
993 smp_mb__after_atomic();
Shiva Krishna Merla954a73d2013-10-30 03:26:38 +0000994
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -0700995 flush_workqueue(kmpath_handlerd);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000996 multipath_wait_for_pg_init_completion(m);
Alasdair G Kergona044d012005-07-12 15:53:02 -0700997 flush_workqueue(kmultipathd);
Tejun Heo43829732012-08-20 14:51:24 -0700998 flush_work(&m->trigger_event);
Shiva Krishna Merla954a73d2013-10-30 03:26:38 +0000999
Mike Snitzer518257b2016-03-17 16:32:10 -04001000 clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
1001 smp_mb__after_atomic();
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +00001002}
1003
1004static void multipath_dtr(struct dm_target *ti)
1005{
1006 struct multipath *m = ti->private;
1007
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001008 flush_multipath_work(m);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 free_multipath(m);
1010}
1011
1012/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 * Take a path out of use.
1014 */
1015static int fail_path(struct pgpath *pgpath)
1016{
1017 unsigned long flags;
1018 struct multipath *m = pgpath->pg->m;
1019
1020 spin_lock_irqsave(&m->lock, flags);
1021
Kiyoshi Ueda66800732008-10-10 13:36:58 +01001022 if (!pgpath->is_active)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 goto out;
1024
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001025 DMWARN("Failing path %s.", pgpath->path.dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026
1027 pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001028 pgpath->is_active = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 pgpath->fail_count++;
1030
Mike Snitzer91e968a2016-03-17 17:10:15 -04001031 atomic_dec(&m->nr_valid_paths);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
1033 if (pgpath == m->current_pgpath)
1034 m->current_pgpath = NULL;
1035
Mike Andersonb15546f2007-10-19 22:48:02 +01001036 dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
Mike Snitzer91e968a2016-03-17 17:10:15 -04001037 pgpath->path.dev->name, atomic_read(&m->nr_valid_paths));
Mike Andersonb15546f2007-10-19 22:48:02 +01001038
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001039 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040
1041out:
1042 spin_unlock_irqrestore(&m->lock, flags);
1043
1044 return 0;
1045}
1046
1047/*
1048 * Reinstate a previously-failed path
1049 */
1050static int reinstate_path(struct pgpath *pgpath)
1051{
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001052 int r = 0, run_queue = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 unsigned long flags;
1054 struct multipath *m = pgpath->pg->m;
Mike Snitzer91e968a2016-03-17 17:10:15 -04001055 unsigned nr_valid_paths;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
1057 spin_lock_irqsave(&m->lock, flags);
1058
Kiyoshi Ueda66800732008-10-10 13:36:58 +01001059 if (pgpath->is_active)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 goto out;
1061
Mike Snitzerec31f3f2016-02-20 12:49:43 -05001062 DMWARN("Reinstating path %s.", pgpath->path.dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063
1064 r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
1065 if (r)
1066 goto out;
1067
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001068 pgpath->is_active = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069
Mike Snitzer91e968a2016-03-17 17:10:15 -04001070 nr_valid_paths = atomic_inc_return(&m->nr_valid_paths);
1071 if (nr_valid_paths == 1) {
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001072 m->current_pgpath = NULL;
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001073 run_queue = 1;
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001074 } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001075 if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
Mike Snitzer91e968a2016-03-17 17:10:15 -04001076 atomic_inc(&m->pg_init_in_progress);
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001077 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078
Mike Andersonb15546f2007-10-19 22:48:02 +01001079 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
Mike Snitzer91e968a2016-03-17 17:10:15 -04001080 pgpath->path.dev->name, nr_valid_paths);
Mike Andersonb15546f2007-10-19 22:48:02 +01001081
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001082 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083
1084out:
1085 spin_unlock_irqrestore(&m->lock, flags);
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001086 if (run_queue)
1087 dm_table_run_md_queue_async(m->ti->table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088
1089 return r;
1090}
1091
1092/*
1093 * Fail or reinstate all paths that match the provided struct dm_dev.
1094 */
1095static int action_dev(struct multipath *m, struct dm_dev *dev,
1096 action_fn action)
1097{
Mike Snitzer19040c02011-03-24 13:54:31 +00001098 int r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 struct pgpath *pgpath;
1100 struct priority_group *pg;
1101
1102 list_for_each_entry(pg, &m->priority_groups, list) {
1103 list_for_each_entry(pgpath, &pg->pgpaths, list) {
1104 if (pgpath->path.dev == dev)
1105 r = action(pgpath);
1106 }
1107 }
1108
1109 return r;
1110}
1111
1112/*
1113 * Temporarily try to avoid having to use the specified PG
1114 */
1115static void bypass_pg(struct multipath *m, struct priority_group *pg,
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001116 bool bypassed)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117{
1118 unsigned long flags;
1119
1120 spin_lock_irqsave(&m->lock, flags);
1121
1122 pg->bypassed = bypassed;
1123 m->current_pgpath = NULL;
1124 m->current_pg = NULL;
1125
1126 spin_unlock_irqrestore(&m->lock, flags);
1127
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001128 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129}
1130
1131/*
1132 * Switch to using the specified PG from the next I/O that gets mapped
1133 */
1134static int switch_pg_num(struct multipath *m, const char *pgstr)
1135{
1136 struct priority_group *pg;
1137 unsigned pgnum;
1138 unsigned long flags;
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001139 char dummy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001141 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 (pgnum > m->nr_priority_groups)) {
1143 DMWARN("invalid PG number supplied to switch_pg_num");
1144 return -EINVAL;
1145 }
1146
1147 spin_lock_irqsave(&m->lock, flags);
1148 list_for_each_entry(pg, &m->priority_groups, list) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001149 pg->bypassed = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 if (--pgnum)
1151 continue;
1152
1153 m->current_pgpath = NULL;
1154 m->current_pg = NULL;
1155 m->next_pg = pg;
1156 }
1157 spin_unlock_irqrestore(&m->lock, flags);
1158
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001159 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 return 0;
1161}
1162
1163/*
1164 * Set/clear bypassed status of a PG.
1165 * PGs are numbered upwards from 1 in the order they were declared.
1166 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001167static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168{
1169 struct priority_group *pg;
1170 unsigned pgnum;
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001171 char dummy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001173 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 (pgnum > m->nr_priority_groups)) {
1175 DMWARN("invalid PG number supplied to bypass_pg");
1176 return -EINVAL;
1177 }
1178
1179 list_for_each_entry(pg, &m->priority_groups, list) {
1180 if (!--pgnum)
1181 break;
1182 }
1183
1184 bypass_pg(m, pg, bypassed);
1185 return 0;
1186}
1187
1188/*
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001189 * Should we retry pg_init immediately?
1190 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001191static bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001192{
1193 unsigned long flags;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001194 bool limit_reached = false;
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001195
1196 spin_lock_irqsave(&m->lock, flags);
1197
Mike Snitzer91e968a2016-03-17 17:10:15 -04001198 if (atomic_read(&m->pg_init_count) <= m->pg_init_retries &&
1199 !test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
Mike Snitzer518257b2016-03-17 16:32:10 -04001200 set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001201 else
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001202 limit_reached = true;
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001203
1204 spin_unlock_irqrestore(&m->lock, flags);
1205
1206 return limit_reached;
1207}
1208
Chandra Seetharaman3ae31f62009-10-21 09:22:46 -07001209static void pg_init_done(void *data, int errors)
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001210{
Moger, Babu83c0d5d2010-03-06 02:29:45 +00001211 struct pgpath *pgpath = data;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001212 struct priority_group *pg = pgpath->pg;
1213 struct multipath *m = pg->m;
1214 unsigned long flags;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001215 bool delay_retry = false;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001216
1217 /* device or driver problems */
1218 switch (errors) {
1219 case SCSI_DH_OK:
1220 break;
1221 case SCSI_DH_NOSYS:
1222 if (!m->hw_handler_name) {
1223 errors = 0;
1224 break;
1225 }
Moger, Babuf7b934c2010-03-06 02:29:49 +00001226 DMERR("Could not failover the device: Handler scsi_dh_%s "
1227 "Error %d.", m->hw_handler_name, errors);
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001228 /*
1229 * Fail path for now, so we do not ping pong
1230 */
1231 fail_path(pgpath);
1232 break;
1233 case SCSI_DH_DEV_TEMP_BUSY:
1234 /*
1235 * Probably doing something like FW upgrade on the
1236 * controller so try the other pg.
1237 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001238 bypass_pg(m, pg, true);
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001239 break;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001240 case SCSI_DH_RETRY:
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001241 /* Wait before retrying. */
1242 delay_retry = 1;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001243 case SCSI_DH_IMM_RETRY:
1244 case SCSI_DH_RES_TEMP_UNAVAIL:
1245 if (pg_init_limit_reached(m, pgpath))
1246 fail_path(pgpath);
1247 errors = 0;
1248 break;
Mike Snitzerec31f3f2016-02-20 12:49:43 -05001249 case SCSI_DH_DEV_OFFLINED:
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001250 default:
1251 /*
1252 * We probably do not want to fail the path for a device
1253 * error, but this is what the old dm did. In future
1254 * patches we can do more advanced handling.
1255 */
1256 fail_path(pgpath);
1257 }
1258
1259 spin_lock_irqsave(&m->lock, flags);
1260 if (errors) {
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001261 if (pgpath == m->current_pgpath) {
1262 DMERR("Could not failover device. Error %d.", errors);
1263 m->current_pgpath = NULL;
1264 m->current_pg = NULL;
1265 }
Mike Snitzer518257b2016-03-17 16:32:10 -04001266 } else if (!test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001267 pg->bypassed = false;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001268
Mike Snitzer91e968a2016-03-17 17:10:15 -04001269 if (atomic_dec_return(&m->pg_init_in_progress) > 0)
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001270 /* Activations of other paths are still on going */
1271 goto out;
1272
Mike Snitzer518257b2016-03-17 16:32:10 -04001273 if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
1274 if (delay_retry)
1275 set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
1276 else
1277 clear_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
1278
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001279 if (__pg_init_all_paths(m))
1280 goto out;
1281 }
Mike Snitzer518257b2016-03-17 16:32:10 -04001282 clear_bit(MPATHF_QUEUE_IO, &m->flags);
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001283
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001284 /*
1285 * Wake up any thread waiting to suspend.
1286 */
1287 wake_up(&m->pg_init_wait);
1288
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001289out:
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001290 spin_unlock_irqrestore(&m->lock, flags);
1291}
1292
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001293static void activate_path(struct work_struct *work)
1294{
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001295 struct pgpath *pgpath =
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001296 container_of(work, struct pgpath, activate_path.work);
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001297
Hannes Reinecke3a017502014-02-28 15:33:49 +01001298 if (pgpath->is_active)
1299 scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
1300 pg_init_done, pgpath);
1301 else
1302 pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001303}
1304
/*
 * Return 1 if @error is one of the errors that must not be retried,
 * 0 otherwise.
 */
static int noretry_error(int error)
{
	if (error == -EOPNOTSUPP ||
	    error == -EREMOTEIO ||
	    error == -EILSEQ ||
	    error == -ENODATA ||
	    error == -ENOSPC)
		return 1;

	/* Anything else could be a path failure, so should be retried */
	return 0;
}
1319
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320/*
1321 * end_io handling
1322 */
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001323static int do_end_io(struct multipath *m, struct request *clone,
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001324 int error, struct dm_mpath_io *mpio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325{
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001326 /*
1327 * We don't queue any clone request inside the multipath target
1328 * during end I/O handling, since those clone requests don't have
1329 * bio clones. If we queue them inside the multipath target,
1330 * we need to make bio clones, that requires memory allocation.
1331 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1332 * don't have bio clones.)
1333 * Instead of queueing the clone request here, we queue the original
1334 * request into dm core, which will remake a clone request and
1335 * clone bios for it and resubmit it later.
1336 */
1337 int r = DM_ENDIO_REQUEUE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001339 if (!error && !clone->errors)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 return 0; /* I/O complete */
1341
Mike Snitzer7eee4ae2014-06-02 15:50:06 -04001342 if (noretry_error(error))
Mike Snitzer959eb4e2010-08-12 04:14:32 +01001343 return error;
1344
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001345 if (mpio->pgpath)
1346 fail_path(mpio->pgpath);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347
Mike Snitzer91e968a2016-03-17 17:10:15 -04001348 if (!atomic_read(&m->nr_valid_paths)) {
Mike Snitzer518257b2016-03-17 16:32:10 -04001349 if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
Mike Snitzer2da16102016-03-17 18:38:17 -04001350 if (!must_push_back(m))
Hannes Reinecke751b2a72011-01-18 10:13:12 +01001351 r = -EIO;
1352 } else {
1353 if (error == -EBADE)
1354 r = error;
1355 }
1356 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001358 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359}
1360
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001361static int multipath_end_io(struct dm_target *ti, struct request *clone,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 int error, union map_info *map_context)
1363{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001364 struct multipath *m = ti->private;
Mike Snitzer2eff1922016-02-03 09:13:14 -05001365 struct dm_mpath_io *mpio = get_mpio(map_context);
Wei Yongjuna71a2612012-10-12 16:59:42 +01001366 struct pgpath *pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 struct path_selector *ps;
1368 int r;
1369
Jun'ichi Nomura466891f2012-03-28 18:41:25 +01001370 BUG_ON(!mpio);
1371
Mike Snitzer2eff1922016-02-03 09:13:14 -05001372 r = do_end_io(m, clone, error, mpio);
Wei Yongjuna71a2612012-10-12 16:59:42 +01001373 pgpath = mpio->pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 if (pgpath) {
1375 ps = &pgpath->pg->ps;
1376 if (ps->type->end_io)
Kiyoshi Ueda02ab8232009-06-22 10:12:27 +01001377 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 }
Mike Snitzer2eff1922016-02-03 09:13:14 -05001379 clear_request_fn_mpio(m, map_context);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380
1381 return r;
1382}
1383
1384/*
1385 * Suspend can't complete until all the I/O is processed so if
Alasdair G Kergon436d4102005-07-12 15:53:03 -07001386 * the last path fails we must error any remaining I/O.
1387 * Note that if the freeze_bdev fails while suspending, the
1388 * queue_if_no_path state is lost - userspace should reset it.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389 */
1390static void multipath_presuspend(struct dm_target *ti)
1391{
Mike Snitzer7943bd62016-02-02 21:53:15 -05001392 struct multipath *m = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001394 queue_if_no_path(m, false, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395}
1396
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +00001397static void multipath_postsuspend(struct dm_target *ti)
1398{
Mike Anderson6380f262009-12-10 23:52:21 +00001399 struct multipath *m = ti->private;
1400
1401 mutex_lock(&m->work_mutex);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001402 flush_multipath_work(m);
Mike Anderson6380f262009-12-10 23:52:21 +00001403 mutex_unlock(&m->work_mutex);
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +00001404}
1405
Alasdair G Kergon436d4102005-07-12 15:53:03 -07001406/*
1407 * Restore the queue_if_no_path setting.
1408 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409static void multipath_resume(struct dm_target *ti)
1410{
Mike Snitzer7943bd62016-02-02 21:53:15 -05001411 struct multipath *m = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412
Mike Snitzer518257b2016-03-17 16:32:10 -04001413 if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags))
1414 set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
1415 else
1416 clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
1417 smp_mb__after_atomic();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418}
1419
1420/*
1421 * Info output has the following format:
1422 * num_multipath_feature_args [multipath_feature_args]*
1423 * num_handler_status_args [handler_status_args]*
1424 * num_groups init_group_number
1425 * [A|D|E num_ps_status_args [ps_status_args]*
1426 * num_paths num_selector_args
1427 * [path_dev A|F fail_count [selector_args]* ]+ ]+
1428 *
1429 * Table output has the following format (identical to the constructor string):
1430 * num_feature_args [features_args]*
1431 * num_handler_args hw_handler [hw_handler_args]*
1432 * num_groups init_group_number
1433 * [priority selector-name num_ps_args [ps_args]*
1434 * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1435 */
Mikulas Patockafd7c0922013-03-01 22:45:44 +00001436static void multipath_status(struct dm_target *ti, status_type_t type,
1437 unsigned status_flags, char *result, unsigned maxlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438{
1439 int sz = 0;
1440 unsigned long flags;
Mike Snitzer7943bd62016-02-02 21:53:15 -05001441 struct multipath *m = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 struct priority_group *pg;
1443 struct pgpath *p;
1444 unsigned pg_num;
1445 char state;
1446
1447 spin_lock_irqsave(&m->lock, flags);
1448
1449 /* Features */
1450 if (type == STATUSTYPE_INFO)
Mike Snitzer91e968a2016-03-17 17:10:15 -04001451 DMEMIT("2 %u %u ", test_bit(MPATHF_QUEUE_IO, &m->flags),
1452 atomic_read(&m->pg_init_count));
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001453 else {
Mike Snitzer518257b2016-03-17 16:32:10 -04001454 DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001455 (m->pg_init_retries > 0) * 2 +
Mike Snitzera58a9352012-07-27 15:08:04 +01001456 (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
Mike Snitzer518257b2016-03-17 16:32:10 -04001457 test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags));
1458 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001459 DMEMIT("queue_if_no_path ");
1460 if (m->pg_init_retries)
1461 DMEMIT("pg_init_retries %u ", m->pg_init_retries);
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001462 if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
1463 DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
Mike Snitzer518257b2016-03-17 16:32:10 -04001464 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
Mike Snitzera58a9352012-07-27 15:08:04 +01001465 DMEMIT("retain_attached_hw_handler ");
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001466 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001468 if (!m->hw_handler_name || type == STATUSTYPE_INFO)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 DMEMIT("0 ");
1470 else
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001471 DMEMIT("1 %s ", m->hw_handler_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472
1473 DMEMIT("%u ", m->nr_priority_groups);
1474
1475 if (m->next_pg)
1476 pg_num = m->next_pg->pg_num;
1477 else if (m->current_pg)
1478 pg_num = m->current_pg->pg_num;
1479 else
Mike Snitzera490a072011-03-24 13:54:33 +00001480 pg_num = (m->nr_priority_groups ? 1 : 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481
1482 DMEMIT("%u ", pg_num);
1483
1484 switch (type) {
1485 case STATUSTYPE_INFO:
1486 list_for_each_entry(pg, &m->priority_groups, list) {
1487 if (pg->bypassed)
1488 state = 'D'; /* Disabled */
1489 else if (pg == m->current_pg)
1490 state = 'A'; /* Currently Active */
1491 else
1492 state = 'E'; /* Enabled */
1493
1494 DMEMIT("%c ", state);
1495
1496 if (pg->ps.type->status)
1497 sz += pg->ps.type->status(&pg->ps, NULL, type,
1498 result + sz,
1499 maxlen - sz);
1500 else
1501 DMEMIT("0 ");
1502
1503 DMEMIT("%u %u ", pg->nr_pgpaths,
1504 pg->ps.type->info_args);
1505
1506 list_for_each_entry(p, &pg->pgpaths, list) {
1507 DMEMIT("%s %s %u ", p->path.dev->name,
Kiyoshi Ueda66800732008-10-10 13:36:58 +01001508 p->is_active ? "A" : "F",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 p->fail_count);
1510 if (pg->ps.type->status)
1511 sz += pg->ps.type->status(&pg->ps,
1512 &p->path, type, result + sz,
1513 maxlen - sz);
1514 }
1515 }
1516 break;
1517
1518 case STATUSTYPE_TABLE:
1519 list_for_each_entry(pg, &m->priority_groups, list) {
1520 DMEMIT("%s ", pg->ps.type->name);
1521
1522 if (pg->ps.type->status)
1523 sz += pg->ps.type->status(&pg->ps, NULL, type,
1524 result + sz,
1525 maxlen - sz);
1526 else
1527 DMEMIT("0 ");
1528
1529 DMEMIT("%u %u ", pg->nr_pgpaths,
1530 pg->ps.type->table_args);
1531
1532 list_for_each_entry(p, &pg->pgpaths, list) {
1533 DMEMIT("%s ", p->path.dev->name);
1534 if (pg->ps.type->status)
1535 sz += pg->ps.type->status(&pg->ps,
1536 &p->path, type, result + sz,
1537 maxlen - sz);
1538 }
1539 }
1540 break;
1541 }
1542
1543 spin_unlock_irqrestore(&m->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544}
1545
1546static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1547{
Mike Anderson6380f262009-12-10 23:52:21 +00001548 int r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 struct dm_dev *dev;
Mike Snitzer7943bd62016-02-02 21:53:15 -05001550 struct multipath *m = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 action_fn action;
1552
Mike Anderson6380f262009-12-10 23:52:21 +00001553 mutex_lock(&m->work_mutex);
1554
Kiyoshi Uedac2f3d242009-12-10 23:52:27 +00001555 if (dm_suspended(ti)) {
1556 r = -EBUSY;
1557 goto out;
1558 }
1559
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 if (argc == 1) {
Mike Snitzer498f0102011-08-02 12:32:04 +01001561 if (!strcasecmp(argv[0], "queue_if_no_path")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001562 r = queue_if_no_path(m, true, false);
Mike Anderson6380f262009-12-10 23:52:21 +00001563 goto out;
Mike Snitzer498f0102011-08-02 12:32:04 +01001564 } else if (!strcasecmp(argv[0], "fail_if_no_path")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001565 r = queue_if_no_path(m, false, false);
Mike Anderson6380f262009-12-10 23:52:21 +00001566 goto out;
1567 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 }
1569
Mike Anderson6380f262009-12-10 23:52:21 +00001570 if (argc != 2) {
Jose Castilloa356e422014-01-29 17:52:45 +01001571 DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc);
Mike Anderson6380f262009-12-10 23:52:21 +00001572 goto out;
1573 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574
Mike Snitzer498f0102011-08-02 12:32:04 +01001575 if (!strcasecmp(argv[0], "disable_group")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001576 r = bypass_pg_num(m, argv[1], true);
Mike Anderson6380f262009-12-10 23:52:21 +00001577 goto out;
Mike Snitzer498f0102011-08-02 12:32:04 +01001578 } else if (!strcasecmp(argv[0], "enable_group")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001579 r = bypass_pg_num(m, argv[1], false);
Mike Anderson6380f262009-12-10 23:52:21 +00001580 goto out;
Mike Snitzer498f0102011-08-02 12:32:04 +01001581 } else if (!strcasecmp(argv[0], "switch_group")) {
Mike Anderson6380f262009-12-10 23:52:21 +00001582 r = switch_pg_num(m, argv[1]);
1583 goto out;
Mike Snitzer498f0102011-08-02 12:32:04 +01001584 } else if (!strcasecmp(argv[0], "reinstate_path"))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 action = reinstate_path;
Mike Snitzer498f0102011-08-02 12:32:04 +01001586 else if (!strcasecmp(argv[0], "fail_path"))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 action = fail_path;
Mike Anderson6380f262009-12-10 23:52:21 +00001588 else {
Jose Castilloa356e422014-01-29 17:52:45 +01001589 DMWARN("Unrecognised multipath message received: %s", argv[0]);
Mike Anderson6380f262009-12-10 23:52:21 +00001590 goto out;
1591 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592
Nikanth Karthikesan8215d6e2010-03-06 02:32:27 +00001593 r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 if (r) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001595 DMWARN("message: error getting device %s",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 argv[1]);
Mike Anderson6380f262009-12-10 23:52:21 +00001597 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 }
1599
1600 r = action_dev(m, dev, action);
1601
1602 dm_put_device(ti, dev);
1603
Mike Anderson6380f262009-12-10 23:52:21 +00001604out:
1605 mutex_unlock(&m->work_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607}
1608
Christoph Hellwige56f81e2015-10-15 14:10:50 +02001609static int multipath_prepare_ioctl(struct dm_target *ti,
1610 struct block_device **bdev, fmode_t *mode)
Milan Broz9af4aa32006-10-03 01:15:20 -07001611{
Mikulas Patocka35991652012-06-03 00:29:58 +01001612 struct multipath *m = ti->private;
Mike Snitzer2da16102016-03-17 18:38:17 -04001613 struct pgpath *current_pgpath;
Mikulas Patocka35991652012-06-03 00:29:58 +01001614 int r;
1615
Mike Snitzer2da16102016-03-17 18:38:17 -04001616 current_pgpath = lockless_dereference(m->current_pgpath);
1617 if (!current_pgpath)
1618 current_pgpath = choose_pgpath(m, 0);
Milan Broz9af4aa32006-10-03 01:15:20 -07001619
Mike Snitzer2da16102016-03-17 18:38:17 -04001620 if (current_pgpath) {
Mike Snitzer518257b2016-03-17 16:32:10 -04001621 if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) {
Mike Snitzer2da16102016-03-17 18:38:17 -04001622 *bdev = current_pgpath->path.dev->bdev;
1623 *mode = current_pgpath->path.dev->mode;
Junichi Nomura43e43c92015-11-17 09:36:56 +00001624 r = 0;
1625 } else {
1626 /* pg_init has not started or completed */
1627 r = -ENOTCONN;
1628 }
1629 } else {
1630 /* No path is available */
Mike Snitzer518257b2016-03-17 16:32:10 -04001631 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
Junichi Nomura43e43c92015-11-17 09:36:56 +00001632 r = -ENOTCONN;
1633 else
1634 r = -EIO;
Milan Broze90dae12006-10-03 01:15:22 -07001635 }
Milan Broz9af4aa32006-10-03 01:15:20 -07001636
Junichi Nomura5bbbfdf2015-11-17 09:39:26 +00001637 if (r == -ENOTCONN) {
Mike Snitzer2da16102016-03-17 18:38:17 -04001638 if (!lockless_dereference(m->current_pg)) {
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001639 /* Path status changed, redo selection */
Mike Snitzer2da16102016-03-17 18:38:17 -04001640 (void) choose_pgpath(m, 0);
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001641 }
Mike Snitzer518257b2016-03-17 16:32:10 -04001642 if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
Mike Snitzer2da16102016-03-17 18:38:17 -04001643 pg_init_all_paths(m);
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001644 dm_table_run_md_queue_async(m->ti->table);
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001645 }
Mikulas Patocka35991652012-06-03 00:29:58 +01001646
Christoph Hellwige56f81e2015-10-15 14:10:50 +02001647 /*
1648 * Only pass ioctls through if the device sizes match exactly.
1649 */
1650 if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
1651 return 1;
1652 return r;
Milan Broz9af4aa32006-10-03 01:15:20 -07001653}
1654
Mike Snitzeraf4874e2009-06-22 10:12:33 +01001655static int multipath_iterate_devices(struct dm_target *ti,
1656 iterate_devices_callout_fn fn, void *data)
1657{
1658 struct multipath *m = ti->private;
1659 struct priority_group *pg;
1660 struct pgpath *p;
1661 int ret = 0;
1662
1663 list_for_each_entry(pg, &m->priority_groups, list) {
1664 list_for_each_entry(p, &pg->pgpaths, list) {
Mike Snitzer5dea2712009-07-23 20:30:42 +01001665 ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
Mike Snitzeraf4874e2009-06-22 10:12:33 +01001666 if (ret)
1667 goto out;
1668 }
1669 }
1670
1671out:
1672 return ret;
1673}
1674
Mike Snitzer9f54cec2016-02-11 21:42:28 -05001675static int pgpath_busy(struct pgpath *pgpath)
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001676{
1677 struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1678
Mike Snitzer52b09912015-02-23 16:36:41 -05001679 return blk_lld_busy(q);
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001680}
1681
1682/*
1683 * We return "busy", only when we can map I/Os but underlying devices
1684 * are busy (so even if we map I/Os now, the I/Os will wait on
1685 * the underlying queue).
1686 * In other words, if we want to kill I/Os or queue them inside us
1687 * due to map unavailability, we don't return "busy". Otherwise,
1688 * dm core won't give us the I/Os and we can't do what we want.
1689 */
1690static int multipath_busy(struct dm_target *ti)
1691{
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001692 bool busy = false, has_active = false;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001693 struct multipath *m = ti->private;
Mike Snitzer2da16102016-03-17 18:38:17 -04001694 struct priority_group *pg, *next_pg;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001695 struct pgpath *pgpath;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001696
Jun'ichi Nomura7a7a3b42014-07-08 00:55:14 +00001697 /* pg_init in progress or no paths available */
Mike Snitzer91e968a2016-03-17 17:10:15 -04001698 if (atomic_read(&m->pg_init_in_progress) ||
Mike Snitzer2da16102016-03-17 18:38:17 -04001699 (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)))
1700 return true;
1701
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001702 /* Guess which priority_group will be used at next mapping time */
Mike Snitzer2da16102016-03-17 18:38:17 -04001703 pg = lockless_dereference(m->current_pg);
1704 next_pg = lockless_dereference(m->next_pg);
1705 if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg))
1706 pg = next_pg;
1707
1708 if (!pg) {
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001709 /*
1710 * We don't know which pg will be used at next mapping time.
Mike Snitzer2da16102016-03-17 18:38:17 -04001711 * We don't call choose_pgpath() here to avoid to trigger
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001712 * pg_init just by busy checking.
1713 * So we don't know whether underlying devices we will be using
1714 * at next mapping time are busy or not. Just try mapping.
1715 */
Mike Snitzer2da16102016-03-17 18:38:17 -04001716 return busy;
1717 }
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001718
1719 /*
1720 * If there is one non-busy active path at least, the path selector
1721 * will be able to select it. So we consider such a pg as not busy.
1722 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001723 busy = true;
Mike Snitzer2da16102016-03-17 18:38:17 -04001724 list_for_each_entry(pgpath, &pg->pgpaths, list) {
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001725 if (pgpath->is_active) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001726 has_active = true;
Mike Snitzer9f54cec2016-02-11 21:42:28 -05001727 if (!pgpath_busy(pgpath)) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001728 busy = false;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001729 break;
1730 }
1731 }
Mike Snitzer2da16102016-03-17 18:38:17 -04001732 }
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001733
Mike Snitzer2da16102016-03-17 18:38:17 -04001734 if (!has_active) {
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001735 /*
1736 * No active path in this pg, so this pg won't be used and
1737 * the current_pg will be changed at next mapping time.
1738 * We need to try mapping to determine it.
1739 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001740 busy = false;
Mike Snitzer2da16102016-03-17 18:38:17 -04001741 }
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001742
1743 return busy;
1744}
1745
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746/*-----------------------------------------------------------------
1747 * Module setup
1748 *---------------------------------------------------------------*/
1749static struct target_type multipath_target = {
1750 .name = "multipath",
Mike Snitzer16f12262016-01-31 17:22:27 -05001751 .version = {1, 11, 0},
1752 .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 .module = THIS_MODULE,
1754 .ctr = multipath_ctr,
1755 .dtr = multipath_dtr,
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001756 .map_rq = multipath_map,
Mike Snitzere5863d92014-12-17 21:08:12 -05001757 .clone_and_map_rq = multipath_clone_and_map,
1758 .release_clone_rq = multipath_release_clone,
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001759 .rq_end_io = multipath_end_io,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 .presuspend = multipath_presuspend,
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +00001761 .postsuspend = multipath_postsuspend,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 .resume = multipath_resume,
1763 .status = multipath_status,
1764 .message = multipath_message,
Christoph Hellwige56f81e2015-10-15 14:10:50 +02001765 .prepare_ioctl = multipath_prepare_ioctl,
Mike Snitzeraf4874e2009-06-22 10:12:33 +01001766 .iterate_devices = multipath_iterate_devices,
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001767 .busy = multipath_busy,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768};
1769
1770static int __init dm_multipath_init(void)
1771{
1772 int r;
1773
1774 /* allocate a slab for the dm_ios */
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001775 _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 if (!_mpio_cache)
1777 return -ENOMEM;
1778
1779 r = dm_register_target(&multipath_target);
1780 if (r < 0) {
Alasdair G Kergon0cd33122007-07-12 17:27:01 +01001781 DMERR("register failed %d", r);
Johannes Thumshirnff658e92015-01-11 12:45:23 +01001782 r = -EINVAL;
1783 goto bad_register_target;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 }
1785
Tejun Heo4d4d66a2011-01-13 19:59:57 +00001786 kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
Alasdair G Kergonc5573082005-05-05 16:16:07 -07001787 if (!kmultipathd) {
Alasdair G Kergon0cd33122007-07-12 17:27:01 +01001788 DMERR("failed to create workqueue kmpathd");
Johannes Thumshirnff658e92015-01-11 12:45:23 +01001789 r = -ENOMEM;
1790 goto bad_alloc_kmultipathd;
Alasdair G Kergonc5573082005-05-05 16:16:07 -07001791 }
1792
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001793 /*
1794 * A separate workqueue is used to handle the device handlers
1795 * to avoid overloading existing workqueue. Overloading the
1796 * old workqueue would also create a bottleneck in the
1797 * path of the storage hardware device activation.
1798 */
Tejun Heo4d4d66a2011-01-13 19:59:57 +00001799 kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
1800 WQ_MEM_RECLAIM);
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001801 if (!kmpath_handlerd) {
1802 DMERR("failed to create workqueue kmpath_handlerd");
Johannes Thumshirnff658e92015-01-11 12:45:23 +01001803 r = -ENOMEM;
1804 goto bad_alloc_kmpath_handlerd;
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001805 }
1806
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001807 DMINFO("version %u.%u.%u loaded",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808 multipath_target.version[0], multipath_target.version[1],
1809 multipath_target.version[2]);
1810
Johannes Thumshirnff658e92015-01-11 12:45:23 +01001811 return 0;
1812
1813bad_alloc_kmpath_handlerd:
1814 destroy_workqueue(kmultipathd);
1815bad_alloc_kmultipathd:
1816 dm_unregister_target(&multipath_target);
1817bad_register_target:
1818 kmem_cache_destroy(_mpio_cache);
1819
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820 return r;
1821}
1822
/*
 * Module teardown: destroy both workqueues, unregister the multipath
 * target, then destroy the dm_mpath_io slab cache.
 */
static void __exit dm_multipath_exit(void)
{
	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	dm_unregister_target(&multipath_target);
	kmem_cache_destroy(_mpio_cache);
}
1831
/* Module entry and exit points */
module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

/* Module metadata */
MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");