// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * (part of code stolen from loop.c)
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>

#include <linux/uaccess.h>
#include <asm/types.h>

#include <linux/nbd.h>
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nbd.h>

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static struct workqueue_struct *nbd_del_wq;
static int nbd_total_devices = 0;

struct nbd_sock {
	struct socket *sock;
	struct mutex tx_lock;
	struct request *pending;
	int sent;
	bool dead;
	int fallback_index;
	int cookie;
};

struct recv_thread_args {
	struct work_struct work;
	struct nbd_device *nbd;
	int index;
};

struct link_dead_args {
	struct work_struct work;
	int index;
};

#define NBD_RT_TIMEDOUT			0
#define NBD_RT_DISCONNECT_REQUESTED	1
#define NBD_RT_DISCONNECTED		2
#define NBD_RT_HAS_PID_FILE		3
#define NBD_RT_HAS_CONFIG_REF		4
#define NBD_RT_BOUND			5
#define NBD_RT_DISCONNECT_ON_CLOSE	6
#define NBD_RT_HAS_BACKEND_FILE		7

#define NBD_DESTROY_ON_DISCONNECT	0
#define NBD_DISCONNECT_REQUESTED	1

struct nbd_config {
	u32 flags;
	unsigned long runtime_flags;
	u64 dead_conn_timeout;

	struct nbd_sock **socks;
	int num_connections;
	atomic_t live_connections;
	wait_queue_head_t conn_wait;

	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
	unsigned int blksize_bits;
	loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
};

static inline unsigned int nbd_blksize(struct nbd_config *config)
{
	return 1u << config->blksize_bits;
}

struct nbd_device {
	struct blk_mq_tag_set tag_set;

	int index;
	refcount_t config_refs;
	refcount_t refs;
	struct nbd_config *config;
	struct mutex config_lock;
	struct gendisk *disk;
	struct workqueue_struct *recv_workq;
	struct work_struct remove_work;

	struct list_head list;
	struct task_struct *task_setup;

	unsigned long flags;
	pid_t pid; /* pid of nbd-client, if attached */

	char *backend;
};

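/*
 * NBD_CMD_REQUEUED marks a command that has been handed back to the
 * block layer for another attempt (after a partial send or a dead
 * socket). It is cleared in nbd_queue_rq() when the request is issued
 * again, and the reply path uses it to detect replies that raced with a
 * timeout-driven requeue.
 */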
#define NBD_CMD_REQUEUED	1
/*
 * This flag will be set if nbd_queue_rq() succeeds, and will be checked and
 * cleared in completion. Both setting and clearing of the flag are protected
 * by cmd->lock.
 */
#define NBD_CMD_INFLIGHT	2

struct nbd_cmd {
	struct nbd_device *nbd;
	struct mutex lock;
	int index;
	int cookie;
	int retries;
	blk_status_t status;
	unsigned long flags;
	u32 cmd_cookie;
};

#if IS_ENABLED(CONFIG_DEBUG_FS)
static struct dentry *nbd_dbg_dir;
#endif

#define nbd_name(nbd) ((nbd)->disk->disk_name)

#define NBD_MAGIC 0x68797548

#define NBD_DEF_BLKSIZE_BITS 10

static unsigned int nbds_max = 16;
static int max_part = 16;
static int part_shift;

static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
static void nbd_config_put(struct nbd_device *nbd);
static void nbd_connect_reply(struct genl_info *info, int index);
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
static void nbd_dead_link_work(struct work_struct *work);
static void nbd_disconnect_and_put(struct nbd_device *nbd);

static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
	return disk_to_dev(nbd->disk);
}

static void nbd_requeue_cmd(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);

	if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
		blk_mq_requeue_request(req, true);
}

#define NBD_COOKIE_BITS 32

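/*
 * Requests are identified on the wire by a 64-bit handle: the low 32 bits
 * carry the unique blk-mq tag and the upper NBD_COOKIE_BITS carry
 * cmd->cmd_cookie, which is bumped each time the command slot is reused,
 * so a stale reply for an earlier incarnation of the same tag can be
 * rejected.
 */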
static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	u32 tag = blk_mq_unique_tag(req);
	u64 cookie = cmd->cmd_cookie;

	return (cookie << NBD_COOKIE_BITS) | tag;
}

static u32 nbd_handle_to_tag(u64 handle)
{
	return (u32)handle;
}

static u32 nbd_handle_to_cookie(u64 handle)
{
	return (u32)(handle >> NBD_COOKIE_BITS);
}

static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case NBD_CMD_DISC: return "disconnect";
	case NBD_CMD_FLUSH: return "flush";
	case NBD_CMD_TRIM: return "trim/discard";
	}
	return "invalid";
}

static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%d\n", nbd->pid);
}

static const struct device_attribute pid_attr = {
	.attr = { .name = "pid", .mode = 0444},
	.show = pid_show,
};

static ssize_t backend_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%s\n", nbd->backend ?: "");
}

static const struct device_attribute backend_attr = {
	.attr = { .name = "backend", .mode = 0444},
	.show = backend_show,
};

static void nbd_dev_remove(struct nbd_device *nbd)
{
	struct gendisk *disk = nbd->disk;

	del_gendisk(disk);
	blk_cleanup_disk(disk);
	blk_mq_free_tag_set(&nbd->tag_set);

	/*
	 * Remove from idr after del_gendisk() completes, so if the same ID is
	 * reused, the following add_disk() will succeed.
	 */
	mutex_lock(&nbd_index_mutex);
	idr_remove(&nbd_index_idr, nbd->index);
	mutex_unlock(&nbd_index_mutex);
	destroy_workqueue(nbd->recv_workq);
	kfree(nbd);
}

static void nbd_dev_remove_work(struct work_struct *work)
{
	nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
}

static void nbd_put(struct nbd_device *nbd)
{
	if (!refcount_dec_and_test(&nbd->refs))
		return;

	/* Call del_gendisk() asynchronously to prevent deadlock */
	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
		queue_work(nbd_del_wq, &nbd->remove_work);
	else
		nbd_dev_remove(nbd);
}

static int nbd_disconnected(struct nbd_config *config)
{
	return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
		test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
}

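/*
 * Mark a socket as dead: shut it down, drop it from the live connection
 * count and, when the failure was not part of a user-requested
 * disconnect, queue nbd_dead_link_work() so that userspace can be told
 * about the dead link and supply a replacement socket.
 */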
static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
				int notify)
{
	if (!nsock->dead && notify && !nbd_disconnected(nbd->config)) {
		struct link_dead_args *args;
		args = kmalloc(sizeof(struct link_dead_args), GFP_NOIO);
		if (args) {
			INIT_WORK(&args->work, nbd_dead_link_work);
			args->index = nbd->index;
			queue_work(system_wq, &args->work);
		}
	}
	if (!nsock->dead) {
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
			if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
					       &nbd->config->runtime_flags)) {
				set_bit(NBD_RT_DISCONNECTED,
					&nbd->config->runtime_flags);
				dev_info(nbd_to_dev(nbd),
					"Disconnected due to user request.\n");
			}
		}
	}
	nsock->dead = true;
	nsock->pending = NULL;
	nsock->sent = 0;
}

static void nbd_size_clear(struct nbd_device *nbd)
{
	if (nbd->config->bytesize) {
		set_capacity(nbd->disk, 0);
		kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
	}
}

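/*
 * Record the export size. blksize must be a power of two between 512 and
 * PAGE_SIZE (zero selects the 1 << NBD_DEF_BLKSIZE_BITS default). For
 * example, bytesize = 1 GiB with blksize = 4096 stores blksize_bits = 12
 * and, once an nbd-client is attached, sets the capacity to
 * bytesize >> 9 = 2097152 512-byte sectors.
 */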
static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
			loff_t blksize)
{
	if (!blksize)
		blksize = 1u << NBD_DEF_BLKSIZE_BITS;
	if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
		return -EINVAL;

	nbd->config->bytesize = bytesize;
	nbd->config->blksize_bits = __ffs(blksize);

	if (!nbd->pid)
		return 0;

	if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
		nbd->disk->queue->limits.discard_granularity = blksize;
		nbd->disk->queue->limits.discard_alignment = blksize;
		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
	}
	blk_queue_logical_block_size(nbd->disk->queue, blksize);
	blk_queue_physical_block_size(nbd->disk->queue, blksize);

	if (max_part)
		set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
	if (!set_capacity_and_notify(nbd->disk, bytesize >> 9))
		kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
	return 0;
}

static void nbd_complete_rq(struct request *req)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

	dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", req,
		cmd->status ? "failed" : "done");

	blk_mq_end_request(req, cmd->status);
}

/*
 * Forcibly shutdown the socket causing all listeners to error
 */
static void sock_shutdown(struct nbd_device *nbd)
{
	struct nbd_config *config = nbd->config;
	int i;

	if (config->num_connections == 0)
		return;
	if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
		return;

	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];
		mutex_lock(&nsock->tx_lock);
		nbd_mark_nsock_dead(nbd, nsock, 0);
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
}

static u32 req_to_nbd_cmd_type(struct request *req)
{
	switch (req_op(req)) {
	case REQ_OP_DISCARD:
		return NBD_CMD_TRIM;
	case REQ_OP_FLUSH:
		return NBD_CMD_FLUSH;
	case REQ_OP_WRITE:
		return NBD_CMD_WRITE;
	case REQ_OP_READ:
		return NBD_CMD_READ;
	default:
		return U32_MAX;
	}
}

static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
						 bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
	struct nbd_device *nbd = cmd->nbd;
	struct nbd_config *config;

	if (!mutex_trylock(&cmd->lock))
		return BLK_EH_RESET_TIMER;

	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
		mutex_unlock(&cmd->lock);
		return BLK_EH_DONE;
	}

	if (!refcount_inc_not_zero(&nbd->config_refs)) {
		cmd->status = BLK_STS_TIMEOUT;
		mutex_unlock(&cmd->lock);
		goto done;
	}
	config = nbd->config;

	if (config->num_connections > 1 ||
	    (config->num_connections == 1 && nbd->tag_set.timeout)) {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out, retrying (%d/%d alive)\n",
				    atomic_read(&config->live_connections),
				    config->num_connections);
		/*
		 * Hooray we have more connections, requeue this IO, the submit
		 * path will put it on a real connection. Or if only one
		 * connection is configured, the submit path will wait until
		 * a new connection is reconfigured or until the dead connection
		 * timeout expires.
		 */
		if (config->socks) {
			if (cmd->index < config->num_connections) {
				struct nbd_sock *nsock =
					config->socks[cmd->index];
				mutex_lock(&nsock->tx_lock);
				/* We can have multiple outstanding requests, so
				 * we don't want to mark the nsock dead if we've
				 * already reconnected with a new socket, so
				 * only mark it dead if it's the same socket we
				 * were sent out on.
				 */
				if (cmd->cookie == nsock->cookie)
					nbd_mark_nsock_dead(nbd, nsock, 1);
				mutex_unlock(&nsock->tx_lock);
			}
			mutex_unlock(&cmd->lock);
			nbd_requeue_cmd(cmd);
			nbd_config_put(nbd);
			return BLK_EH_DONE;
		}
	}

	if (!nbd->tag_set.timeout) {
		/*
		 * Userspace sets timeout=0 to disable socket disconnection,
		 * so just warn and reset the timer.
		 */
		struct nbd_sock *nsock = config->socks[cmd->index];
		cmd->retries++;
		dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
			req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
			(unsigned long long)blk_rq_pos(req) << 9,
			blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);

		mutex_lock(&nsock->tx_lock);
		if (cmd->cookie != nsock->cookie) {
			nbd_requeue_cmd(cmd);
			mutex_unlock(&nsock->tx_lock);
			mutex_unlock(&cmd->lock);
			nbd_config_put(nbd);
			return BLK_EH_DONE;
		}
		mutex_unlock(&nsock->tx_lock);
		mutex_unlock(&cmd->lock);
		nbd_config_put(nbd);
		return BLK_EH_RESET_TIMER;
	}

	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
	cmd->status = BLK_STS_IOERR;
	mutex_unlock(&cmd->lock);
	sock_shutdown(nbd);
	nbd_config_put(nbd);
done:
	blk_mq_complete_request(req);
	return BLK_EH_DONE;
}

/*
 * Send or receive packet. Return a positive value on success and
 * negative value on failure, and never return 0.
 */
static int sock_xmit(struct nbd_device *nbd, int index, int send,
		     struct iov_iter *iter, int msg_flags, int *sent)
{
	struct nbd_config *config = nbd->config;
	struct socket *sock = config->socks[index]->sock;
	int result;
	struct msghdr msg;
	unsigned int noreclaim_flag;

	if (unlikely(!sock)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
			"Attempted %s on closed socket in sock_xmit\n",
			(send ? "send" : "recv"));
		return -EINVAL;
	}

	msg.msg_iter = *iter;

	noreclaim_flag = memalloc_noreclaim_save();
	do {
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

		if (send)
			result = sock_sendmsg(sock, &msg);
		else
			result = sock_recvmsg(sock, &msg, msg.msg_flags);

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
		if (sent)
			*sent += result;
	} while (msg_data_left(&msg));

	memalloc_noreclaim_restore(noreclaim_flag);

	return result;
}

/*
 * Different settings for sk->sk_sndtimeo can result in different return values
 * if there is a signal pending when we enter sendmsg, because reasons?
 */
static inline int was_interrupted(int result)
{
	return result == -ERESTARTSYS || result == -EINTR;
}

/* always call with the tx_lock held */
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_config *config = nbd->config;
	struct nbd_sock *nsock = config->socks[index];
	int result;
	struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
	unsigned long size = blk_rq_bytes(req);
	struct bio *bio;
	u64 handle;
	u32 type;
	u32 nbd_cmd_flags = 0;
	int sent = nsock->sent, skip = 0;

	iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));

	type = req_to_nbd_cmd_type(req);
	if (type == U32_MAX)
		return -EIO;

	if (rq_data_dir(req) == WRITE &&
	    (config->flags & NBD_FLAG_READ_ONLY)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Write on read-only\n");
		return -EIO;
	}

	if (req->cmd_flags & REQ_FUA)
		nbd_cmd_flags |= NBD_CMD_FLAG_FUA;

	/* We did a partial send previously, and we at least sent the whole
	 * request struct, so just go and send the rest of the pages in the
	 * request.
	 */
	if (sent) {
		if (sent >= sizeof(request)) {
			skip = sent - sizeof(request);

			/* initialize handle for tracing purposes */
			handle = nbd_cmd_handle(cmd);

			goto send_pages;
		}
		iov_iter_advance(&from, sent);
	} else {
		cmd->cmd_cookie++;
	}
	cmd->index = index;
	cmd->cookie = nsock->cookie;
	cmd->retries = 0;
	request.type = htonl(type | nbd_cmd_flags);
	if (type != NBD_CMD_FLUSH) {
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
	handle = nbd_cmd_handle(cmd);
	memcpy(request.handle, &handle, sizeof(handle));

	trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));

	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		req, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, index, 1, &from,
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
	trace_nbd_header_sent(req, handle);
	if (result < 0) {
		if (was_interrupted(result)) {
			/* If we haven't sent anything we can just return BUSY,
			 * however if we have sent something we need to make
			 * sure we only allow this req to be sent until we are
			 * completely done.
			 */
			if (sent) {
				nsock->pending = req;
				nsock->sent = sent;
			}
			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
			return BLK_STS_RESOURCE;
		}
		dev_err_ratelimited(disk_to_dev(nbd->disk),
			"Send control failed (result %d)\n", result);
		return -EAGAIN;
	}
send_pages:
	if (type != NBD_CMD_WRITE)
		goto out;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
		struct bio_vec bvec;

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
			int flags = is_last ? 0 : MSG_MORE;

			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
				req, bvec.bv_len);
			iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
			if (skip) {
				if (skip >= iov_iter_count(&from)) {
					skip -= iov_iter_count(&from);
					continue;
				}
				iov_iter_advance(&from, skip);
				skip = 0;
			}
			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
			if (result < 0) {
				if (was_interrupted(result)) {
					/* We've already sent the header, we
					 * have no choice but to set pending and
					 * return BUSY.
					 */
					nsock->pending = req;
					nsock->sent = sent;
					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
					return BLK_STS_RESOURCE;
				}
				dev_err(disk_to_dev(nbd->disk),
					"Send data failed (result %d)\n",
					result);
				return -EAGAIN;
			}
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
			if (is_last)
				break;
		}
		bio = next;
	}
out:
	trace_nbd_payload_sent(req, handle);
	nsock->pending = NULL;
	nsock->sent = 0;
	return 0;
}

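/*
 * Read one reply header from the given socket and sanity-check its magic.
 * struct nbd_reply (from the <linux/nbd.h> uapi header) carries the
 * server's 32-bit error code and the 64-bit handle identifying the
 * command being completed.
 */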
static int nbd_read_reply(struct nbd_device *nbd, int index,
			  struct nbd_reply *reply)
{
	struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)};
	struct iov_iter to;
	int result;

	reply->magic = 0;
	iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply));
	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
	if (result < 0) {
		if (!nbd_disconnected(nbd->config))
			dev_err(disk_to_dev(nbd->disk),
				"Receive control failed (result %d)\n", result);
		return result;
	}

	if (ntohl(reply->magic) != NBD_REPLY_MAGIC) {
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
				(unsigned long)ntohl(reply->magic));
		return -EPROTO;
	}

	return 0;
}

/* NULL returned = something went wrong, inform userspace */
static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
					struct nbd_reply *reply)
{
	int result;
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u64 handle;
	u16 hwq;
	u32 tag;
	int ret = 0;

	memcpy(&handle, reply->handle, sizeof(handle));
	tag = nbd_handle_to_tag(handle);
	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
	}
	trace_nbd_header_received(req, handle);
	cmd = blk_mq_rq_to_pdu(req);

	mutex_lock(&cmd->lock);
	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
		dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
			tag, cmd->status, cmd->flags);
		ret = -ENOENT;
		goto out;
	}
	if (cmd->index != index) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d from different sock %d (expected %d)",
			tag, index, cmd->index);
		ret = -ENOENT;
		goto out;
	}
	if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
		dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
			req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
		ret = -ENOENT;
		goto out;
	}
	if (cmd->status != BLK_STS_OK) {
		dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n",
			req);
		ret = -ENOENT;
		goto out;
	}
	if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
		dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
			req);
		ret = -ENOENT;
		goto out;
	}
	if (ntohl(reply->error)) {
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
			ntohl(reply->error));
		cmd->status = BLK_STS_IOERR;
		goto out;
	}

	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
	if (rq_data_dir(req) != WRITE) {
		struct req_iterator iter;
		struct bio_vec bvec;
		struct iov_iter to;

		rq_for_each_segment(bvec, req, iter) {
			iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
			if (result < 0) {
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
					result);
				/*
				 * If we've disconnected, we need to make sure we
				 * complete this request, otherwise error out
				 * and let the timeout stuff handle resubmitting
				 * this request onto another connection.
				 */
				if (nbd_disconnected(nbd->config)) {
					cmd->status = BLK_STS_IOERR;
					goto out;
				}
				ret = -EIO;
				goto out;
			}
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
				req, bvec.bv_len);
		}
	}
out:
	trace_nbd_payload_received(req, handle);
	mutex_unlock(&cmd->lock);
	return ret ? ERR_PTR(ret) : cmd;
}

static void recv_work(struct work_struct *work)
{
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
	struct nbd_config *config = nbd->config;
	struct request_queue *q = nbd->disk->queue;
	struct nbd_sock *nsock;
	struct nbd_cmd *cmd;
	struct request *rq;

	while (1) {
		struct nbd_reply reply;

		if (nbd_read_reply(nbd, args->index, &reply))
			break;

		/*
		 * Grab .q_usage_counter so the request pool won't go away, then no
		 * request use-after-free is possible during nbd_handle_reply().
		 * If the queue is frozen, there won't be any inflight requests, so
		 * we needn't handle the incoming garbage message.
		 */
		if (!percpu_ref_tryget(&q->q_usage_counter)) {
			dev_err(disk_to_dev(nbd->disk), "%s: no io inflight\n",
				__func__);
			break;
		}

		cmd = nbd_handle_reply(nbd, args->index, &reply);
		if (IS_ERR(cmd)) {
			percpu_ref_put(&q->q_usage_counter);
			break;
		}

		rq = blk_mq_rq_from_pdu(cmd);
		if (likely(!blk_should_fake_timeout(rq->q)))
			blk_mq_complete_request(rq);
		percpu_ref_put(&q->q_usage_counter);
	}

	nsock = config->socks[args->index];
	mutex_lock(&nsock->tx_lock);
	nbd_mark_nsock_dead(nbd, nsock, 1);
	mutex_unlock(&nsock->tx_lock);

	nbd_config_put(nbd);
	atomic_dec(&config->recv_threads);
	wake_up(&config->recv_wq);
	kfree(args);
}

static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

	/* don't abort one completed request */
	if (blk_mq_request_completed(req))
		return true;

	mutex_lock(&cmd->lock);
	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
		mutex_unlock(&cmd->lock);
		return true;
	}
	cmd->status = BLK_STS_IOERR;
	mutex_unlock(&cmd->lock);

	blk_mq_complete_request(req);
	return true;
}

static void nbd_clear_que(struct nbd_device *nbd)
{
	blk_mq_quiesce_queue(nbd->disk->queue);
	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
	blk_mq_unquiesce_queue(nbd->disk->queue);
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}

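/*
 * The socket at @index has died; pick another live connection to resend
 * on. Returns the index of a usable socket, or a negative value if the
 * device is disconnected or no live connection remains.
 */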
Josef Bacikf3733242017-04-06 17:01:57 -0400909static int find_fallback(struct nbd_device *nbd, int index)
910{
Josef Bacik5ea8d102017-04-06 17:01:58 -0400911 struct nbd_config *config = nbd->config;
Josef Bacikf3733242017-04-06 17:01:57 -0400912 int new_index = -1;
Josef Bacik5ea8d102017-04-06 17:01:58 -0400913 struct nbd_sock *nsock = config->socks[index];
Josef Bacikf3733242017-04-06 17:01:57 -0400914 int fallback = nsock->fallback_index;
915
Xiubo Liec76a7b2019-09-17 17:26:05 +0530916 if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
Josef Bacikf3733242017-04-06 17:01:57 -0400917 return new_index;
918
Josef Bacik5ea8d102017-04-06 17:01:58 -0400919 if (config->num_connections <= 1) {
Josef Bacikf3733242017-04-06 17:01:57 -0400920 dev_err_ratelimited(disk_to_dev(nbd->disk),
Hou Pud9709582020-02-28 01:40:29 -0500921 "Dead connection, failed to find a fallback\n");
Josef Bacikf3733242017-04-06 17:01:57 -0400922 return new_index;
923 }
924
Josef Bacik5ea8d102017-04-06 17:01:58 -0400925 if (fallback >= 0 && fallback < config->num_connections &&
926 !config->socks[fallback]->dead)
Josef Bacikf3733242017-04-06 17:01:57 -0400927 return fallback;
928
929 if (nsock->fallback_index < 0 ||
Josef Bacik5ea8d102017-04-06 17:01:58 -0400930 nsock->fallback_index >= config->num_connections ||
931 config->socks[nsock->fallback_index]->dead) {
Josef Bacikf3733242017-04-06 17:01:57 -0400932 int i;
Josef Bacik5ea8d102017-04-06 17:01:58 -0400933 for (i = 0; i < config->num_connections; i++) {
Josef Bacikf3733242017-04-06 17:01:57 -0400934 if (i == index)
935 continue;
Josef Bacik5ea8d102017-04-06 17:01:58 -0400936 if (!config->socks[i]->dead) {
Josef Bacikf3733242017-04-06 17:01:57 -0400937 new_index = i;
938 break;
939 }
940 }
941 nsock->fallback_index = new_index;
942 if (new_index < 0) {
943 dev_err_ratelimited(disk_to_dev(nbd->disk),
944 "Dead connection, failed to find a fallback\n");
945 return new_index;
946 }
947 }
948 new_index = nsock->fallback_index;
949 return new_index;
950}
Paul Clements7fdfd402007-10-16 23:27:37 -0700951
Josef Bacik560bc4b2017-04-06 17:02:04 -0400952static int wait_for_reconnect(struct nbd_device *nbd)
953{
954 struct nbd_config *config = nbd->config;
955 if (!config->dead_conn_timeout)
956 return 0;
Xiubo Liec76a7b2019-09-17 17:26:05 +0530957 if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
Josef Bacik560bc4b2017-04-06 17:02:04 -0400958 return 0;
Kevin Vigor5e3c3a72018-05-30 10:45:11 -0600959 return wait_event_timeout(config->conn_wait,
960 atomic_read(&config->live_connections) > 0,
961 config->dead_conn_timeout) > 0;
Josef Bacik560bc4b2017-04-06 17:02:04 -0400962}
963
Josef Bacik9dd5d3a2017-03-24 14:08:26 -0400964static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
Laurent Vivier48cf6062008-04-29 01:02:46 -0700965{
Josef Bacikfd8383f2016-09-08 12:33:37 -0700966 struct request *req = blk_mq_rq_from_pdu(cmd);
967 struct nbd_device *nbd = cmd->nbd;
Josef Bacik5ea8d102017-04-06 17:01:58 -0400968 struct nbd_config *config;
Josef Bacik9561a7a2016-11-22 14:04:40 -0500969 struct nbd_sock *nsock;
Josef Bacik9dd5d3a2017-03-24 14:08:26 -0400970 int ret;
Josef Bacikfd8383f2016-09-08 12:33:37 -0700971
Josef Bacik5ea8d102017-04-06 17:01:58 -0400972 if (!refcount_inc_not_zero(&nbd->config_refs)) {
973 dev_err_ratelimited(disk_to_dev(nbd->disk),
974 "Socks array is empty\n");
975 return -EINVAL;
976 }
977 config = nbd->config;
978
979 if (index >= config->num_connections) {
Josef Bacika897b662016-12-05 16:20:29 -0500980 dev_err_ratelimited(disk_to_dev(nbd->disk),
981 "Attempted send on invalid socket\n");
Josef Bacik5ea8d102017-04-06 17:01:58 -0400982 nbd_config_put(nbd);
Josef Bacik9dd5d3a2017-03-24 14:08:26 -0400983 return -EINVAL;
Josef Bacik9561a7a2016-11-22 14:04:40 -0500984 }
Christoph Hellwig2a842ac2017-06-03 09:38:04 +0200985 cmd->status = BLK_STS_OK;
Josef Bacikf3733242017-04-06 17:01:57 -0400986again:
Josef Bacik5ea8d102017-04-06 17:01:58 -0400987 nsock = config->socks[index];
Josef Bacik9561a7a2016-11-22 14:04:40 -0500988 mutex_lock(&nsock->tx_lock);
Josef Bacikf3733242017-04-06 17:01:57 -0400989 if (nsock->dead) {
Josef Bacik560bc4b2017-04-06 17:02:04 -0400990 int old_index = index;
Josef Bacikf3733242017-04-06 17:01:57 -0400991 index = find_fallback(nbd, index);
Josef Bacik9561a7a2016-11-22 14:04:40 -0500992 mutex_unlock(&nsock->tx_lock);
Josef Bacik560bc4b2017-04-06 17:02:04 -0400993 if (index < 0) {
994 if (wait_for_reconnect(nbd)) {
995 index = old_index;
996 goto again;
997 }
998 /* All the sockets should already be down at this point,
999 * we just want to make sure that DISCONNECTED is set so
1000 * any requests that come in that were queue'ed waiting
1001 * for the reconnect timer don't trigger the timer again
1002 * and instead just error out.
1003 */
1004 sock_shutdown(nbd);
1005 nbd_config_put(nbd);
1006 return -EIO;
1007 }
Josef Bacikf3733242017-04-06 17:01:57 -04001008 goto again;
Laurent Vivier48cf6062008-04-29 01:02:46 -07001009 }
1010
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001011 /* Handle the case that we have a pending request that was partially
1012 * transmitted that _has_ to be serviced first. We need to call requeue
1013 * here so that it gets put _after_ the request that is already on the
1014 * dispatch list.
1015 */
Josef Bacik6a468d52017-11-06 16:11:58 -05001016 blk_mq_start_request(req);
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001017 if (unlikely(nsock->pending && nsock->pending != req)) {
Josef Bacikd7d94d42018-07-16 12:11:34 -04001018 nbd_requeue_cmd(cmd);
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001019 ret = 0;
1020 goto out;
Laurent Vivier48cf6062008-04-29 01:02:46 -07001021 }
Josef Bacikf3733242017-04-06 17:01:57 -04001022 /*
1023 * Some failures are related to the link going down, so anything that
1024 * returns EAGAIN can be retried on a different socket.
1025 */
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001026 ret = nbd_send_cmd(nbd, cmd, index);
Yu Kuai4e6eef52021-09-16 17:33:44 +08001027 /*
1028 * Access to this flag is protected by cmd->lock, thus it's safe to set
1029 * the flag after nbd_send_cmd() succeed to send request to server.
1030 */
1031 if (!ret)
1032 __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
1033 else if (ret == -EAGAIN) {
Josef Bacikf3733242017-04-06 17:01:57 -04001034 dev_err_ratelimited(disk_to_dev(nbd->disk),
Josef Bacik6a468d52017-11-06 16:11:58 -05001035 "Request send failed, requeueing\n");
Josef Bacik799f9a32017-04-06 17:02:02 -04001036 nbd_mark_nsock_dead(nbd, nsock, 1);
Josef Bacikd7d94d42018-07-16 12:11:34 -04001037 nbd_requeue_cmd(cmd);
Josef Bacik6a468d52017-11-06 16:11:58 -05001038 ret = 0;
Josef Bacikf3733242017-04-06 17:01:57 -04001039 }
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001040out:
Josef Bacik9561a7a2016-11-22 14:04:40 -05001041 mutex_unlock(&nsock->tx_lock);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001042 nbd_config_put(nbd);
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001043 return ret;
Laurent Vivier48cf6062008-04-29 01:02:46 -07001044}
1045
Christoph Hellwigfc17b652017-06-03 09:38:05 +02001046static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
Josef Bacikfd8383f2016-09-08 12:33:37 -07001047 const struct blk_mq_queue_data *bd)
Laurent Vivier48cf6062008-04-29 01:02:46 -07001048{
Josef Bacikfd8383f2016-09-08 12:33:37 -07001049 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001050 int ret;
Laurent Vivier48cf6062008-04-29 01:02:46 -07001051
Josef Bacik9561a7a2016-11-22 14:04:40 -05001052 /*
1053 * Since we look at the bio's to send the request over the network we
1054 * need to make sure the completion work doesn't mark this request done
1055 * before we are done doing our send. This keeps us from dereferencing
1056 * freed data if we have particularly fast completions (ie we get the
1057 * completion before we exit sock_xmit on the last bvec) or in the case
1058 * that the server is misbehaving (or there was an error) before we're
1059 * done sending everything over the wire.
1060 */
Josef Bacik8f3ea352018-07-16 12:11:35 -04001061 mutex_lock(&cmd->lock);
Josef Bacikd7d94d42018-07-16 12:11:34 -04001062 clear_bit(NBD_CMD_REQUEUED, &cmd->flags);
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001063
1064 /* We can be called directly from the user space process, which means we
1065 * could possibly have signals pending so our sendmsg will fail. In
1066 * this case we need to return that we are busy, otherwise error out as
1067 * appropriate.
1068 */
1069 ret = nbd_handle_cmd(cmd, hctx->queue_num);
Josef Bacik6e60a3b2017-10-02 16:22:08 -04001070 if (ret < 0)
1071 ret = BLK_STS_IOERR;
1072 else if (!ret)
1073 ret = BLK_STS_OK;
Josef Bacik8f3ea352018-07-16 12:11:35 -04001074 mutex_unlock(&cmd->lock);
Josef Bacik9561a7a2016-11-22 14:04:40 -05001075
Josef Bacik6e60a3b2017-10-02 16:22:08 -04001076 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077}
1078
Mike Christiecf1b2322019-10-17 16:27:34 -05001079static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
1080 int *err)
1081{
1082 struct socket *sock;
1083
1084 *err = 0;
1085 sock = sockfd_lookup(fd, err);
1086 if (!sock)
1087 return NULL;
1088
1089 if (sock->ops->shutdown == sock_no_shutdown) {
1090 dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
1091 *err = -EINVAL;
Sun Kedff10bb2019-11-19 14:09:11 +08001092 sockfd_put(sock);
Mike Christiecf1b2322019-10-17 16:27:34 -05001093 return NULL;
1094 }
1095
1096 return sock;
1097}
1098
Josef Bacike46c7282017-04-06 17:02:00 -04001099static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
1100 bool netlink)
Markus Pargmann23272a672015-10-29 11:51:16 +01001101{
Josef Bacik5ea8d102017-04-06 17:01:58 -04001102 struct nbd_config *config = nbd->config;
Josef Bacik9442b732017-02-07 17:10:22 -05001103 struct socket *sock;
Josef Bacik9561a7a2016-11-22 14:04:40 -05001104 struct nbd_sock **socks;
1105 struct nbd_sock *nsock;
Josef Bacik9442b732017-02-07 17:10:22 -05001106 int err;
1107
Mike Christiecf1b2322019-10-17 16:27:34 -05001108 sock = nbd_get_socket(nbd, arg, &err);
Josef Bacik9442b732017-02-07 17:10:22 -05001109 if (!sock)
1110 return err;
Markus Pargmann23272a672015-10-29 11:51:16 +01001111
Josef Bacikb98e7622021-01-25 12:21:02 -05001112 /*
1113 * We need to make sure we don't get any errant requests while we're
1114 * reallocating the ->socks array.
1115 */
1116 blk_mq_freeze_queue(nbd->disk->queue);
1117
Josef Bacike46c7282017-04-06 17:02:00 -04001118 if (!netlink && !nbd->task_setup &&
Xiubo Liec76a7b2019-09-17 17:26:05 +05301119 !test_bit(NBD_RT_BOUND, &config->runtime_flags))
Josef Bacik9561a7a2016-11-22 14:04:40 -05001120 nbd->task_setup = current;
Josef Bacike46c7282017-04-06 17:02:00 -04001121
1122 if (!netlink &&
1123 (nbd->task_setup != current ||
Xiubo Liec76a7b2019-09-17 17:26:05 +05301124 test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
Josef Bacik9561a7a2016-11-22 14:04:40 -05001125 dev_err(disk_to_dev(nbd->disk),
1126 "Device being setup by another task");
Zheng Bin579dd912020-06-29 09:23:49 +08001127 err = -EBUSY;
1128 goto put_socket;
1129 }
1130
1131 nsock = kzalloc(sizeof(*nsock), GFP_KERNEL);
1132 if (!nsock) {
1133 err = -ENOMEM;
1134 goto put_socket;
Markus Pargmann23272a672015-10-29 11:51:16 +01001135 }
1136
Josef Bacik5ea8d102017-04-06 17:01:58 -04001137 socks = krealloc(config->socks, (config->num_connections + 1) *
Josef Bacik9561a7a2016-11-22 14:04:40 -05001138 sizeof(struct nbd_sock *), GFP_KERNEL);
Josef Bacik9b1355d2017-04-06 17:01:56 -04001139 if (!socks) {
Zheng Bin579dd912020-06-29 09:23:49 +08001140 kfree(nsock);
1141 err = -ENOMEM;
1142 goto put_socket;
Josef Bacik9b1355d2017-04-06 17:01:56 -04001143 }
Navid Emamdoost03bf73c2019-09-23 15:09:58 -05001144
1145 config->socks = socks;
1146
Josef Bacikf3733242017-04-06 17:01:57 -04001147 nsock->fallback_index = -1;
1148 nsock->dead = false;
Josef Bacik9561a7a2016-11-22 14:04:40 -05001149 mutex_init(&nsock->tx_lock);
1150 nsock->sock = sock;
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001151 nsock->pending = NULL;
1152 nsock->sent = 0;
Josef Bacik799f9a32017-04-06 17:02:02 -04001153 nsock->cookie = 0;
Josef Bacik5ea8d102017-04-06 17:01:58 -04001154 socks[config->num_connections++] = nsock;
Josef Bacik560bc4b2017-04-06 17:02:04 -04001155 atomic_inc(&config->live_connections);
Josef Bacikb98e7622021-01-25 12:21:02 -05001156 blk_mq_unfreeze_queue(nbd->disk->queue);
Josef Bacik9561a7a2016-11-22 14:04:40 -05001157
1158 return 0;
Zheng Bin579dd912020-06-29 09:23:49 +08001159
1160put_socket:
Josef Bacikb98e7622021-01-25 12:21:02 -05001161 blk_mq_unfreeze_queue(nbd->disk->queue);
Zheng Bin579dd912020-06-29 09:23:49 +08001162 sockfd_put(sock);
1163 return err;
Markus Pargmann23272a672015-10-29 11:51:16 +01001164}
1165
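/*
 * Swap a freshly supplied socket into the first dead connection and
 * restart a recv thread for it.  Returns -ENOSPC if every connection
 * is still alive.
 */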
Josef Bacikb7aa3d32017-04-06 17:02:01 -04001166static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
1167{
1168 struct nbd_config *config = nbd->config;
1169 struct socket *sock, *old;
1170 struct recv_thread_args *args;
1171 int i;
1172 int err;
1173
Mike Christiecf1b2322019-10-17 16:27:34 -05001174 sock = nbd_get_socket(nbd, arg, &err);
Josef Bacikb7aa3d32017-04-06 17:02:01 -04001175 if (!sock)
1176 return err;
1177
1178 args = kzalloc(sizeof(*args), GFP_KERNEL);
1179 if (!args) {
1180 sockfd_put(sock);
1181 return -ENOMEM;
1182 }
1183
1184 for (i = 0; i < config->num_connections; i++) {
1185 struct nbd_sock *nsock = config->socks[i];
1186
1187 if (!nsock->dead)
1188 continue;
1189
1190 mutex_lock(&nsock->tx_lock);
1191 if (!nsock->dead) {
1192 mutex_unlock(&nsock->tx_lock);
1193 continue;
1194 }
1195 sk_set_memalloc(sock->sk);
Josef Bacika7ee8cf2017-07-21 10:48:15 -04001196 if (nbd->tag_set.timeout)
1197 sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
Josef Bacikb7aa3d32017-04-06 17:02:01 -04001198 atomic_inc(&config->recv_threads);
1199 refcount_inc(&nbd->config_refs);
1200 old = nsock->sock;
1201 nsock->fallback_index = -1;
1202 nsock->sock = sock;
1203 nsock->dead = false;
1204 INIT_WORK(&args->work, recv_work);
1205 args->index = i;
1206 args->nbd = nbd;
Josef Bacik799f9a32017-04-06 17:02:02 -04001207 nsock->cookie++;
Josef Bacikb7aa3d32017-04-06 17:02:01 -04001208 mutex_unlock(&nsock->tx_lock);
1209 sockfd_put(old);
1210
Xiubo Liec76a7b2019-09-17 17:26:05 +05301211 clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
Josef Bacik7a362ea2017-07-25 13:31:19 -04001212
Josef Bacikb7aa3d32017-04-06 17:02:01 -04001213		/* We take the tx_lock in an error path in recv_work, so we
1214		 * need to queue the work outside of the tx_lock.
1215 */
Mike Christiee9e006f2019-08-04 14:10:06 -05001216 queue_work(nbd->recv_workq, &args->work);
Josef Bacik560bc4b2017-04-06 17:02:04 -04001217
1218 atomic_inc(&config->live_connections);
1219 wake_up(&config->conn_wait);
Josef Bacikb7aa3d32017-04-06 17:02:01 -04001220 return 0;
1221 }
1222 sockfd_put(sock);
1223 kfree(args);
1224 return -ENOSPC;
1225}
1226
Markus Pargmann0e4f0f62015-10-29 12:04:51 +01001227static void nbd_bdev_reset(struct block_device *bdev)
1228{
Ratna Manoj Bollaabbbdf12017-03-24 14:08:29 -04001229 if (bdev->bd_openers > 1)
1230 return;
Christoph Hellwiga7824832020-11-26 18:43:37 +01001231 set_capacity(bdev->bd_disk, 0);
Markus Pargmann0e4f0f62015-10-29 12:04:51 +01001232}
1233
Josef Bacik29eaadc2017-04-06 17:01:59 -04001234static void nbd_parse_flags(struct nbd_device *nbd)
Markus Pargmannd02cf532015-10-29 12:06:15 +01001235{
Josef Bacik5ea8d102017-04-06 17:01:58 -04001236 struct nbd_config *config = nbd->config;
1237 if (config->flags & NBD_FLAG_READ_ONLY)
Josef Bacik29eaadc2017-04-06 17:01:59 -04001238 set_disk_ro(nbd->disk, true);
1239 else
1240 set_disk_ro(nbd->disk, false);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001241 if (config->flags & NBD_FLAG_SEND_TRIM)
Bart Van Assche8b904b52018-03-07 17:10:10 -08001242 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
Shaun McDowell685c9b22017-05-25 23:55:54 -04001243 if (config->flags & NBD_FLAG_SEND_FLUSH) {
1244 if (config->flags & NBD_FLAG_SEND_FUA)
1245 blk_queue_write_cache(nbd->disk->queue, true, true);
1246 else
1247 blk_queue_write_cache(nbd->disk->queue, true, false);
1248 }
Markus Pargmannd02cf532015-10-29 12:06:15 +01001249 else
Jens Axboeaafb1ee2016-03-30 10:10:53 -06001250 blk_queue_write_cache(nbd->disk->queue, false, false);
Markus Pargmannd02cf532015-10-29 12:06:15 +01001251}
1252
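/*
 * Send NBD_CMD_DISC on every connection so the server can close its
 * end; failures are only logged because we are shutting down anyway.
 */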
Josef Bacik9561a7a2016-11-22 14:04:40 -05001253static void send_disconnects(struct nbd_device *nbd)
1254{
Josef Bacik5ea8d102017-04-06 17:01:58 -04001255 struct nbd_config *config = nbd->config;
Al Viroc9f2b6a2015-11-12 05:09:35 -05001256 struct nbd_request request = {
1257 .magic = htonl(NBD_REQUEST_MAGIC),
1258 .type = htonl(NBD_CMD_DISC),
1259 };
1260 struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
1261 struct iov_iter from;
Josef Bacik9561a7a2016-11-22 14:04:40 -05001262 int i, ret;
1263
Josef Bacik5ea8d102017-04-06 17:01:58 -04001264 for (i = 0; i < config->num_connections; i++) {
Josef Bacikb4b2aec2017-07-21 10:48:14 -04001265 struct nbd_sock *nsock = config->socks[i];
1266
David Howellsaa563d72018-10-20 00:57:56 +01001267 iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
Josef Bacikb4b2aec2017-07-21 10:48:14 -04001268 mutex_lock(&nsock->tx_lock);
Josef Bacik9dd5d3a2017-03-24 14:08:26 -04001269 ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
Yu Kuaif52c0e02021-09-16 17:33:48 +08001270 if (ret < 0)
Josef Bacik9561a7a2016-11-22 14:04:40 -05001271 dev_err(disk_to_dev(nbd->disk),
1272 "Send disconnect failed %d\n", ret);
Josef Bacikb4b2aec2017-07-21 10:48:14 -04001273 mutex_unlock(&nsock->tx_lock);
Josef Bacik9561a7a2016-11-22 14:04:40 -05001274 }
1275}
1276
Josef Bacik29eaadc2017-04-06 17:01:59 -04001277static int nbd_disconnect(struct nbd_device *nbd)
Josef Bacik9442b732017-02-07 17:10:22 -05001278{
Josef Bacik5ea8d102017-04-06 17:01:58 -04001279 struct nbd_config *config = nbd->config;
Josef Bacik9442b732017-02-07 17:10:22 -05001280
Josef Bacik5ea8d102017-04-06 17:01:58 -04001281 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
Xiubo Liec76a7b2019-09-17 17:26:05 +05301282 set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
Xiubo Li8454d682019-09-17 17:26:06 +05301283 set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
Josef Bacik2e134562017-07-21 10:48:13 -04001284 send_disconnects(nbd);
Josef Bacik9442b732017-02-07 17:10:22 -05001285 return 0;
1286}
1287
Josef Bacik29eaadc2017-04-06 17:01:59 -04001288static void nbd_clear_sock(struct nbd_device *nbd)
Josef Bacik9442b732017-02-07 17:10:22 -05001289{
1290 sock_shutdown(nbd);
1291 nbd_clear_que(nbd);
Josef Bacik9442b732017-02-07 17:10:22 -05001292 nbd->task_setup = NULL;
Josef Bacik9442b732017-02-07 17:10:22 -05001293}
1294
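/*
 * Drop one reference on nbd->config.  The last put tears the whole
 * configuration down: sockets, debugfs entries, sysfs files and the
 * discard/timeout settings on the queue.
 */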
Josef Bacik5ea8d102017-04-06 17:01:58 -04001295static void nbd_config_put(struct nbd_device *nbd)
1296{
1297 if (refcount_dec_and_mutex_lock(&nbd->config_refs,
1298 &nbd->config_lock)) {
Josef Bacik5ea8d102017-04-06 17:01:58 -04001299 struct nbd_config *config = nbd->config;
Josef Bacik5ea8d102017-04-06 17:01:58 -04001300 nbd_dev_dbg_close(nbd);
Josef Bacik29eaadc2017-04-06 17:01:59 -04001301 nbd_size_clear(nbd);
Xiubo Liec76a7b2019-09-17 17:26:05 +05301302 if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
Josef Bacik5ea8d102017-04-06 17:01:58 -04001303 &config->runtime_flags))
1304 device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
Ye Bin0c980572021-10-20 15:39:59 +08001305 nbd->pid = 0;
Prasanna Kumar Kalever6497ef82021-04-29 15:58:28 +05301306 if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE,
1307 &config->runtime_flags)) {
1308 device_remove_file(disk_to_dev(nbd->disk), &backend_attr);
1309 kfree(nbd->backend);
1310 nbd->backend = NULL;
1311 }
Josef Bacik29eaadc2017-04-06 17:01:59 -04001312 nbd_clear_sock(nbd);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001313 if (config->num_connections) {
1314 int i;
1315 for (i = 0; i < config->num_connections; i++) {
1316 sockfd_put(config->socks[i]->sock);
1317 kfree(config->socks[i]);
1318 }
1319 kfree(config->socks);
1320 }
Ilya Dryomovfa976532017-05-23 17:49:55 +02001321 kfree(nbd->config);
Ilya Dryomovaf622b82017-05-23 17:49:54 +02001322 nbd->config = NULL;
1323
1324 nbd->tag_set.timeout = 0;
Josef Bacik6df133a2018-05-23 13:35:59 -04001325 nbd->disk->queue->limits.discard_granularity = 0;
Josef Bacik07ce2132018-06-05 11:41:23 -04001326 nbd->disk->queue->limits.discard_alignment = 0;
Josef Bacik6df133a2018-05-23 13:35:59 -04001327 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
Bart Van Assche8b904b52018-03-07 17:10:10 -08001328 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
Josef Bacika2c97902017-04-06 17:02:07 -04001329
Josef Bacik5ea8d102017-04-06 17:01:58 -04001330 mutex_unlock(&nbd->config_lock);
Josef Bacikc6a47592017-04-06 17:02:06 -04001331 nbd_put(nbd);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001332 module_put(THIS_MODULE);
1333 }
1334}
1335
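/*
 * Common start path for the ioctl and netlink interfaces: record the
 * pid of the configuring task, start one recv work item per
 * connection and publish the device size.
 */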
Josef Bacike46c7282017-04-06 17:02:00 -04001336static int nbd_start_device(struct nbd_device *nbd)
Josef Bacik9442b732017-02-07 17:10:22 -05001337{
Josef Bacik5ea8d102017-04-06 17:01:58 -04001338 struct nbd_config *config = nbd->config;
1339 int num_connections = config->num_connections;
Josef Bacik9442b732017-02-07 17:10:22 -05001340 int error = 0, i;
1341
Ye Bin0c980572021-10-20 15:39:59 +08001342 if (nbd->pid)
Josef Bacik9442b732017-02-07 17:10:22 -05001343 return -EBUSY;
Josef Bacik5ea8d102017-04-06 17:01:58 -04001344 if (!config->socks)
Josef Bacik9442b732017-02-07 17:10:22 -05001345 return -EINVAL;
1346 if (num_connections > 1 &&
Josef Bacik5ea8d102017-04-06 17:01:58 -04001347 !(config->flags & NBD_FLAG_CAN_MULTI_CONN)) {
Josef Bacik9442b732017-02-07 17:10:22 -05001348 dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
Josef Bacik5ea8d102017-04-06 17:01:58 -04001349 return -EINVAL;
Josef Bacik9442b732017-02-07 17:10:22 -05001350 }
1351
Josef Bacik5ea8d102017-04-06 17:01:58 -04001352 blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
Ye Bin0c980572021-10-20 15:39:59 +08001353 nbd->pid = task_pid_nr(current);
Josef Bacik9442b732017-02-07 17:10:22 -05001354
Josef Bacik29eaadc2017-04-06 17:01:59 -04001355 nbd_parse_flags(nbd);
Josef Bacik9442b732017-02-07 17:10:22 -05001356
1357 error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
1358 if (error) {
Prasanna Kumar Kalever6497ef82021-04-29 15:58:28 +05301359 dev_err(disk_to_dev(nbd->disk), "device_create_file failed for pid!\n");
Josef Bacik5ea8d102017-04-06 17:01:58 -04001360 return error;
Josef Bacik9442b732017-02-07 17:10:22 -05001361 }
Xiubo Liec76a7b2019-09-17 17:26:05 +05301362 set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
Josef Bacik9442b732017-02-07 17:10:22 -05001363
1364 nbd_dev_dbg_init(nbd);
1365 for (i = 0; i < num_connections; i++) {
Josef Bacik5ea8d102017-04-06 17:01:58 -04001366 struct recv_thread_args *args;
1367
1368 args = kzalloc(sizeof(*args), GFP_KERNEL);
1369 if (!args) {
1370 sock_shutdown(nbd);
Sun Ke5c0dd222020-01-22 11:18:57 +08001371 /*
1372			 * If num_connections is m (m > 1) and the first n kzallocs
1373			 * (1 <= n < m) succeeded but allocation n + 1 failed, we
1374			 * still have n recv threads running.  Flush the workqueue
1375			 * here so those threads cannot drop the last config ref
1376			 * and try to destroy the workqueue from inside the
1377			 * workqueue.
1378 */
1379 if (i)
1380 flush_workqueue(nbd->recv_workq);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001381 return -ENOMEM;
1382 }
1383 sk_set_memalloc(config->socks[i]->sock->sk);
Josef Bacika7ee8cf2017-07-21 10:48:15 -04001384 if (nbd->tag_set.timeout)
1385 config->socks[i]->sock->sk->sk_sndtimeo =
1386 nbd->tag_set.timeout;
Josef Bacik5ea8d102017-04-06 17:01:58 -04001387 atomic_inc(&config->recv_threads);
1388 refcount_inc(&nbd->config_refs);
1389 INIT_WORK(&args->work, recv_work);
1390 args->nbd = nbd;
1391 args->index = i;
Mike Christiee9e006f2019-08-04 14:10:06 -05001392 queue_work(nbd->recv_workq, &args->work);
Josef Bacik9442b732017-02-07 17:10:22 -05001393 }
Nick Desaulniers41e76c62021-09-20 16:25:33 -07001394 return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
Josef Bacike46c7282017-04-06 17:02:00 -04001395}
1396
1397static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
1398{
1399 struct nbd_config *config = nbd->config;
1400 int ret;
1401
1402 ret = nbd_start_device(nbd);
1403 if (ret)
1404 return ret;
1405
Josef Bacike46c7282017-04-06 17:02:00 -04001406 if (max_part)
Christoph Hellwig38430f02020-09-21 09:19:45 +02001407 set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
Josef Bacike46c7282017-04-06 17:02:00 -04001408 mutex_unlock(&nbd->config_lock);
1409 ret = wait_event_interruptible(config->recv_wq,
Josef Bacik5ea8d102017-04-06 17:01:58 -04001410 atomic_read(&config->recv_threads) == 0);
Mike Christie1c058392019-12-08 16:51:50 -06001411 if (ret)
Josef Bacik5ea8d102017-04-06 17:01:58 -04001412 sock_shutdown(nbd);
Mike Christie1c058392019-12-08 16:51:50 -06001413 flush_workqueue(nbd->recv_workq);
1414
Josef Bacik9442b732017-02-07 17:10:22 -05001415 mutex_lock(&nbd->config_lock);
Josef Bacik76aa1d32018-05-16 14:51:22 -04001416 nbd_bdev_reset(bdev);
Josef Bacik9442b732017-02-07 17:10:22 -05001417 /* user requested, ignore socket errors */
Xiubo Liec76a7b2019-09-17 17:26:05 +05301418 if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
Josef Bacike46c7282017-04-06 17:02:00 -04001419 ret = 0;
Xiubo Liec76a7b2019-09-17 17:26:05 +05301420 if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
Josef Bacike46c7282017-04-06 17:02:00 -04001421 ret = -ETIMEDOUT;
1422 return ret;
Josef Bacik9442b732017-02-07 17:10:22 -05001423}
Markus Pargmann30d53d92015-08-17 08:20:06 +02001424
Josef Bacik29eaadc2017-04-06 17:01:59 -04001425static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
1426 struct block_device *bdev)
1427{
Josef Bacik2516ab12017-04-06 17:02:03 -04001428 sock_shutdown(nbd);
Munehisa Kamata2b5c8f02019-07-31 20:13:10 +08001429 __invalidate_device(bdev, true);
Josef Bacik29eaadc2017-04-06 17:01:59 -04001430 nbd_bdev_reset(bdev);
Xiubo Liec76a7b2019-09-17 17:26:05 +05301431 if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
Josef Bacike46c7282017-04-06 17:02:00 -04001432 &nbd->config->runtime_flags))
1433 nbd_config_put(nbd);
Josef Bacik29eaadc2017-04-06 17:01:59 -04001434}
1435
Mike Christie55313e92019-08-13 11:39:49 -05001436static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout)
1437{
1438 nbd->tag_set.timeout = timeout * HZ;
Mike Christie2da22da2019-08-13 11:39:52 -05001439 if (timeout)
1440 blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
Hou Puacb19e12020-08-10 08:00:44 -04001441 else
1442 blk_queue_rq_timeout(nbd->disk->queue, 30 * HZ);
Mike Christie55313e92019-08-13 11:39:49 -05001443}
1444
Josef Bacik9561a7a2016-11-22 14:04:40 -05001445/* Must be called with config_lock held */
Wanlong Gaof4507162012-03-28 14:42:51 -07001446static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
Pavel Machek1a2ad212009-04-02 16:58:41 -07001447 unsigned int cmd, unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448{
Josef Bacik5ea8d102017-04-06 17:01:58 -04001449 struct nbd_config *config = nbd->config;
Baokun Lifad7cd32021-08-04 10:12:12 +08001450 loff_t bytesize;
Josef Bacik5ea8d102017-04-06 17:01:58 -04001451
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452 switch (cmd) {
Josef Bacik9442b732017-02-07 17:10:22 -05001453 case NBD_DISCONNECT:
Josef Bacik29eaadc2017-04-06 17:01:59 -04001454 return nbd_disconnect(nbd);
Markus Pargmann23272a672015-10-29 11:51:16 +01001455 case NBD_CLEAR_SOCK:
Josef Bacik29eaadc2017-04-06 17:01:59 -04001456 nbd_clear_sock_ioctl(nbd, bdev);
1457 return 0;
Josef Bacik9442b732017-02-07 17:10:22 -05001458 case NBD_SET_SOCK:
Josef Bacike46c7282017-04-06 17:02:00 -04001459 return nbd_add_socket(nbd, arg, false);
Josef Bacik9442b732017-02-07 17:10:22 -05001460 case NBD_SET_BLKSIZE:
Christoph Hellwigdcbddf52020-11-16 15:57:00 +01001461 return nbd_set_size(nbd, config->bytesize, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 case NBD_SET_SIZE:
Nick Desaulniers41e76c62021-09-20 16:25:33 -07001463 return nbd_set_size(nbd, arg, nbd_blksize(config));
Markus Pargmann37091fd2015-07-27 07:36:49 +02001464 case NBD_SET_SIZE_BLOCKS:
Nick Desaulniers41e76c62021-09-20 16:25:33 -07001465 if (check_shl_overflow(arg, config->blksize_bits, &bytesize))
Baokun Lifad7cd32021-08-04 10:12:12 +08001466 return -EINVAL;
Nick Desaulniers41e76c62021-09-20 16:25:33 -07001467 return nbd_set_size(nbd, bytesize, nbd_blksize(config));
Paul Clements7fdfd402007-10-16 23:27:37 -07001468 case NBD_SET_TIMEOUT:
Mike Christie2da22da2019-08-13 11:39:52 -05001469 nbd_set_cmd_timeout(nbd, arg);
Paul Clements7fdfd402007-10-16 23:27:37 -07001470 return 0;
Pavel Machek1a2ad212009-04-02 16:58:41 -07001471
Paul Clements2f012502012-10-04 17:16:15 -07001472 case NBD_SET_FLAGS:
Josef Bacik5ea8d102017-04-06 17:01:58 -04001473 config->flags = arg;
Paul Clements2f012502012-10-04 17:16:15 -07001474 return 0;
Josef Bacik9442b732017-02-07 17:10:22 -05001475 case NBD_DO_IT:
Josef Bacike46c7282017-04-06 17:02:00 -04001476 return nbd_start_device_ioctl(nbd, bdev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 case NBD_CLEAR_QUE:
Herbert Xu4b2f0262006-01-06 00:09:47 -08001478 /*
1479 * This is for compatibility only. The queue is always cleared
1480 * by NBD_DO_IT or NBD_CLEAR_SOCK.
1481 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 return 0;
1483 case NBD_PRINT_DEBUG:
Josef Bacikfd8383f2016-09-08 12:33:37 -07001484 /*
1485 * For compatibility only, we no longer keep a list of
1486 * outstanding requests.
1487 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488 return 0;
1489 }
Pavel Machek1a2ad212009-04-02 16:58:41 -07001490 return -ENOTTY;
1491}
1492
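/*
 * Illustrative sketch, not part of this driver: a minimal userspace
 * client would typically drive the ioctl interface handled below
 * roughly like this ("sock" being a connected socket fd to an NBD
 * server, "fd" an open /dev/nbdX, "server_flags" the flags received
 * during the handshake):
 *
 *	ioctl(fd, NBD_SET_SOCK, sock);
 *	ioctl(fd, NBD_SET_SIZE, size_in_bytes);
 *	ioctl(fd, NBD_SET_FLAGS, server_flags);
 *	ioctl(fd, NBD_DO_IT);           blocks until disconnect
 *	ioctl(fd, NBD_CLEAR_QUE);
 *	ioctl(fd, NBD_CLEAR_SOCK);
 */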
1493static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
1494 unsigned int cmd, unsigned long arg)
1495{
Wanlong Gaof4507162012-03-28 14:42:51 -07001496 struct nbd_device *nbd = bdev->bd_disk->private_data;
Josef Bacike46c7282017-04-06 17:02:00 -04001497 struct nbd_config *config = nbd->config;
1498 int error = -EINVAL;
Pavel Machek1a2ad212009-04-02 16:58:41 -07001499
1500 if (!capable(CAP_SYS_ADMIN))
1501 return -EPERM;
1502
Josef Bacik1dae69b2017-05-05 22:25:18 -04001503 /* The block layer will pass back some non-nbd ioctls in case we have
1504	 * special handling for them, but we don't, so just return an error.
1505 */
1506 if (_IOC_TYPE(cmd) != 0xab)
1507 return -EINVAL;
1508
Josef Bacik9561a7a2016-11-22 14:04:40 -05001509 mutex_lock(&nbd->config_lock);
Josef Bacike46c7282017-04-06 17:02:00 -04001510
1511	/* Don't allow ioctl operations on an nbd device that was created with
1512 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
1513 */
Xiubo Liec76a7b2019-09-17 17:26:05 +05301514 if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
Josef Bacike46c7282017-04-06 17:02:00 -04001515 (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
1516 error = __nbd_ioctl(bdev, nbd, cmd, arg);
1517 else
1518 dev_err(nbd_to_dev(nbd), "Cannot use ioctl interface on a netlink controlled device.\n");
Josef Bacik9561a7a2016-11-22 14:04:40 -05001519 mutex_unlock(&nbd->config_lock);
Pavel Machek1a2ad212009-04-02 16:58:41 -07001520 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521}
1522
Josef Bacik5ea8d102017-04-06 17:01:58 -04001523static struct nbd_config *nbd_alloc_config(void)
1524{
1525 struct nbd_config *config;
1526
1527 config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
1528 if (!config)
1529 return NULL;
1530 atomic_set(&config->recv_threads, 0);
1531 init_waitqueue_head(&config->recv_wq);
Josef Bacik560bc4b2017-04-06 17:02:04 -04001532 init_waitqueue_head(&config->conn_wait);
Nick Desaulniers41e76c62021-09-20 16:25:33 -07001533 config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
Josef Bacik560bc4b2017-04-06 17:02:04 -04001534 atomic_set(&config->live_connections, 0);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001535 try_module_get(THIS_MODULE);
1536 return config;
1537}
1538
1539static int nbd_open(struct block_device *bdev, fmode_t mode)
1540{
1541 struct nbd_device *nbd;
1542 int ret = 0;
1543
1544 mutex_lock(&nbd_index_mutex);
1545 nbd = bdev->bd_disk->private_data;
1546 if (!nbd) {
1547 ret = -ENXIO;
1548 goto out;
1549 }
Josef Bacikc6a47592017-04-06 17:02:06 -04001550 if (!refcount_inc_not_zero(&nbd->refs)) {
1551 ret = -ENXIO;
1552 goto out;
1553 }
Josef Bacik5ea8d102017-04-06 17:01:58 -04001554 if (!refcount_inc_not_zero(&nbd->config_refs)) {
1555 struct nbd_config *config;
1556
1557 mutex_lock(&nbd->config_lock);
1558 if (refcount_inc_not_zero(&nbd->config_refs)) {
1559 mutex_unlock(&nbd->config_lock);
1560 goto out;
1561 }
1562 config = nbd->config = nbd_alloc_config();
1563 if (!config) {
1564 ret = -ENOMEM;
1565 mutex_unlock(&nbd->config_lock);
1566 goto out;
1567 }
1568 refcount_set(&nbd->config_refs, 1);
Josef Bacikc6a47592017-04-06 17:02:06 -04001569 refcount_inc(&nbd->refs);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001570 mutex_unlock(&nbd->config_lock);
Josh Triplett1aba1692020-12-17 00:58:47 -08001571 if (max_part)
1572 set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
Josef Bacikfe1f9e62018-05-16 14:51:21 -04001573 } else if (nbd_disconnected(nbd->config)) {
Josh Triplett1aba1692020-12-17 00:58:47 -08001574 if (max_part)
1575 set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001576 }
1577out:
1578 mutex_unlock(&nbd_index_mutex);
1579 return ret;
1580}
1581
1582static void nbd_release(struct gendisk *disk, fmode_t mode)
1583{
1584 struct nbd_device *nbd = disk->private_data;
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07001585
Xiubo Liec76a7b2019-09-17 17:26:05 +05301586 if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
Christoph Hellwig977115c2020-11-26 10:41:07 +01001587 disk->part0->bd_openers == 0)
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07001588 nbd_disconnect_and_put(nbd);
1589
Josef Bacik5ea8d102017-04-06 17:01:58 -04001590 nbd_config_put(nbd);
Josef Bacikc6a47592017-04-06 17:02:06 -04001591 nbd_put(nbd);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001592}
1593
Alexey Dobriyan83d5cde2009-09-21 17:01:13 -07001594static const struct block_device_operations nbd_fops =
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595{
1596 .owner = THIS_MODULE,
Josef Bacik5ea8d102017-04-06 17:01:58 -04001597 .open = nbd_open,
1598 .release = nbd_release,
Arnd Bergmann8a6cfeb2010-07-08 10:18:46 +02001599 .ioctl = nbd_ioctl,
Al Viro263a3df2016-01-07 10:04:37 -05001600 .compat_ioctl = nbd_ioctl,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601};
1602
Markus Pargmann30d53d92015-08-17 08:20:06 +02001603#if IS_ENABLED(CONFIG_DEBUG_FS)
1604
1605static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
1606{
1607 struct nbd_device *nbd = s->private;
1608
Ye Bin0c980572021-10-20 15:39:59 +08001609 if (nbd->pid)
1610 seq_printf(s, "recv: %d\n", nbd->pid);
Markus Pargmann30d53d92015-08-17 08:20:06 +02001611
1612 return 0;
1613}
1614
Liao Pingfanga2d52a62021-02-06 15:10:55 +08001615DEFINE_SHOW_ATTRIBUTE(nbd_dbg_tasks);
Markus Pargmann30d53d92015-08-17 08:20:06 +02001616
1617static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
1618{
1619 struct nbd_device *nbd = s->private;
Josef Bacik5ea8d102017-04-06 17:01:58 -04001620 u32 flags = nbd->config->flags;
Markus Pargmann30d53d92015-08-17 08:20:06 +02001621
1622 seq_printf(s, "Hex: 0x%08x\n\n", flags);
1623
1624 seq_puts(s, "Known flags:\n");
1625
1626 if (flags & NBD_FLAG_HAS_FLAGS)
1627 seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
1628 if (flags & NBD_FLAG_READ_ONLY)
1629 seq_puts(s, "NBD_FLAG_READ_ONLY\n");
1630 if (flags & NBD_FLAG_SEND_FLUSH)
1631 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
Shaun McDowell685c9b22017-05-25 23:55:54 -04001632 if (flags & NBD_FLAG_SEND_FUA)
1633 seq_puts(s, "NBD_FLAG_SEND_FUA\n");
Markus Pargmann30d53d92015-08-17 08:20:06 +02001634 if (flags & NBD_FLAG_SEND_TRIM)
1635 seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
1636
1637 return 0;
1638}
1639
Liao Pingfanga2d52a62021-02-06 15:10:55 +08001640DEFINE_SHOW_ATTRIBUTE(nbd_dbg_flags);
Markus Pargmann30d53d92015-08-17 08:20:06 +02001641
1642static int nbd_dev_dbg_init(struct nbd_device *nbd)
1643{
1644 struct dentry *dir;
Josef Bacik5ea8d102017-04-06 17:01:58 -04001645 struct nbd_config *config = nbd->config;
Markus Pargmann27ea43f2015-10-24 21:15:34 +02001646
1647 if (!nbd_dbg_dir)
1648 return -EIO;
Markus Pargmann30d53d92015-08-17 08:20:06 +02001649
1650 dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
Markus Pargmann27ea43f2015-10-24 21:15:34 +02001651 if (!dir) {
1652 dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
1653 nbd_name(nbd));
1654 return -EIO;
Markus Pargmann30d53d92015-08-17 08:20:06 +02001655 }
Josef Bacik5ea8d102017-04-06 17:01:58 -04001656 config->dbg_dir = dir;
Markus Pargmann30d53d92015-08-17 08:20:06 +02001657
Liao Pingfanga2d52a62021-02-06 15:10:55 +08001658 debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_fops);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001659 debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
Josef Bacik0eadf372016-09-08 12:33:40 -07001660 debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
Nick Desaulniers41e76c62021-09-20 16:25:33 -07001661 debugfs_create_u32("blocksize_bits", 0444, dir, &config->blksize_bits);
Liao Pingfanga2d52a62021-02-06 15:10:55 +08001662 debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_fops);
Markus Pargmann30d53d92015-08-17 08:20:06 +02001663
1664 return 0;
1665}
1666
1667static void nbd_dev_dbg_close(struct nbd_device *nbd)
1668{
Josef Bacik5ea8d102017-04-06 17:01:58 -04001669 debugfs_remove_recursive(nbd->config->dbg_dir);
Markus Pargmann30d53d92015-08-17 08:20:06 +02001670}
1671
1672static int nbd_dbg_init(void)
1673{
1674 struct dentry *dbg_dir;
1675
1676 dbg_dir = debugfs_create_dir("nbd", NULL);
Markus Pargmann27ea43f2015-10-24 21:15:34 +02001677 if (!dbg_dir)
1678 return -EIO;
Markus Pargmann30d53d92015-08-17 08:20:06 +02001679
1680 nbd_dbg_dir = dbg_dir;
1681
1682 return 0;
1683}
1684
1685static void nbd_dbg_close(void)
1686{
1687 debugfs_remove_recursive(nbd_dbg_dir);
1688}
1689
1690#else /* IS_ENABLED(CONFIG_DEBUG_FS) */
1691
1692static int nbd_dev_dbg_init(struct nbd_device *nbd)
1693{
1694 return 0;
1695}
1696
1697static void nbd_dev_dbg_close(struct nbd_device *nbd)
1698{
1699}
1700
1701static int nbd_dbg_init(void)
1702{
1703 return 0;
1704}
1705
1706static void nbd_dbg_close(void)
1707{
1708}
1709
1710#endif
1711
Christoph Hellwigd6296d392017-05-01 10:19:08 -06001712static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
1713 unsigned int hctx_idx, unsigned int numa_node)
Josef Bacikfd8383f2016-09-08 12:33:37 -07001714{
1715 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
Christoph Hellwigd6296d392017-05-01 10:19:08 -06001716 cmd->nbd = set->driver_data;
Josef Bacikd7d94d42018-07-16 12:11:34 -04001717 cmd->flags = 0;
Josef Bacik8f3ea352018-07-16 12:11:35 -04001718 mutex_init(&cmd->lock);
Josef Bacikfd8383f2016-09-08 12:33:37 -07001719 return 0;
1720}
1721
Eric Biggersf363b082017-03-30 13:39:16 -07001722static const struct blk_mq_ops nbd_mq_ops = {
Josef Bacikfd8383f2016-09-08 12:33:37 -07001723 .queue_rq = nbd_queue_rq,
Christoph Hellwig1e388ae2017-04-20 16:03:06 +02001724 .complete = nbd_complete_rq,
Josef Bacikfd8383f2016-09-08 12:33:37 -07001725 .init_request = nbd_init_request,
Josef Bacik0eadf372016-09-08 12:33:40 -07001726 .timeout = nbd_xmit_timeout,
Josef Bacikfd8383f2016-09-08 12:33:37 -07001727};
1728
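/*
 * Allocate, initialize and publish a new nbd device.  A negative index
 * means "pick the first free slot in the idr"; "refs" is the initial
 * reference count handed to the caller once add_disk() has succeeded.
 */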
Christoph Hellwig6e4df4c2021-08-11 14:44:28 +02001729static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
Josef Bacikb0d91112017-02-01 16:11:40 -05001730{
1731 struct nbd_device *nbd;
1732 struct gendisk *disk;
Josef Bacikb0d91112017-02-01 16:11:40 -05001733 int err = -ENOMEM;
1734
1735 nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
1736 if (!nbd)
1737 goto out;
1738
Christoph Hellwig4af5f2e2021-06-02 09:53:34 +03001739 nbd->tag_set.ops = &nbd_mq_ops;
1740 nbd->tag_set.nr_hw_queues = 1;
1741 nbd->tag_set.queue_depth = 128;
1742 nbd->tag_set.numa_node = NUMA_NO_NODE;
1743 nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
1744 nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
1745 BLK_MQ_F_BLOCKING;
1746 nbd->tag_set.driver_data = nbd;
Hou Tao68c94172021-08-11 14:44:23 +02001747 INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
Prasanna Kumar Kalever6497ef82021-04-29 15:58:28 +05301748 nbd->backend = NULL;
Christoph Hellwig4af5f2e2021-06-02 09:53:34 +03001749
1750 err = blk_mq_alloc_tag_set(&nbd->tag_set);
1751 if (err)
Josef Bacikb0d91112017-02-01 16:11:40 -05001752 goto out_free_nbd;
1753
Christoph Hellwig6e4df4c2021-08-11 14:44:28 +02001754 mutex_lock(&nbd_index_mutex);
Josef Bacikb0d91112017-02-01 16:11:40 -05001755 if (index >= 0) {
1756 err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
1757 GFP_KERNEL);
1758 if (err == -ENOSPC)
1759 err = -EEXIST;
1760 } else {
1761 err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
1762 if (err >= 0)
1763 index = err;
1764 }
Tetsuo Handab1903002021-08-25 18:31:06 +02001765 nbd->index = index;
Christoph Hellwig6e4df4c2021-08-11 14:44:28 +02001766 mutex_unlock(&nbd_index_mutex);
Josef Bacikb0d91112017-02-01 16:11:40 -05001767 if (err < 0)
Josef Bacikb0d91112017-02-01 16:11:40 -05001768 goto out_free_tags;
Christoph Hellwig4af5f2e2021-06-02 09:53:34 +03001769
1770 disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
1771 if (IS_ERR(disk)) {
1772 err = PTR_ERR(disk);
1773 goto out_free_idr;
Josef Bacikb0d91112017-02-01 16:11:40 -05001774 }
Christoph Hellwig4af5f2e2021-06-02 09:53:34 +03001775 nbd->disk = disk;
Josef Bacikb0d91112017-02-01 16:11:40 -05001776
Ye Bine2daec42021-11-02 09:52:37 +08001777 nbd->recv_workq = alloc_workqueue("nbd%d-recv",
1778 WQ_MEM_RECLAIM | WQ_HIGHPRI |
1779 WQ_UNBOUND, 0, nbd->index);
1780 if (!nbd->recv_workq) {
1781 dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv work queue.\n");
1782 err = -ENOMEM;
1783 goto out_err_disk;
1784 }
1785
Josef Bacikb0d91112017-02-01 16:11:40 -05001786 /*
1787 * Tell the block layer that we are not a rotational device
1788 */
Bart Van Assche8b904b52018-03-07 17:10:10 -08001789 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
1790 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
Josef Bacik6df133a2018-05-23 13:35:59 -04001791 disk->queue->limits.discard_granularity = 0;
Josef Bacik07ce2132018-06-05 11:41:23 -04001792 disk->queue->limits.discard_alignment = 0;
Josef Bacik6df133a2018-05-23 13:35:59 -04001793 blk_queue_max_discard_sectors(disk->queue, 0);
Josef Bacikebb16d02017-04-18 16:22:51 -04001794 blk_queue_max_segment_size(disk->queue, UINT_MAX);
Josef Bacik1cc1f172017-04-20 15:47:01 -04001795 blk_queue_max_segments(disk->queue, USHRT_MAX);
Josef Bacikb0d91112017-02-01 16:11:40 -05001796 blk_queue_max_hw_sectors(disk->queue, 65536);
1797 disk->queue->limits.max_sectors = 256;
1798
Josef Bacikb0d91112017-02-01 16:11:40 -05001799 mutex_init(&nbd->config_lock);
Josef Bacik5ea8d102017-04-06 17:01:58 -04001800 refcount_set(&nbd->config_refs, 0);
Tetsuo Handa75b7f622021-08-25 18:31:05 +02001801 /*
1802	 * Start out with zero references to keep other threads from using
1803 * this device until it is fully initialized.
1804 */
1805 refcount_set(&nbd->refs, 0);
Josef Bacikc6a47592017-04-06 17:02:06 -04001806 INIT_LIST_HEAD(&nbd->list);
Josef Bacikb0d91112017-02-01 16:11:40 -05001807 disk->major = NBD_MAJOR;
Pavel Skripkinb1a81162021-08-12 12:15:01 +03001808
1809	/* A first_minor that is too big can cause duplicate creation of
Yu Kuai940c2642021-11-02 09:52:35 +08001810	 * sysfs files/links, since index << part_shift might overflow, or
1811	 * exceed the 20 bits that MKDEV() allows for first_minor.
Pavel Skripkinb1a81162021-08-12 12:15:01 +03001812 */
Josef Bacikb0d91112017-02-01 16:11:40 -05001813 disk->first_minor = index << part_shift;
Yu Kuai940c2642021-11-02 09:52:35 +08001814 if (disk->first_minor < index || disk->first_minor > MINORMASK) {
Pavel Skripkinb1a81162021-08-12 12:15:01 +03001815 err = -EINVAL;
Ye Bine2daec42021-11-02 09:52:37 +08001816 goto out_free_work;
Pavel Skripkinb1a81162021-08-12 12:15:01 +03001817 }
1818
Christoph Hellwig4af5f2e2021-06-02 09:53:34 +03001819 disk->minors = 1 << part_shift;
Josef Bacikb0d91112017-02-01 16:11:40 -05001820 disk->fops = &nbd_fops;
1821 disk->private_data = nbd;
1822 sprintf(disk->disk_name, "nbd%d", index);
Luis Chamberlaine1654f42021-09-27 14:59:58 -07001823 err = add_disk(disk);
1824 if (err)
Ye Bine2daec42021-11-02 09:52:37 +08001825 goto out_free_work;
Tetsuo Handa75b7f622021-08-25 18:31:05 +02001826
1827 /*
1828 * Now publish the device.
1829 */
1830 refcount_set(&nbd->refs, refs);
Josef Bacik47d902b2017-04-06 17:02:05 -04001831 nbd_total_devices++;
Christoph Hellwig7bdc00c2021-08-11 14:44:26 +02001832 return nbd;
Josef Bacikb0d91112017-02-01 16:11:40 -05001833
Ye Bine2daec42021-11-02 09:52:37 +08001834out_free_work:
1835 destroy_workqueue(nbd->recv_workq);
Luis Chamberlaine1654f42021-09-27 14:59:58 -07001836out_err_disk:
1837 blk_cleanup_disk(disk);
Josef Bacikb0d91112017-02-01 16:11:40 -05001838out_free_idr:
Tetsuo Handa93f63bc2021-08-25 18:31:03 +02001839 mutex_lock(&nbd_index_mutex);
Josef Bacikb0d91112017-02-01 16:11:40 -05001840 idr_remove(&nbd_index_idr, index);
Tetsuo Handa93f63bc2021-08-25 18:31:03 +02001841 mutex_unlock(&nbd_index_mutex);
Christoph Hellwig4af5f2e2021-06-02 09:53:34 +03001842out_free_tags:
1843 blk_mq_free_tag_set(&nbd->tag_set);
Josef Bacikb0d91112017-02-01 16:11:40 -05001844out_free_nbd:
1845 kfree(nbd);
1846out:
Christoph Hellwig7bdc00c2021-08-11 14:44:26 +02001847 return ERR_PTR(err);
Josef Bacikb0d91112017-02-01 16:11:40 -05001848}
1849
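/*
 * Find a device that has no active config and is not marked for
 * destruction, and take a reference on it.  Must be called with
 * nbd_index_mutex held (asserted below).
 */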
Christoph Hellwig438cd312021-08-25 18:31:07 +02001850static struct nbd_device *nbd_find_get_unused(void)
Josef Bacike46c7282017-04-06 17:02:00 -04001851{
Christoph Hellwig409e0ff2021-08-25 18:31:04 +02001852 struct nbd_device *nbd;
1853 int id;
Josef Bacike46c7282017-04-06 17:02:00 -04001854
Christoph Hellwig409e0ff2021-08-25 18:31:04 +02001855 lockdep_assert_held(&nbd_index_mutex);
1856
Christoph Hellwig438cd312021-08-25 18:31:07 +02001857 idr_for_each_entry(&nbd_index_idr, nbd, id) {
1858 if (refcount_read(&nbd->config_refs) ||
1859 test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
1860 continue;
1861 if (refcount_inc_not_zero(&nbd->refs))
Christoph Hellwig409e0ff2021-08-25 18:31:04 +02001862 return nbd;
Josef Bacike46c7282017-04-06 17:02:00 -04001863 }
Christoph Hellwig409e0ff2021-08-25 18:31:04 +02001864
1865 return NULL;
Josef Bacike46c7282017-04-06 17:02:00 -04001866}
1867
1868/* Netlink interface. */
Stephen Hemmingera86c4122018-07-18 09:32:43 -07001869static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
Josef Bacike46c7282017-04-06 17:02:00 -04001870 [NBD_ATTR_INDEX] = { .type = NLA_U32 },
1871 [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 },
1872 [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 },
1873 [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 },
1874 [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 },
1875 [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 },
1876 [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED},
Josef Bacik560bc4b2017-04-06 17:02:04 -04001877 [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 },
Josef Bacik47d902b2017-04-06 17:02:05 -04001878 [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED},
Prasanna Kumar Kalever6497ef82021-04-29 15:58:28 +05301879 [NBD_ATTR_BACKEND_IDENTIFIER] = { .type = NLA_STRING},
Josef Bacike46c7282017-04-06 17:02:00 -04001880};
1881
Stephen Hemmingera86c4122018-07-18 09:32:43 -07001882static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
Josef Bacike46c7282017-04-06 17:02:00 -04001883 [NBD_SOCK_FD] = { .type = NLA_U32 },
1884};
1885
Josef Bacik47d902b2017-04-06 17:02:05 -04001886/* We don't use this right now since we don't parse the incoming list, but we
1887 * still want it here so userspace knows what to expect.
1888 */
Stephen Hemmingera86c4122018-07-18 09:32:43 -07001889static const struct nla_policy __attribute__((unused))
Josef Bacik47d902b2017-04-06 17:02:05 -04001890nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
1891 [NBD_DEVICE_INDEX] = { .type = NLA_U32 },
1892 [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 },
1893};
1894
Mike Christie4ddeaae82019-05-29 15:16:06 -05001895static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
1896{
1897 struct nbd_config *config = nbd->config;
Nick Desaulniers41e76c62021-09-20 16:25:33 -07001898 u64 bsize = nbd_blksize(config);
Mike Christie4ddeaae82019-05-29 15:16:06 -05001899 u64 bytes = config->bytesize;
1900
1901 if (info->attrs[NBD_ATTR_SIZE_BYTES])
1902 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
1903
Christoph Hellwigdcbddf52020-11-16 15:57:00 +01001904 if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES])
Mike Christie4ddeaae82019-05-29 15:16:06 -05001905 bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
Mike Christie4ddeaae82019-05-29 15:16:06 -05001906
Nick Desaulniers41e76c62021-09-20 16:25:33 -07001907 if (bytes != config->bytesize || bsize != nbd_blksize(config))
Christoph Hellwigdcbddf52020-11-16 15:57:00 +01001908 return nbd_set_size(nbd, bytes, bsize);
Mike Christie4ddeaae82019-05-29 15:16:06 -05001909 return 0;
1910}
1911
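/*
 * NBD_CMD_CONNECT handler: find (or create) the requested device,
 * build its config from the netlink attributes and start it.
 */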
Josef Bacike46c7282017-04-06 17:02:00 -04001912static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
1913{
Christoph Hellwig409e0ff2021-08-25 18:31:04 +02001914 struct nbd_device *nbd;
Josef Bacike46c7282017-04-06 17:02:00 -04001915 struct nbd_config *config;
1916 int index = -1;
1917 int ret;
Josef Bacika2c97902017-04-06 17:02:07 -04001918 bool put_dev = false;
Josef Bacike46c7282017-04-06 17:02:00 -04001919
1920 if (!netlink_capable(skb, CAP_SYS_ADMIN))
1921 return -EPERM;
1922
1923 if (info->attrs[NBD_ATTR_INDEX])
1924 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
1925 if (!info->attrs[NBD_ATTR_SOCKETS]) {
1926 printk(KERN_ERR "nbd: must specify at least one socket\n");
1927 return -EINVAL;
1928 }
1929 if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
1930 printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
1931 return -EINVAL;
1932 }
1933again:
1934 mutex_lock(&nbd_index_mutex);
1935 if (index == -1) {
Christoph Hellwig438cd312021-08-25 18:31:07 +02001936 nbd = nbd_find_get_unused();
Josef Bacike46c7282017-04-06 17:02:00 -04001937 } else {
1938 nbd = idr_find(&nbd_index_idr, index);
Christoph Hellwig7ee656c2021-08-25 18:31:08 +02001939 if (nbd) {
1940 if ((test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
1941 test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
1942 !refcount_inc_not_zero(&nbd->refs)) {
Josef Bacike6a76272017-08-14 18:25:33 +00001943 mutex_unlock(&nbd_index_mutex);
Christoph Hellwig7ee656c2021-08-25 18:31:08 +02001944 pr_err("nbd: device at index %d is going down\n",
1945 index);
1946 return -EINVAL;
Josef Bacike6a76272017-08-14 18:25:33 +00001947 }
Josef Bacike6a76272017-08-14 18:25:33 +00001948 }
Josef Bacike46c7282017-04-06 17:02:00 -04001949 }
Josef Bacikc6a47592017-04-06 17:02:06 -04001950 mutex_unlock(&nbd_index_mutex);
Josef Bacike46c7282017-04-06 17:02:00 -04001951
Christoph Hellwig7ee656c2021-08-25 18:31:08 +02001952 if (!nbd) {
Christoph Hellwig6e4df4c2021-08-11 14:44:28 +02001953 nbd = nbd_dev_add(index, 2);
1954 if (IS_ERR(nbd)) {
Christoph Hellwig6177b562021-08-11 14:44:27 +02001955 pr_err("nbd: failed to add new device\n");
1956 return PTR_ERR(nbd);
1957 }
Josef Bacike46c7282017-04-06 17:02:00 -04001958 }
1959
Josef Bacike46c7282017-04-06 17:02:00 -04001960 mutex_lock(&nbd->config_lock);
1961 if (refcount_read(&nbd->config_refs)) {
1962 mutex_unlock(&nbd->config_lock);
Josef Bacikc6a47592017-04-06 17:02:06 -04001963 nbd_put(nbd);
Josef Bacike46c7282017-04-06 17:02:00 -04001964 if (index == -1)
1965 goto again;
1966 printk(KERN_ERR "nbd: nbd%d already in use\n", index);
1967 return -EBUSY;
1968 }
1969 if (WARN_ON(nbd->config)) {
1970 mutex_unlock(&nbd->config_lock);
Josef Bacikc6a47592017-04-06 17:02:06 -04001971 nbd_put(nbd);
Josef Bacike46c7282017-04-06 17:02:00 -04001972 return -EINVAL;
1973 }
1974 config = nbd->config = nbd_alloc_config();
1975 if (!nbd->config) {
1976 mutex_unlock(&nbd->config_lock);
Josef Bacikc6a47592017-04-06 17:02:06 -04001977 nbd_put(nbd);
Josef Bacike46c7282017-04-06 17:02:00 -04001978 printk(KERN_ERR "nbd: couldn't allocate config\n");
1979 return -ENOMEM;
1980 }
1981 refcount_set(&nbd->config_refs, 1);
Xiubo Liec76a7b2019-09-17 17:26:05 +05301982 set_bit(NBD_RT_BOUND, &config->runtime_flags);
Josef Bacike46c7282017-04-06 17:02:00 -04001983
Mike Christie4ddeaae82019-05-29 15:16:06 -05001984 ret = nbd_genl_size_set(info, nbd);
1985 if (ret)
1986 goto out;
1987
Mike Christie55313e92019-08-13 11:39:49 -05001988 if (info->attrs[NBD_ATTR_TIMEOUT])
1989 nbd_set_cmd_timeout(nbd,
1990 nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
Josef Bacik560bc4b2017-04-06 17:02:04 -04001991 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
1992 config->dead_conn_timeout =
1993 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
1994 config->dead_conn_timeout *= HZ;
1995 }
Josef Bacike46c7282017-04-06 17:02:00 -04001996 if (info->attrs[NBD_ATTR_SERVER_FLAGS])
1997 config->flags =
1998 nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]);
Josef Bacika2c97902017-04-06 17:02:07 -04001999 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
2000 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
2001 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
Josef Bacikc9a2f902021-02-22 15:09:53 -05002002 /*
2003 * We have 1 ref to keep the device around, and then 1
2004 * ref for our current operation here, which will be
2005 * inherited by the config. If we already have
2006 * DESTROY_ON_DISCONNECT set then we know we don't have
2007 * that extra ref already held so we don't need the
2008 * put_dev.
2009 */
2010 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
2011 &nbd->flags))
2012 put_dev = true;
Xiubo Li8454d682019-09-17 17:26:06 +05302013 } else {
Josef Bacikc9a2f902021-02-22 15:09:53 -05002014 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
2015 &nbd->flags))
2016 refcount_inc(&nbd->refs);
Josef Bacika2c97902017-04-06 17:02:07 -04002017 }
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002018 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
Xiubo Liec76a7b2019-09-17 17:26:05 +05302019 set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002020 &config->runtime_flags);
2021 }
Josef Bacika2c97902017-04-06 17:02:07 -04002022 }
2023
Josef Bacike46c7282017-04-06 17:02:00 -04002024 if (info->attrs[NBD_ATTR_SOCKETS]) {
2025 struct nlattr *attr;
2026 int rem, fd;
2027
2028 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
2029 rem) {
2030 struct nlattr *socks[NBD_SOCK_MAX+1];
2031
2032 if (nla_type(attr) != NBD_SOCK_ITEM) {
2033 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
2034 ret = -EINVAL;
2035 goto out;
2036 }
Johannes Berg8cb08172019-04-26 14:07:28 +02002037 ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
2038 attr,
2039 nbd_sock_policy,
2040 info->extack);
Josef Bacike46c7282017-04-06 17:02:00 -04002041 if (ret != 0) {
2042 printk(KERN_ERR "nbd: error processing sock list\n");
2043 ret = -EINVAL;
2044 goto out;
2045 }
2046 if (!socks[NBD_SOCK_FD])
2047 continue;
2048 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
2049 ret = nbd_add_socket(nbd, fd, true);
2050 if (ret)
2051 goto out;
2052 }
2053 }
2054 ret = nbd_start_device(nbd);
Prasanna Kumar Kalever6497ef82021-04-29 15:58:28 +05302055 if (ret)
2056 goto out;
2057 if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
2058 nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
2059 GFP_KERNEL);
2060 if (!nbd->backend) {
2061 ret = -ENOMEM;
2062 goto out;
2063 }
2064 }
2065 ret = device_create_file(disk_to_dev(nbd->disk), &backend_attr);
2066 if (ret) {
2067 dev_err(disk_to_dev(nbd->disk),
2068 "device_create_file failed for backend!\n");
2069 goto out;
2070 }
2071 set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags);
Josef Bacike46c7282017-04-06 17:02:00 -04002072out:
2073 mutex_unlock(&nbd->config_lock);
2074 if (!ret) {
Xiubo Liec76a7b2019-09-17 17:26:05 +05302075 set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
Josef Bacike46c7282017-04-06 17:02:00 -04002076 refcount_inc(&nbd->config_refs);
2077 nbd_connect_reply(info, nbd->index);
2078 }
2079 nbd_config_put(nbd);
Josef Bacika2c97902017-04-06 17:02:07 -04002080 if (put_dev)
2081 nbd_put(nbd);
Josef Bacike46c7282017-04-06 17:02:00 -04002082 return ret;
2083}
2084
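/*
 * Shared teardown helper: request a disconnect, shut the sockets down,
 * wait for the recv threads, cancel anything still in flight and drop
 * the config reference taken at connect time (if still held).
 */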
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002085static void nbd_disconnect_and_put(struct nbd_device *nbd)
2086{
2087 mutex_lock(&nbd->config_lock);
2088 nbd_disconnect(nbd);
Xie Yongjicddce012021-08-13 23:13:30 +08002089 sock_shutdown(nbd);
Mike Christiee9e006f2019-08-04 14:10:06 -05002090 /*
Ye Bine2daec42021-11-02 09:52:37 +08002091	 * Make sure the recv thread has finished, so that we can safely call
Xie Yongjicddce012021-08-13 23:13:30 +08002092	 * nbd_clear_que() to cancel the inflight I/Os.
Mike Christiee9e006f2019-08-04 14:10:06 -05002093 */
Ye Bine2daec42021-11-02 09:52:37 +08002094 flush_workqueue(nbd->recv_workq);
Xie Yongjicddce012021-08-13 23:13:30 +08002095 nbd_clear_que(nbd);
2096 nbd->task_setup = NULL;
2097 mutex_unlock(&nbd->config_lock);
2098
Xiubo Liec76a7b2019-09-17 17:26:05 +05302099 if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002100 &nbd->config->runtime_flags))
2101 nbd_config_put(nbd);
2102}
2103
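/*
 * NBD_CMD_DISCONNECT handler: look the device up by index and tear its
 * current configuration down.
 */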
Josef Bacike46c7282017-04-06 17:02:00 -04002104static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
2105{
2106 struct nbd_device *nbd;
2107 int index;
2108
2109 if (!netlink_capable(skb, CAP_SYS_ADMIN))
2110 return -EPERM;
2111
2112 if (!info->attrs[NBD_ATTR_INDEX]) {
2113 printk(KERN_ERR "nbd: must specify an index to disconnect\n");
2114 return -EINVAL;
2115 }
2116 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2117 mutex_lock(&nbd_index_mutex);
2118 nbd = idr_find(&nbd_index_idr, index);
Josef Bacike46c7282017-04-06 17:02:00 -04002119 if (!nbd) {
Josef Bacikc6a47592017-04-06 17:02:06 -04002120 mutex_unlock(&nbd_index_mutex);
Josef Bacike46c7282017-04-06 17:02:00 -04002121 printk(KERN_ERR "nbd: couldn't find device at index %d\n",
2122 index);
2123 return -EINVAL;
2124 }
Josef Bacikc6a47592017-04-06 17:02:06 -04002125 if (!refcount_inc_not_zero(&nbd->refs)) {
2126 mutex_unlock(&nbd_index_mutex);
2127 printk(KERN_ERR "nbd: device at index %d is going down\n",
2128 index);
2129 return -EINVAL;
2130 }
2131 mutex_unlock(&nbd_index_mutex);
Sun Kebedf78c2021-05-12 19:43:31 +08002132 if (!refcount_inc_not_zero(&nbd->config_refs))
2133 goto put_nbd;
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002134 nbd_disconnect_and_put(nbd);
Josef Bacike46c7282017-04-06 17:02:00 -04002135 nbd_config_put(nbd);
Sun Kebedf78c2021-05-12 19:43:31 +08002136put_nbd:
Josef Bacikc6a47592017-04-06 17:02:06 -04002137 nbd_put(nbd);
Josef Bacike46c7282017-04-06 17:02:00 -04002138 return 0;
2139}
2140
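/*
 * NBD_CMD_RECONFIGURE handler: adjust timeouts and flags on a running
 * device and plug replacement sockets into any dead connections.
 */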
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002141static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
2142{
2143 struct nbd_device *nbd = NULL;
2144 struct nbd_config *config;
2145 int index;
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002146 int ret = 0;
Josef Bacika2c97902017-04-06 17:02:07 -04002147 bool put_dev = false;
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002148
2149 if (!netlink_capable(skb, CAP_SYS_ADMIN))
2150 return -EPERM;
2151
2152 if (!info->attrs[NBD_ATTR_INDEX]) {
2153 printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
2154 return -EINVAL;
2155 }
2156 index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
2157 mutex_lock(&nbd_index_mutex);
2158 nbd = idr_find(&nbd_index_idr, index);
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002159 if (!nbd) {
Josef Bacikc6a47592017-04-06 17:02:06 -04002160 mutex_unlock(&nbd_index_mutex);
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002161 printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
2162 index);
2163 return -EINVAL;
2164 }
Prasanna Kumar Kalever6497ef82021-04-29 15:58:28 +05302165 if (nbd->backend) {
2166 if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
2167 if (nla_strcmp(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
2168 nbd->backend)) {
2169 mutex_unlock(&nbd_index_mutex);
2170 dev_err(nbd_to_dev(nbd),
2171 "backend image doesn't match with %s\n",
2172 nbd->backend);
2173 return -EINVAL;
2174 }
2175 } else {
2176 mutex_unlock(&nbd_index_mutex);
2177 dev_err(nbd_to_dev(nbd), "must specify backend\n");
2178 return -EINVAL;
2179 }
2180 }
Josef Bacikc6a47592017-04-06 17:02:06 -04002181 if (!refcount_inc_not_zero(&nbd->refs)) {
2182 mutex_unlock(&nbd_index_mutex);
2183 printk(KERN_ERR "nbd: device at index %d is going down\n",
2184 index);
2185 return -EINVAL;
2186 }
2187 mutex_unlock(&nbd_index_mutex);
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002188
2189 if (!refcount_inc_not_zero(&nbd->config_refs)) {
2190 dev_err(nbd_to_dev(nbd),
2191 "not configured, cannot reconfigure\n");
Josef Bacikc6a47592017-04-06 17:02:06 -04002192 nbd_put(nbd);
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002193 return -EINVAL;
2194 }
2195
2196 mutex_lock(&nbd->config_lock);
2197 config = nbd->config;
Xiubo Liec76a7b2019-09-17 17:26:05 +05302198 if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
Ye Bin0c980572021-10-20 15:39:59 +08002199 !nbd->pid) {
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002200 dev_err(nbd_to_dev(nbd),
2201 "not configured, cannot reconfigure\n");
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002202 ret = -EINVAL;
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002203 goto out;
2204 }
2205
Mike Christie4ddeaae82019-05-29 15:16:06 -05002206 ret = nbd_genl_size_set(info, nbd);
2207 if (ret)
2208 goto out;
2209
Mike Christie55313e92019-08-13 11:39:49 -05002210 if (info->attrs[NBD_ATTR_TIMEOUT])
2211 nbd_set_cmd_timeout(nbd,
2212 nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
Josef Bacik560bc4b2017-04-06 17:02:04 -04002213 if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
2214 config->dead_conn_timeout =
2215 nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
2216 config->dead_conn_timeout *= HZ;
2217 }
Josef Bacika2c97902017-04-06 17:02:07 -04002218 if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
2219 u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
2220 if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
Josef Bacikc9a2f902021-02-22 15:09:53 -05002221 if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
2222 &nbd->flags))
Josef Bacika2c97902017-04-06 17:02:07 -04002223 put_dev = true;
2224 } else {
Josef Bacikc9a2f902021-02-22 15:09:53 -05002225 if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
2226 &nbd->flags))
Josef Bacika2c97902017-04-06 17:02:07 -04002227 refcount_inc(&nbd->refs);
2228 }
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002229
2230 if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
Xiubo Liec76a7b2019-09-17 17:26:05 +05302231 set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002232 &config->runtime_flags);
2233 } else {
Xiubo Liec76a7b2019-09-17 17:26:05 +05302234 clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
Doron Roberts-Kedes08ba91e2018-06-15 14:05:32 -07002235 &config->runtime_flags);
2236 }
Josef Bacika2c97902017-04-06 17:02:07 -04002237 }
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002238
2239 if (info->attrs[NBD_ATTR_SOCKETS]) {
2240 struct nlattr *attr;
2241 int rem, fd;
2242
2243 nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS],
2244 rem) {
2245 struct nlattr *socks[NBD_SOCK_MAX+1];
2246
2247 if (nla_type(attr) != NBD_SOCK_ITEM) {
2248 printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
2249 ret = -EINVAL;
2250 goto out;
2251 }
Johannes Berg8cb08172019-04-26 14:07:28 +02002252 ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
2253 attr,
2254 nbd_sock_policy,
2255 info->extack);
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002256 if (ret != 0) {
2257 printk(KERN_ERR "nbd: error processing sock list\n");
2258 ret = -EINVAL;
2259 goto out;
2260 }
2261 if (!socks[NBD_SOCK_FD])
2262 continue;
2263 fd = (int)nla_get_u32(socks[NBD_SOCK_FD]);
2264 ret = nbd_reconnect_socket(nbd, fd);
2265 if (ret) {
2266 if (ret == -ENOSPC)
2267 ret = 0;
2268 goto out;
2269 }
2270 dev_info(nbd_to_dev(nbd), "reconnected socket\n");
2271 }
2272 }
2273out:
2274 mutex_unlock(&nbd->config_lock);
2275 nbd_config_put(nbd);
Josef Bacikc6a47592017-04-06 17:02:06 -04002276 nbd_put(nbd);
Josef Bacika2c97902017-04-06 17:02:07 -04002277 if (put_dev)
2278 nbd_put(nbd);
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002279 return ret;
2280}
2281
Jakub Kicinski66a9b922020-10-02 14:49:54 -07002282static const struct genl_small_ops nbd_connect_genl_ops[] = {
Josef Bacike46c7282017-04-06 17:02:00 -04002283 {
2284 .cmd = NBD_CMD_CONNECT,
Johannes Bergef6243a2019-04-26 14:07:31 +02002285 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
Josef Bacike46c7282017-04-06 17:02:00 -04002286 .doit = nbd_genl_connect,
2287 },
2288 {
2289 .cmd = NBD_CMD_DISCONNECT,
Johannes Bergef6243a2019-04-26 14:07:31 +02002290 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
Josef Bacike46c7282017-04-06 17:02:00 -04002291 .doit = nbd_genl_disconnect,
2292 },
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002293 {
2294 .cmd = NBD_CMD_RECONFIGURE,
Johannes Bergef6243a2019-04-26 14:07:31 +02002295 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
Josef Bacikb7aa3d32017-04-06 17:02:01 -04002296 .doit = nbd_genl_reconfigure,
2297 },
Josef Bacik47d902b2017-04-06 17:02:05 -04002298 {
2299 .cmd = NBD_CMD_STATUS,
Johannes Bergef6243a2019-04-26 14:07:31 +02002300 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
Josef Bacik47d902b2017-04-06 17:02:05 -04002301 .doit = nbd_genl_status,
2302 },
Josef Bacike46c7282017-04-06 17:02:00 -04002303};
2304
Josef Bacik799f9a32017-04-06 17:02:02 -04002305static const struct genl_multicast_group nbd_mcast_grps[] = {
2306 { .name = NBD_GENL_MCAST_GROUP_NAME, },
2307};
2308
Josef Bacike46c7282017-04-06 17:02:00 -04002309static struct genl_family nbd_genl_family __ro_after_init = {
2310 .hdrsize = 0,
2311 .name = NBD_GENL_FAMILY_NAME,
2312 .version = NBD_GENL_VERSION,
2313 .module = THIS_MODULE,
Jakub Kicinski66a9b922020-10-02 14:49:54 -07002314 .small_ops = nbd_connect_genl_ops,
2315 .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
Josef Bacike46c7282017-04-06 17:02:00 -04002316 .maxattr = NBD_ATTR_MAX,
Johannes Berg3b0f31f2019-03-21 22:51:02 +01002317 .policy = nbd_attr_policy,
Josef Bacik799f9a32017-04-06 17:02:02 -04002318 .mcgrps = nbd_mcast_grps,
2319 .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
Josef Bacike46c7282017-04-06 17:02:00 -04002320};
2321
Josef Bacik47d902b2017-04-06 17:02:05 -04002322static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply)
2323{
2324 struct nlattr *dev_opt;
2325 u8 connected = 0;
2326 int ret;
2327
2328 /* This is a little racey, but for status it's ok. The
2329 * reason we don't take a ref here is because we can't
2330 * take a ref in the index == -1 case as we would need
2331 * to put under the nbd_index_mutex, which could
2332 * deadlock if we are configured to remove ourselves
2333 * once we're disconnected.
2334 */
2335 if (refcount_read(&nbd->config_refs))
2336 connected = 1;
Michal Kubecekae0be8d2019-04-26 11:13:06 +02002337 dev_opt = nla_nest_start_noflag(reply, NBD_DEVICE_ITEM);
Josef Bacik47d902b2017-04-06 17:02:05 -04002338 if (!dev_opt)
2339 return -EMSGSIZE;
2340 ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
2341 if (ret)
2342 return -EMSGSIZE;
2343 ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED,
2344 connected);
2345 if (ret)
2346 return -EMSGSIZE;
2347 nla_nest_end(reply, dev_opt);
2348 return 0;
2349}
2350
static int status_cb(int id, void *ptr, void *data)
{
	struct nbd_device *nbd = ptr;
	return populate_nbd_status(nbd, (struct sk_buff *)data);
}

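/*
 * NBD_CMD_STATUS handler.  The reply nests one NBD_DEVICE_ITEM per device
 * inside NBD_ATTR_DEVICE_LIST; each item carries NBD_DEVICE_INDEX (u32) and
 * NBD_DEVICE_CONNECTED (u8), as built by populate_nbd_status() above.  If
 * NBD_ATTR_INDEX is supplied only that device is reported, otherwise every
 * registered device is walked under nbd_index_mutex.
 */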
static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *dev_list;
	struct sk_buff *reply;
	void *reply_head;
	size_t msg_size;
	int index = -1;
	int ret = -ENOMEM;

	if (info->attrs[NBD_ATTR_INDEX])
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);

	mutex_lock(&nbd_index_mutex);

	msg_size = nla_total_size(nla_attr_size(sizeof(u32)) +
				  nla_attr_size(sizeof(u8)));
	msg_size *= (index == -1) ? nbd_total_devices : 1;

	reply = genlmsg_new(msg_size, GFP_KERNEL);
	if (!reply)
		goto out;
	reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0,
				       NBD_CMD_STATUS);
	if (!reply_head) {
		nlmsg_free(reply);
		goto out;
	}

	dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
	if (index == -1) {
		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
		if (ret) {
			nlmsg_free(reply);
			goto out;
		}
	} else {
		struct nbd_device *nbd;
		nbd = idr_find(&nbd_index_idr, index);
		if (nbd) {
			ret = populate_nbd_status(nbd, reply);
			if (ret) {
				nlmsg_free(reply);
				goto out;
			}
		}
	}
	nla_nest_end(reply, dev_list);
	genlmsg_end(reply, reply_head);
	ret = genlmsg_reply(reply, info);
out:
	mutex_unlock(&nbd_index_mutex);
	return ret;
}

static void nbd_connect_reply(struct genl_info *info, int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put_reply(skb, info, &nbd_genl_family, 0,
				     NBD_CMD_CONNECT);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_reply(skb, info);
}

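/*
 * Broadcast an NBD_CMD_LINK_DEAD notification carrying the device index to
 * the NBD_GENL_MCAST_GROUP_NAME multicast group, so that any listener (for
 * example a userspace helper supervising the connection) can learn that a
 * link went down and decide whether to reconnect.
 */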
static void nbd_mcast_index(int index)
{
	struct sk_buff *skb;
	void *msg_head;
	int ret;

	skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
	if (!skb)
		return;
	msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0,
			       NBD_CMD_LINK_DEAD);
	if (!msg_head) {
		nlmsg_free(skb);
		return;
	}
	ret = nla_put_u32(skb, NBD_ATTR_INDEX, index);
	if (ret) {
		nlmsg_free(skb);
		return;
	}
	genlmsg_end(skb, msg_head);
	genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL);
}

static void nbd_dead_link_work(struct work_struct *work)
{
	struct link_dead_args *args = container_of(work, struct link_dead_args,
						   work);
	nbd_mcast_index(args->index);
	kfree(args);
}

static int __init nbd_init(void)
{
	int i;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space so that users can know the maximum number of
		 * partitions the kernel should be able to manage.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}
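	/*
	 * For example, with the default max_part of 16: fls(16) == 5, so
	 * part_shift becomes 5, max_part is adjusted to 31, and each nbd
	 * device spans 1 << 5 == 32 minor numbers (whole disk plus 31
	 * partitions).
	 */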

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	if (register_blkdev(NBD_MAJOR, "nbd"))
		return -EIO;

	nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0);
	if (!nbd_del_wq) {
		unregister_blkdev(NBD_MAJOR, "nbd");
		return -ENOMEM;
	}

	if (genl_register_family(&nbd_genl_family)) {
		destroy_workqueue(nbd_del_wq);
		unregister_blkdev(NBD_MAJOR, "nbd");
		return -EINVAL;
	}
	nbd_dbg_init();

	for (i = 0; i < nbds_max; i++)
		nbd_dev_add(i, 1);
	return 0;
}

static int nbd_exit_cb(int id, void *ptr, void *data)
{
	struct list_head *list = (struct list_head *)data;
	struct nbd_device *nbd = ptr;

	/* Skip any nbd device that is being removed asynchronously */
	if (refcount_read(&nbd->refs))
		list_add_tail(&nbd->list, list);

	return 0;
}

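/*
 * Module unload: close debugfs first, then collect every device that still
 * holds a reference under nbd_index_mutex and drop that reference, flush and
 * destroy the "nbd-del" workqueue so asynchronous removals finish, and
 * finally unregister the netlink family and the block major.
 */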
static void __exit nbd_cleanup(void)
{
	struct nbd_device *nbd;
	LIST_HEAD(del_list);

	nbd_dbg_close();

	mutex_lock(&nbd_index_mutex);
	idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list);
	mutex_unlock(&nbd_index_mutex);

	while (!list_empty(&del_list)) {
		nbd = list_first_entry(&del_list, struct nbd_device, list);
		list_del_init(&nbd->list);
		if (refcount_read(&nbd->refs) != 1)
			printk(KERN_ERR "nbd: possibly leaking a device\n");
		nbd_put(nbd);
	}

	/* Also wait for nbd_dev_remove_work() to complete */
	destroy_workqueue(nbd_del_wq);

	idr_destroy(&nbd_index_idr);
	genl_unregister_family(&nbd_genl_family);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");