blob: 6a65b0516180f1908f822a6a4f48fc73d077f20d [file] [log] [blame]
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001// SPDX-License-Identifier: GPL-2.0
2/*
3 * NVMe over Fabrics TCP host.
4 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7#include <linux/module.h>
8#include <linux/init.h>
9#include <linux/slab.h>
10#include <linux/err.h>
11#include <linux/nvme-tcp.h>
12#include <net/sock.h>
13#include <net/tcp.h>
14#include <linux/blk-mq.h>
15#include <crypto/hash.h>
Sagi Grimberg1a9460c2019-07-03 14:08:04 -070016#include <net/busy_poll.h>
Sagi Grimberg3f2304f2018-12-03 17:52:17 -080017
18#include "nvme.h"
19#include "fabrics.h"
20
21struct nvme_tcp_queue;
22
/*
 * Socket priority to use for connections where it is desirable that the
 * NIC consider performing optimized packet processing or filtering.
 * Any non-zero value is sufficient to indicate general consideration of
 * possible optimizations. Making it a module parameter allows alternative
 * values that may be unique to some NIC implementations.
 */
static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
32
/* Transmission stage of a request; stored in nvme_tcp_request.state. */
enum nvme_tcp_send_state {
	NVME_TCP_SEND_CMD_PDU = 0,	/* presumably the command capsule PDU */
	NVME_TCP_SEND_H2C_PDU,		/* entered when an R2T has been handled */
	NVME_TCP_SEND_DATA,		/* presumably the request payload */
	NVME_TCP_SEND_DDGST,		/* entered once the send data digest is finalized */
};
39
/* Per-request transport state; lives in the blk-mq request PDU area. */
struct nvme_tcp_request {
	struct nvme_request	req;
	void			*pdu;		/* PDU buffer allocated in nvme_tcp_init_request() */
	struct nvme_tcp_queue	*queue;		/* queue this request belongs to */
	u32			data_len;	/* total payload length of the request */
	u32			pdu_len;	/* payload length of the current data PDU */
	u32			pdu_sent;	/* payload bytes of the current data PDU already sent */
	u16			ttag;
	struct list_head	entry;		/* link on nvme_tcp_queue.send_list */
	struct llist_node	lentry;		/* link on nvme_tcp_queue.req_list */
	__le32			ddgst;		/* data digest finalized after the last payload send */

	struct bio		*curr_bio;	/* bio the iterator currently walks */
	struct iov_iter		iter;		/* iterator over the payload of curr_bio */

	/* send state */
	size_t			offset;		/* reset to 0 on each state change */
	size_t			data_sent;	/* payload bytes sent so far for the whole request */
	enum nvme_tcp_send_state state;
};
60
/* Bit numbers used with nvme_tcp_queue.flags. */
enum nvme_tcp_queue_flags {
	NVME_TCP_Q_ALLOCATED	= 0,
	NVME_TCP_Q_LIVE		= 1,
	/* while set, nvme_tcp_data_ready() does not schedule io_work */
	NVME_TCP_Q_POLLING	= 2,
};
66
/*
 * Receive stage of a queue; derived from the remaining-byte counters by
 * nvme_tcp_recv_state() rather than stored.
 */
enum nvme_tcp_recv_state {
	NVME_TCP_RECV_PDU = 0,	/* PDU header bytes still outstanding */
	NVME_TCP_RECV_DATA,	/* neither header nor digest bytes outstanding */
	NVME_TCP_RECV_DDGST,	/* data digest bytes still outstanding */
};
72
73struct nvme_tcp_ctrl;
/* State of one NVMe/TCP queue; element of nvme_tcp_ctrl.queues[]. */
struct nvme_tcp_queue {
	struct socket		*sock;		/* socket the queue sends on */
	struct work_struct	io_work;	/* queued on nvme_tcp_wq */
	int			io_cpu;		/* CPU io_work is queued on */

	struct mutex		queue_lock;
	struct mutex		send_mutex;	/* held around direct sends from the submitter */
	struct llist_head	req_list;	/* newly queued requests (lockless list) */
	struct list_head	send_list;	/* requests moved over from req_list */
	bool			more_requests;	/* set during a direct send that is not the last */

	/* recv state */
	void			*pdu;		/* buffer the incoming PDU header is copied into */
	int			pdu_remaining;	/* header bytes still to receive */
	int			pdu_offset;	/* header bytes received so far */
	size_t			data_remaining;	/* payload bytes still to receive */
	size_t			ddgst_remaining; /* data digest bytes still to receive */
	unsigned int		nr_cqe;		/* incremented for every completed request */

	/* send state */
	struct nvme_tcp_request *request;	/* cleared by nvme_tcp_done_send_req() */

	int			queue_size;
	size_t			cmnd_capsule_len;
	struct nvme_tcp_ctrl	*ctrl;		/* owning controller */
	unsigned long		flags;		/* enum nvme_tcp_queue_flags bits */
	bool			rd_enabled;	/* cleared when receive processing fails */

	bool			hdr_digest;	/* header digest in use */
	bool			data_digest;	/* data digest in use */
	struct ahash_request	*rcv_hash;	/* hash request for received headers/data */
	struct ahash_request	*snd_hash;	/* hash request for sent headers/data */
	__le32			exp_ddgst;	/* digest computed over received data */
	__le32			recv_ddgst;	/* digest received from the wire */

	struct page_frag_cache	pf_cache;	/* backs request PDU allocations */

	/*
	 * Socket callbacks; state_change is invoked from
	 * nvme_tcp_state_change(). Presumably the callbacks that were
	 * installed on the socket before this driver took over - confirm
	 * against the queue setup code.
	 */
	void (*state_change)(struct sock *);
	void (*data_ready)(struct sock *);
	void (*write_space)(struct sock *);
};
115
/* TCP transport controller wrapping the generic struct nvme_ctrl. */
struct nvme_tcp_ctrl {
	/* read only in the hot path */
	struct nvme_tcp_queue	*queues;	/* index 0 is the admin queue */
	struct blk_mq_tag_set	tag_set;	/* I/O queues; tags[i] serves queues[i + 1] */

	/* other member variables */
	struct list_head	list;
	struct blk_mq_tag_set	admin_tag_set;	/* admin queue */
	struct sockaddr_storage addr;
	struct sockaddr_storage src_addr;
	struct net_device	*ndev;
	struct nvme_ctrl	ctrl;		/* embedded generic controller, see to_tcp_ctrl() */

	struct work_struct	err_work;	/* queued by nvme_tcp_error_recovery() */
	struct delayed_work	connect_work;
	struct nvme_tcp_request async_req;	/* request used for async events */
	u32			io_queues[HCTX_MAX_TYPES];
};
134
/* File-scope state and forward declarations used by the helpers below. */
static LIST_HEAD(nvme_tcp_ctrl_list);
static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
/* workqueue that every queue's io_work is queued on */
static struct workqueue_struct *nvme_tcp_wq;
static const struct blk_mq_ops nvme_tcp_mq_ops;
static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800141
142static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
143{
144 return container_of(ctrl, struct nvme_tcp_ctrl, ctrl);
145}
146
147static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
148{
149 return queue - queue->ctrl->queues;
150}
151
152static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
153{
154 u32 queue_idx = nvme_tcp_queue_id(queue);
155
156 if (queue_idx == 0)
157 return queue->ctrl->admin_tag_set.tags[queue_idx];
158 return queue->ctrl->tag_set.tags[queue_idx - 1];
159}
160
161static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
162{
163 return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
164}
165
166static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
167{
168 return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
169}
170
171static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
172{
173 return queue->cmnd_capsule_len - sizeof(struct nvme_command);
174}
175
176static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
177{
178 return req == &req->queue->ctrl->async_req;
179}
180
181static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
182{
183 struct request *rq;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800184
185 if (unlikely(nvme_tcp_async_req(req)))
186 return false; /* async events don't have a request */
187
188 rq = blk_mq_rq_from_pdu(req);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800189
Sagi Grimberg25e5cb72020-03-23 15:06:30 -0700190 return rq_data_dir(rq) == WRITE && req->data_len &&
191 req->data_len <= nvme_tcp_inline_data_size(req->queue);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800192}
193
194static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
195{
196 return req->iter.bvec->bv_page;
197}
198
199static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req)
200{
201 return req->iter.bvec->bv_offset + req->iter.iov_offset;
202}
203
204static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
205{
Sagi Grimbergca1ff672021-01-13 13:56:57 -0800206 return min_t(size_t, iov_iter_single_seg_count(&req->iter),
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800207 req->pdu_len - req->pdu_sent);
208}
209
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800210static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req)
211{
212 return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ?
213 req->pdu_len - req->pdu_sent : 0;
214}
215
216static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
217 int len)
218{
219 return nvme_tcp_pdu_data_left(req) <= len;
220}
221
222static void nvme_tcp_init_iter(struct nvme_tcp_request *req,
223 unsigned int dir)
224{
225 struct request *rq = blk_mq_rq_from_pdu(req);
226 struct bio_vec *vec;
227 unsigned int size;
Sagi Grimberg0dc9eda2021-01-14 13:15:26 -0800228 int nr_bvec;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800229 size_t offset;
230
231 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
232 vec = &rq->special_vec;
Sagi Grimberg0dc9eda2021-01-14 13:15:26 -0800233 nr_bvec = 1;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800234 size = blk_rq_payload_bytes(rq);
235 offset = 0;
236 } else {
237 struct bio *bio = req->curr_bio;
Sagi Grimberg0dc9eda2021-01-14 13:15:26 -0800238 struct bvec_iter bi;
239 struct bio_vec bv;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800240
241 vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
Sagi Grimberg0dc9eda2021-01-14 13:15:26 -0800242 nr_bvec = 0;
243 bio_for_each_bvec(bv, bio, bi) {
244 nr_bvec++;
245 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800246 size = bio->bi_iter.bi_size;
247 offset = bio->bi_iter.bi_bvec_done;
248 }
249
Sagi Grimberg0dc9eda2021-01-14 13:15:26 -0800250 iov_iter_bvec(&req->iter, dir, vec, nr_bvec, size);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800251 req->iter.iov_offset = offset;
252}
253
254static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
255 int len)
256{
257 req->data_sent += len;
258 req->pdu_sent += len;
259 iov_iter_advance(&req->iter, len);
260 if (!iov_iter_count(&req->iter) &&
261 req->data_sent < req->data_len) {
262 req->curr_bio = req->curr_bio->bi_next;
263 nvme_tcp_init_iter(req, WRITE);
264 }
265}
266
/* Keep calling nvme_tcp_try_send() for as long as it reports progress. */
static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
{
	/* drain the send queue as much as we can... */
	while (nvme_tcp_try_send(queue) > 0)
		;
}
276
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700277static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
Sagi Grimberg86f03482020-06-18 17:30:23 -0700278 bool sync, bool last)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800279{
280 struct nvme_tcp_queue *queue = req->queue;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700281 bool empty;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800282
Sagi Grimberg15ec9282020-06-18 17:30:22 -0700283 empty = llist_add(&req->lentry, &queue->req_list) &&
284 list_empty(&queue->send_list) && !queue->request;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800285
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700286 /*
287 * if we're the first on the send_list and we can try to send
288 * directly, otherwise queue io_work. Also, only do that if we
289 * are on the same cpu, so we don't introduce contention.
290 */
Sagi Grimbergbb833372021-03-15 13:53:47 -0700291 if (queue->io_cpu == raw_smp_processor_id() &&
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700292 sync && empty && mutex_trylock(&queue->send_mutex)) {
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700293 queue->more_requests = !last;
Sagi Grimberg5c11f7d2020-12-21 00:03:39 -0800294 nvme_tcp_send_all(queue);
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700295 queue->more_requests = false;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700296 mutex_unlock(&queue->send_mutex);
Sagi Grimberg86f03482020-06-18 17:30:23 -0700297 } else if (last) {
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700298 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
299 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800300}
301
Sagi Grimberg15ec9282020-06-18 17:30:22 -0700302static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
303{
304 struct nvme_tcp_request *req;
305 struct llist_node *node;
306
307 for (node = llist_del_all(&queue->req_list); node; node = node->next) {
308 req = llist_entry(node, struct nvme_tcp_request, lentry);
309 list_add(&req->entry, &queue->send_list);
310 }
311}
312
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800313static inline struct nvme_tcp_request *
314nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
315{
316 struct nvme_tcp_request *req;
317
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800318 req = list_first_entry_or_null(&queue->send_list,
319 struct nvme_tcp_request, entry);
Sagi Grimberg15ec9282020-06-18 17:30:22 -0700320 if (!req) {
321 nvme_tcp_process_req_list(queue);
322 req = list_first_entry_or_null(&queue->send_list,
323 struct nvme_tcp_request, entry);
324 if (unlikely(!req))
325 return NULL;
326 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800327
Sagi Grimberg15ec9282020-06-18 17:30:22 -0700328 list_del(&req->entry);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800329 return req;
330}
331
Christoph Hellwiga7273d42018-12-13 09:46:59 +0100332static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
333 __le32 *dgst)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800334{
335 ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
336 crypto_ahash_final(hash);
337}
338
339static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
340 struct page *page, off_t off, size_t len)
341{
342 struct scatterlist sg;
343
344 sg_init_marker(&sg, 1);
345 sg_set_page(&sg, page, len, off);
346 ahash_request_set_crypt(hash, &sg, NULL, len);
347 crypto_ahash_update(hash);
348}
349
350static inline void nvme_tcp_hdgst(struct ahash_request *hash,
351 void *pdu, size_t len)
352{
353 struct scatterlist sg;
354
355 sg_init_one(&sg, pdu, len);
356 ahash_request_set_crypt(hash, &sg, pdu + len, len);
357 crypto_ahash_digest(hash);
358}
359
360static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
361 void *pdu, size_t pdu_len)
362{
363 struct nvme_tcp_hdr *hdr = pdu;
364 __le32 recv_digest;
365 __le32 exp_digest;
366
367 if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
368 dev_err(queue->ctrl->ctrl.device,
369 "queue %d: header digest flag is cleared\n",
370 nvme_tcp_queue_id(queue));
371 return -EPROTO;
372 }
373
374 recv_digest = *(__le32 *)(pdu + hdr->hlen);
375 nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
376 exp_digest = *(__le32 *)(pdu + hdr->hlen);
377 if (recv_digest != exp_digest) {
378 dev_err(queue->ctrl->ctrl.device,
379 "header digest error: recv %#x expected %#x\n",
380 le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
381 return -EIO;
382 }
383
384 return 0;
385}
386
387static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
388{
389 struct nvme_tcp_hdr *hdr = pdu;
390 u8 digest_len = nvme_tcp_hdgst_len(queue);
391 u32 len;
392
393 len = le32_to_cpu(hdr->plen) - hdr->hlen -
394 ((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0);
395
396 if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
397 dev_err(queue->ctrl->ctrl.device,
398 "queue %d: data digest flag is cleared\n",
399 nvme_tcp_queue_id(queue));
400 return -EPROTO;
401 }
402 crypto_ahash_init(queue->rcv_hash);
403
404 return 0;
405}
406
407static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
408 struct request *rq, unsigned int hctx_idx)
409{
410 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
411
412 page_frag_free(req->pdu);
413}
414
415static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
416 struct request *rq, unsigned int hctx_idx,
417 unsigned int numa_node)
418{
419 struct nvme_tcp_ctrl *ctrl = set->driver_data;
420 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
Keith Buschf4b9e6c2021-03-17 13:37:03 -0700421 struct nvme_tcp_cmd_pdu *pdu;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800422 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
423 struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx];
424 u8 hdgst = nvme_tcp_hdgst_len(queue);
425
426 req->pdu = page_frag_alloc(&queue->pf_cache,
427 sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
428 GFP_KERNEL | __GFP_ZERO);
429 if (!req->pdu)
430 return -ENOMEM;
431
Keith Buschf4b9e6c2021-03-17 13:37:03 -0700432 pdu = req->pdu;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800433 req->queue = queue;
434 nvme_req(rq)->ctrl = &ctrl->ctrl;
Keith Buschf4b9e6c2021-03-17 13:37:03 -0700435 nvme_req(rq)->cmd = &pdu->cmd;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800436
437 return 0;
438}
439
440static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
441 unsigned int hctx_idx)
442{
443 struct nvme_tcp_ctrl *ctrl = data;
444 struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];
445
446 hctx->driver_data = queue;
447 return 0;
448}
449
450static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
451 unsigned int hctx_idx)
452{
453 struct nvme_tcp_ctrl *ctrl = data;
454 struct nvme_tcp_queue *queue = &ctrl->queues[0];
455
456 hctx->driver_data = queue;
457 return 0;
458}
459
460static enum nvme_tcp_recv_state
461nvme_tcp_recv_state(struct nvme_tcp_queue *queue)
462{
463 return (queue->pdu_remaining) ? NVME_TCP_RECV_PDU :
464 (queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST :
465 NVME_TCP_RECV_DATA;
466}
467
468static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
469{
470 queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) +
471 nvme_tcp_hdgst_len(queue);
472 queue->pdu_offset = 0;
473 queue->data_remaining = -1;
474 queue->ddgst_remaining = 0;
475}
476
477static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
478{
479 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
480 return;
481
Sagi Grimberg236187c2020-07-28 13:16:36 -0700482 dev_warn(ctrl->device, "starting error recovery\n");
Nigel Kirkland97b25122020-02-10 16:01:45 -0800483 queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800484}
485
486static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
487 struct nvme_completion *cqe)
488{
489 struct request *rq;
490
491 rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
492 if (!rq) {
493 dev_err(queue->ctrl->ctrl.device,
494 "queue %d tag 0x%x not found\n",
495 nvme_tcp_queue_id(queue), cqe->command_id);
496 nvme_tcp_error_recovery(&queue->ctrl->ctrl);
497 return -EINVAL;
498 }
499
Christoph Hellwig2eb81a32020-08-18 09:11:29 +0200500 if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
Christoph Hellwigff029452020-06-11 08:44:52 +0200501 nvme_complete_rq(rq);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -0700502 queue->nr_cqe++;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800503
504 return 0;
505}
506
507static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
508 struct nvme_tcp_data_pdu *pdu)
509{
510 struct request *rq;
511
512 rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
513 if (!rq) {
514 dev_err(queue->ctrl->ctrl.device,
515 "queue %d tag %#x not found\n",
516 nvme_tcp_queue_id(queue), pdu->command_id);
517 return -ENOENT;
518 }
519
520 if (!blk_rq_payload_bytes(rq)) {
521 dev_err(queue->ctrl->ctrl.device,
522 "queue %d tag %#x unexpected data\n",
523 nvme_tcp_queue_id(queue), rq->tag);
524 return -EIO;
525 }
526
527 queue->data_remaining = le32_to_cpu(pdu->data_length);
528
Sagi Grimberg602d6742019-03-13 18:55:10 +0100529 if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS &&
530 unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) {
531 dev_err(queue->ctrl->ctrl.device,
532 "queue %d tag %#x SUCCESS set but not last PDU\n",
533 nvme_tcp_queue_id(queue), rq->tag);
534 nvme_tcp_error_recovery(&queue->ctrl->ctrl);
535 return -EPROTO;
536 }
537
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800538 return 0;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800539}
540
541static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
542 struct nvme_tcp_rsp_pdu *pdu)
543{
544 struct nvme_completion *cqe = &pdu->cqe;
545 int ret = 0;
546
547 /*
548 * AEN requests are special as they don't time out and can
549 * survive any kind of queue freeze and often don't respond to
550 * aborts. We don't even bother to allocate a struct request
551 * for them but rather special case them here.
552 */
Israel Rukshin58a8df62019-10-13 19:57:31 +0300553 if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
554 cqe->command_id)))
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800555 nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
556 &cqe->result);
557 else
558 ret = nvme_tcp_process_nvme_cqe(queue, cqe);
559
560 return ret;
561}
562
563static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
564 struct nvme_tcp_r2t_pdu *pdu)
565{
566 struct nvme_tcp_data_pdu *data = req->pdu;
567 struct nvme_tcp_queue *queue = req->queue;
568 struct request *rq = blk_mq_rq_from_pdu(req);
569 u8 hdgst = nvme_tcp_hdgst_len(queue);
570 u8 ddgst = nvme_tcp_ddgst_len(queue);
571
572 req->pdu_len = le32_to_cpu(pdu->r2t_length);
573 req->pdu_sent = 0;
574
Sagi Grimbergfd0823f2021-03-15 14:08:11 -0700575 if (unlikely(!req->pdu_len)) {
576 dev_err(queue->ctrl->ctrl.device,
577 "req %d r2t len is %u, probably a bug...\n",
578 rq->tag, req->pdu_len);
579 return -EPROTO;
580 }
581
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800582 if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
583 dev_err(queue->ctrl->ctrl.device,
584 "req %d r2t len %u exceeded data len %u (%zu sent)\n",
585 rq->tag, req->pdu_len, req->data_len,
586 req->data_sent);
587 return -EPROTO;
588 }
589
590 if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
591 dev_err(queue->ctrl->ctrl.device,
592 "req %d unexpected r2t offset %u (expected %zu)\n",
593 rq->tag, le32_to_cpu(pdu->r2t_offset),
594 req->data_sent);
595 return -EPROTO;
596 }
597
598 memset(data, 0, sizeof(*data));
599 data->hdr.type = nvme_tcp_h2c_data;
600 data->hdr.flags = NVME_TCP_F_DATA_LAST;
601 if (queue->hdr_digest)
602 data->hdr.flags |= NVME_TCP_F_HDGST;
603 if (queue->data_digest)
604 data->hdr.flags |= NVME_TCP_F_DDGST;
605 data->hdr.hlen = sizeof(*data);
606 data->hdr.pdo = data->hdr.hlen + hdgst;
607 data->hdr.plen =
608 cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
609 data->ttag = pdu->ttag;
610 data->command_id = rq->tag;
611 data->data_offset = cpu_to_le32(req->data_sent);
612 data->data_length = cpu_to_le32(req->pdu_len);
613 return 0;
614}
615
616static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
617 struct nvme_tcp_r2t_pdu *pdu)
618{
619 struct nvme_tcp_request *req;
620 struct request *rq;
621 int ret;
622
623 rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
624 if (!rq) {
625 dev_err(queue->ctrl->ctrl.device,
626 "queue %d tag %#x not found\n",
627 nvme_tcp_queue_id(queue), pdu->command_id);
628 return -ENOENT;
629 }
630 req = blk_mq_rq_to_pdu(rq);
631
632 ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
633 if (unlikely(ret))
634 return ret;
635
636 req->state = NVME_TCP_SEND_H2C_PDU;
637 req->offset = 0;
638
Sagi Grimberg86f03482020-06-18 17:30:23 -0700639 nvme_tcp_queue_request(req, false, true);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800640
641 return 0;
642}
643
644static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
645 unsigned int *offset, size_t *len)
646{
647 struct nvme_tcp_hdr *hdr;
648 char *pdu = queue->pdu;
649 size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining);
650 int ret;
651
652 ret = skb_copy_bits(skb, *offset,
653 &pdu[queue->pdu_offset], rcv_len);
654 if (unlikely(ret))
655 return ret;
656
657 queue->pdu_remaining -= rcv_len;
658 queue->pdu_offset += rcv_len;
659 *offset += rcv_len;
660 *len -= rcv_len;
661 if (queue->pdu_remaining)
662 return 0;
663
664 hdr = queue->pdu;
665 if (queue->hdr_digest) {
666 ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
667 if (unlikely(ret))
668 return ret;
669 }
670
671
672 if (queue->data_digest) {
673 ret = nvme_tcp_check_ddgst(queue, queue->pdu);
674 if (unlikely(ret))
675 return ret;
676 }
677
678 switch (hdr->type) {
679 case nvme_tcp_c2h_data:
Sagi Grimberg6be18262019-07-19 12:46:46 -0700680 return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800681 case nvme_tcp_rsp:
682 nvme_tcp_init_recv_ctx(queue);
Sagi Grimberg6be18262019-07-19 12:46:46 -0700683 return nvme_tcp_handle_comp(queue, (void *)queue->pdu);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800684 case nvme_tcp_r2t:
685 nvme_tcp_init_recv_ctx(queue);
Sagi Grimberg6be18262019-07-19 12:46:46 -0700686 return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800687 default:
688 dev_err(queue->ctrl->ctrl.device,
689 "unsupported pdu type (%d)\n", hdr->type);
690 return -EINVAL;
691 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800692}
693
Christoph Hellwig988aef9e2019-03-15 08:41:04 +0100694static inline void nvme_tcp_end_request(struct request *rq, u16 status)
Sagi Grimberg602d6742019-03-13 18:55:10 +0100695{
696 union nvme_result res = {};
697
Christoph Hellwig2eb81a32020-08-18 09:11:29 +0200698 if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
Christoph Hellwigff029452020-06-11 08:44:52 +0200699 nvme_complete_rq(rq);
Sagi Grimberg602d6742019-03-13 18:55:10 +0100700}
701
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800702static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
703 unsigned int *offset, size_t *len)
704{
705 struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
706 struct nvme_tcp_request *req;
707 struct request *rq;
708
709 rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
710 if (!rq) {
711 dev_err(queue->ctrl->ctrl.device,
712 "queue %d tag %#x not found\n",
713 nvme_tcp_queue_id(queue), pdu->command_id);
714 return -ENOENT;
715 }
716 req = blk_mq_rq_to_pdu(rq);
717
718 while (true) {
719 int recv_len, ret;
720
721 recv_len = min_t(size_t, *len, queue->data_remaining);
722 if (!recv_len)
723 break;
724
725 if (!iov_iter_count(&req->iter)) {
726 req->curr_bio = req->curr_bio->bi_next;
727
728 /*
729 * If we don`t have any bios it means that controller
730 * sent more data than we requested, hence error
731 */
732 if (!req->curr_bio) {
733 dev_err(queue->ctrl->ctrl.device,
734 "queue %d no space in request %#x",
735 nvme_tcp_queue_id(queue), rq->tag);
736 nvme_tcp_init_recv_ctx(queue);
737 return -EIO;
738 }
739 nvme_tcp_init_iter(req, READ);
740 }
741
742 /* we can read only from what is left in this bio */
743 recv_len = min_t(size_t, recv_len,
744 iov_iter_count(&req->iter));
745
746 if (queue->data_digest)
747 ret = skb_copy_and_hash_datagram_iter(skb, *offset,
748 &req->iter, recv_len, queue->rcv_hash);
749 else
750 ret = skb_copy_datagram_iter(skb, *offset,
751 &req->iter, recv_len);
752 if (ret) {
753 dev_err(queue->ctrl->ctrl.device,
754 "queue %d failed to copy request %#x data",
755 nvme_tcp_queue_id(queue), rq->tag);
756 return ret;
757 }
758
759 *len -= recv_len;
760 *offset += recv_len;
761 queue->data_remaining -= recv_len;
762 }
763
764 if (!queue->data_remaining) {
765 if (queue->data_digest) {
766 nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
767 queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
768 } else {
Sagi Grimberg1a9460c2019-07-03 14:08:04 -0700769 if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
Sagi Grimberg602d6742019-03-13 18:55:10 +0100770 nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -0700771 queue->nr_cqe++;
772 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800773 nvme_tcp_init_recv_ctx(queue);
774 }
775 }
776
777 return 0;
778}
779
780static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
781 struct sk_buff *skb, unsigned int *offset, size_t *len)
782{
Sagi Grimberg602d6742019-03-13 18:55:10 +0100783 struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800784 char *ddgst = (char *)&queue->recv_ddgst;
785 size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
786 off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
787 int ret;
788
789 ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
790 if (unlikely(ret))
791 return ret;
792
793 queue->ddgst_remaining -= recv_len;
794 *offset += recv_len;
795 *len -= recv_len;
796 if (queue->ddgst_remaining)
797 return 0;
798
799 if (queue->recv_ddgst != queue->exp_ddgst) {
800 dev_err(queue->ctrl->ctrl.device,
801 "data digest error: recv %#x expected %#x\n",
802 le32_to_cpu(queue->recv_ddgst),
803 le32_to_cpu(queue->exp_ddgst));
804 return -EIO;
805 }
806
Sagi Grimberg602d6742019-03-13 18:55:10 +0100807 if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
808 struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
809 pdu->command_id);
810
811 nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -0700812 queue->nr_cqe++;
Sagi Grimberg602d6742019-03-13 18:55:10 +0100813 }
814
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800815 nvme_tcp_init_recv_ctx(queue);
816 return 0;
817}
818
819static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
820 unsigned int offset, size_t len)
821{
822 struct nvme_tcp_queue *queue = desc->arg.data;
823 size_t consumed = len;
824 int result;
825
826 while (len) {
827 switch (nvme_tcp_recv_state(queue)) {
828 case NVME_TCP_RECV_PDU:
829 result = nvme_tcp_recv_pdu(queue, skb, &offset, &len);
830 break;
831 case NVME_TCP_RECV_DATA:
832 result = nvme_tcp_recv_data(queue, skb, &offset, &len);
833 break;
834 case NVME_TCP_RECV_DDGST:
835 result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len);
836 break;
837 default:
838 result = -EFAULT;
839 }
840 if (result) {
841 dev_err(queue->ctrl->ctrl.device,
842 "receive failed: %d\n", result);
843 queue->rd_enabled = false;
844 nvme_tcp_error_recovery(&queue->ctrl->ctrl);
845 return result;
846 }
847 }
848
849 return consumed;
850}
851
852static void nvme_tcp_data_ready(struct sock *sk)
853{
854 struct nvme_tcp_queue *queue;
855
Sagi Grimberg386e5e62020-04-30 13:59:32 -0700856 read_lock_bh(&sk->sk_callback_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800857 queue = sk->sk_user_data;
Sagi Grimberg72e5d752020-05-01 14:25:44 -0700858 if (likely(queue && queue->rd_enabled) &&
859 !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800860 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
Sagi Grimberg386e5e62020-04-30 13:59:32 -0700861 read_unlock_bh(&sk->sk_callback_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800862}
863
864static void nvme_tcp_write_space(struct sock *sk)
865{
866 struct nvme_tcp_queue *queue;
867
868 read_lock_bh(&sk->sk_callback_lock);
869 queue = sk->sk_user_data;
870 if (likely(queue && sk_stream_is_writeable(sk))) {
871 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
872 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
873 }
874 read_unlock_bh(&sk->sk_callback_lock);
875}
876
877static void nvme_tcp_state_change(struct sock *sk)
878{
879 struct nvme_tcp_queue *queue;
880
Sagi Grimberg8b73b452021-03-21 00:08:48 -0700881 read_lock_bh(&sk->sk_callback_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800882 queue = sk->sk_user_data;
883 if (!queue)
884 goto done;
885
886 switch (sk->sk_state) {
887 case TCP_CLOSE:
888 case TCP_CLOSE_WAIT:
889 case TCP_LAST_ACK:
890 case TCP_FIN_WAIT1:
891 case TCP_FIN_WAIT2:
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800892 nvme_tcp_error_recovery(&queue->ctrl->ctrl);
893 break;
894 default:
895 dev_info(queue->ctrl->ctrl.device,
896 "queue %d socket state %d\n",
897 nvme_tcp_queue_id(queue), sk->sk_state);
898 }
899
900 queue->state_change(sk);
901done:
Sagi Grimberg8b73b452021-03-21 00:08:48 -0700902 read_unlock_bh(&sk->sk_callback_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800903}
904
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700905static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
906{
907 return !list_empty(&queue->send_list) ||
908 !llist_empty(&queue->req_list) || queue->more_requests;
909}
910
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800911static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
912{
913 queue->request = NULL;
914}
915
916static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
917{
Sagi Grimberg16686012019-08-02 18:17:52 -0700918 nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800919}
920
921static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
922{
923 struct nvme_tcp_queue *queue = req->queue;
924
925 while (true) {
926 struct page *page = nvme_tcp_req_cur_page(req);
927 size_t offset = nvme_tcp_req_cur_offset(req);
928 size_t len = nvme_tcp_req_cur_length(req);
929 bool last = nvme_tcp_pdu_last_send(req, len);
930 int ret, flags = MSG_DONTWAIT;
931
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700932 if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800933 flags |= MSG_EOR;
934 else
Sagi Grimberg5bb052d2020-05-04 22:20:01 -0700935 flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800936
Coly Li7d4194a2020-10-02 16:27:30 +0800937 if (sendpage_ok(page)) {
938 ret = kernel_sendpage(queue->sock, page, offset, len,
Mikhail Skorzhinskii37c15212019-07-08 12:31:29 +0200939 flags);
940 } else {
Coly Li7d4194a2020-10-02 16:27:30 +0800941 ret = sock_no_sendpage(queue->sock, page, offset, len,
Mikhail Skorzhinskii37c15212019-07-08 12:31:29 +0200942 flags);
943 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800944 if (ret <= 0)
945 return ret;
946
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800947 if (queue->data_digest)
948 nvme_tcp_ddgst_update(queue->snd_hash, page,
949 offset, ret);
950
951 /* fully successful last write*/
952 if (last && ret == len) {
953 if (queue->data_digest) {
954 nvme_tcp_ddgst_final(queue->snd_hash,
955 &req->ddgst);
956 req->state = NVME_TCP_SEND_DDGST;
957 req->offset = 0;
958 } else {
959 nvme_tcp_done_send_req(queue);
960 }
961 return 1;
962 }
Sagi Grimberg825619b2021-05-17 14:07:45 -0700963 nvme_tcp_advance_req(req, ret);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800964 }
965 return -EAGAIN;
966}
967
968static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
969{
970 struct nvme_tcp_queue *queue = req->queue;
971 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
972 bool inline_data = nvme_tcp_has_inline_data(req);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800973 u8 hdgst = nvme_tcp_hdgst_len(queue);
974 int len = sizeof(*pdu) + hdgst - req->offset;
Sagi Grimberg5bb052d2020-05-04 22:20:01 -0700975 int flags = MSG_DONTWAIT;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800976 int ret;
977
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700978 if (inline_data || nvme_tcp_queue_more(queue))
Sagi Grimberg5bb052d2020-05-04 22:20:01 -0700979 flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
980 else
981 flags |= MSG_EOR;
982
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800983 if (queue->hdr_digest && !req->offset)
984 nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
985
986 ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
987 offset_in_page(pdu) + req->offset, len, flags);
988 if (unlikely(ret <= 0))
989 return ret;
990
991 len -= ret;
992 if (!len) {
993 if (inline_data) {
994 req->state = NVME_TCP_SEND_DATA;
995 if (queue->data_digest)
996 crypto_ahash_init(queue->snd_hash);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800997 } else {
998 nvme_tcp_done_send_req(queue);
999 }
1000 return 1;
1001 }
1002 req->offset += ret;
1003
1004 return -EAGAIN;
1005}
1006
1007static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
1008{
1009 struct nvme_tcp_queue *queue = req->queue;
1010 struct nvme_tcp_data_pdu *pdu = req->pdu;
1011 u8 hdgst = nvme_tcp_hdgst_len(queue);
1012 int len = sizeof(*pdu) - req->offset + hdgst;
1013 int ret;
1014
1015 if (queue->hdr_digest && !req->offset)
1016 nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
1017
1018 ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
1019 offset_in_page(pdu) + req->offset, len,
Sagi Grimberg5bb052d2020-05-04 22:20:01 -07001020 MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001021 if (unlikely(ret <= 0))
1022 return ret;
1023
1024 len -= ret;
1025 if (!len) {
1026 req->state = NVME_TCP_SEND_DATA;
1027 if (queue->data_digest)
1028 crypto_ahash_init(queue->snd_hash);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001029 return 1;
1030 }
1031 req->offset += ret;
1032
1033 return -EAGAIN;
1034}
1035
1036static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
1037{
1038 struct nvme_tcp_queue *queue = req->queue;
1039 int ret;
Sagi Grimberg122e5b92020-06-18 17:30:24 -07001040 struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001041 struct kvec iov = {
1042 .iov_base = &req->ddgst + req->offset,
1043 .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
1044 };
1045
Sagi Grimberg122e5b92020-06-18 17:30:24 -07001046 if (nvme_tcp_queue_more(queue))
1047 msg.msg_flags |= MSG_MORE;
1048 else
1049 msg.msg_flags |= MSG_EOR;
1050
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001051 ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
1052 if (unlikely(ret <= 0))
1053 return ret;
1054
1055 if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) {
1056 nvme_tcp_done_send_req(queue);
1057 return 1;
1058 }
1059
1060 req->offset += ret;
1061 return -EAGAIN;
1062}
1063
1064static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
1065{
1066 struct nvme_tcp_request *req;
1067 int ret = 1;
1068
1069 if (!queue->request) {
1070 queue->request = nvme_tcp_fetch_request(queue);
1071 if (!queue->request)
1072 return 0;
1073 }
1074 req = queue->request;
1075
1076 if (req->state == NVME_TCP_SEND_CMD_PDU) {
1077 ret = nvme_tcp_try_send_cmd_pdu(req);
1078 if (ret <= 0)
1079 goto done;
1080 if (!nvme_tcp_has_inline_data(req))
1081 return ret;
1082 }
1083
1084 if (req->state == NVME_TCP_SEND_H2C_PDU) {
1085 ret = nvme_tcp_try_send_data_pdu(req);
1086 if (ret <= 0)
1087 goto done;
1088 }
1089
1090 if (req->state == NVME_TCP_SEND_DATA) {
1091 ret = nvme_tcp_try_send_data(req);
1092 if (ret <= 0)
1093 goto done;
1094 }
1095
1096 if (req->state == NVME_TCP_SEND_DDGST)
1097 ret = nvme_tcp_try_send_ddgst(req);
1098done:
Sagi Grimberg5ff4e112020-02-25 16:43:23 -08001099 if (ret == -EAGAIN) {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001100 ret = 0;
Sagi Grimberg5ff4e112020-02-25 16:43:23 -08001101 } else if (ret < 0) {
1102 dev_err(queue->ctrl->ctrl.device,
1103 "failed to send request %d\n", ret);
1104 if (ret != -EPIPE && ret != -ECONNRESET)
1105 nvme_tcp_fail_request(queue->request);
1106 nvme_tcp_done_send_req(queue);
1107 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001108 return ret;
1109}
1110
1111static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
1112{
Potnuri Bharat Teja10407ec2019-07-08 15:22:00 +05301113 struct socket *sock = queue->sock;
1114 struct sock *sk = sock->sk;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001115 read_descriptor_t rd_desc;
1116 int consumed;
1117
1118 rd_desc.arg.data = queue;
1119 rd_desc.count = 1;
1120 lock_sock(sk);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001121 queue->nr_cqe = 0;
Potnuri Bharat Teja10407ec2019-07-08 15:22:00 +05301122 consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001123 release_sock(sk);
1124 return consumed;
1125}
1126
1127static void nvme_tcp_io_work(struct work_struct *w)
1128{
1129 struct nvme_tcp_queue *queue =
1130 container_of(w, struct nvme_tcp_queue, io_work);
Wunderlich, Markddef2952019-09-18 23:36:37 +00001131 unsigned long deadline = jiffies + msecs_to_jiffies(1);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001132
1133 do {
1134 bool pending = false;
1135 int result;
1136
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -07001137 if (mutex_trylock(&queue->send_mutex)) {
1138 result = nvme_tcp_try_send(queue);
1139 mutex_unlock(&queue->send_mutex);
1140 if (result > 0)
1141 pending = true;
1142 else if (unlikely(result < 0))
1143 break;
Keith Buscha0fdd142021-05-17 15:36:43 -07001144 } else
1145 pending = !llist_empty(&queue->req_list);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001146
1147 result = nvme_tcp_try_recv(queue);
1148 if (result > 0)
1149 pending = true;
Sagi Grimberg761ad262020-02-25 16:43:24 -08001150 else if (unlikely(result < 0))
Sagi Grimberg39d06079a2020-03-31 22:44:23 -07001151 return;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001152
1153 if (!pending)
1154 return;
1155
Wunderlich, Markddef2952019-09-18 23:36:37 +00001156 } while (!time_after(jiffies, deadline)); /* quota is exhausted */
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001157
1158 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
1159}
1160
1161static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
1162{
1163 struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
1164
1165 ahash_request_free(queue->rcv_hash);
1166 ahash_request_free(queue->snd_hash);
1167 crypto_free_ahash(tfm);
1168}
1169
1170static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
1171{
1172 struct crypto_ahash *tfm;
1173
1174 tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
1175 if (IS_ERR(tfm))
1176 return PTR_ERR(tfm);
1177
1178 queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
1179 if (!queue->snd_hash)
1180 goto free_tfm;
1181 ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
1182
1183 queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
1184 if (!queue->rcv_hash)
1185 goto free_snd_hash;
1186 ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
1187
1188 return 0;
1189free_snd_hash:
1190 ahash_request_free(queue->snd_hash);
1191free_tfm:
1192 crypto_free_ahash(tfm);
1193 return -ENOMEM;
1194}
1195
1196static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
1197{
1198 struct nvme_tcp_request *async = &ctrl->async_req;
1199
1200 page_frag_free(async->pdu);
1201}
1202
1203static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
1204{
1205 struct nvme_tcp_queue *queue = &ctrl->queues[0];
1206 struct nvme_tcp_request *async = &ctrl->async_req;
1207 u8 hdgst = nvme_tcp_hdgst_len(queue);
1208
1209 async->pdu = page_frag_alloc(&queue->pf_cache,
1210 sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
1211 GFP_KERNEL | __GFP_ZERO);
1212 if (!async->pdu)
1213 return -ENOMEM;
1214
1215 async->queue = &ctrl->queues[0];
1216 return 0;
1217}
1218
1219static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
1220{
1221 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1222 struct nvme_tcp_queue *queue = &ctrl->queues[qid];
1223
1224 if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
1225 return;
1226
1227 if (queue->hdr_digest || queue->data_digest)
1228 nvme_tcp_free_crypto(queue);
1229
1230 sock_release(queue->sock);
1231 kfree(queue->pdu);
Chao Leng9ebbfe42021-01-14 17:09:26 +08001232 mutex_destroy(&queue->queue_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001233}
1234
1235static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
1236{
1237 struct nvme_tcp_icreq_pdu *icreq;
1238 struct nvme_tcp_icresp_pdu *icresp;
1239 struct msghdr msg = {};
1240 struct kvec iov;
1241 bool ctrl_hdgst, ctrl_ddgst;
1242 int ret;
1243
1244 icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
1245 if (!icreq)
1246 return -ENOMEM;
1247
1248 icresp = kzalloc(sizeof(*icresp), GFP_KERNEL);
1249 if (!icresp) {
1250 ret = -ENOMEM;
1251 goto free_icreq;
1252 }
1253
1254 icreq->hdr.type = nvme_tcp_icreq;
1255 icreq->hdr.hlen = sizeof(*icreq);
1256 icreq->hdr.pdo = 0;
1257 icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen);
1258 icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
1259 icreq->maxr2t = 0; /* single inflight r2t supported */
1260 icreq->hpda = 0; /* no alignment constraint */
1261 if (queue->hdr_digest)
1262 icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
1263 if (queue->data_digest)
1264 icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE;
1265
1266 iov.iov_base = icreq;
1267 iov.iov_len = sizeof(*icreq);
1268 ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
1269 if (ret < 0)
1270 goto free_icresp;
1271
1272 memset(&msg, 0, sizeof(msg));
1273 iov.iov_base = icresp;
1274 iov.iov_len = sizeof(*icresp);
1275 ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
1276 iov.iov_len, msg.msg_flags);
1277 if (ret < 0)
1278 goto free_icresp;
1279
1280 ret = -EINVAL;
1281 if (icresp->hdr.type != nvme_tcp_icresp) {
1282 pr_err("queue %d: bad type returned %d\n",
1283 nvme_tcp_queue_id(queue), icresp->hdr.type);
1284 goto free_icresp;
1285 }
1286
1287 if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) {
1288 pr_err("queue %d: bad pdu length returned %d\n",
1289 nvme_tcp_queue_id(queue), icresp->hdr.plen);
1290 goto free_icresp;
1291 }
1292
1293 if (icresp->pfv != NVME_TCP_PFV_1_0) {
1294 pr_err("queue %d: bad pfv returned %d\n",
1295 nvme_tcp_queue_id(queue), icresp->pfv);
1296 goto free_icresp;
1297 }
1298
1299 ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE);
1300 if ((queue->data_digest && !ctrl_ddgst) ||
1301 (!queue->data_digest && ctrl_ddgst)) {
1302 pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n",
1303 nvme_tcp_queue_id(queue),
1304 queue->data_digest ? "enabled" : "disabled",
1305 ctrl_ddgst ? "enabled" : "disabled");
1306 goto free_icresp;
1307 }
1308
1309 ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE);
1310 if ((queue->hdr_digest && !ctrl_hdgst) ||
1311 (!queue->hdr_digest && ctrl_hdgst)) {
1312 pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n",
1313 nvme_tcp_queue_id(queue),
1314 queue->hdr_digest ? "enabled" : "disabled",
1315 ctrl_hdgst ? "enabled" : "disabled");
1316 goto free_icresp;
1317 }
1318
1319 if (icresp->cpda != 0) {
1320 pr_err("queue %d: unsupported cpda returned %d\n",
1321 nvme_tcp_queue_id(queue), icresp->cpda);
1322 goto free_icresp;
1323 }
1324
1325 ret = 0;
1326free_icresp:
1327 kfree(icresp);
1328free_icreq:
1329 kfree(icreq);
1330 return ret;
1331}
1332
Sagi Grimberg40510a62020-02-25 15:53:09 -08001333static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
1334{
1335 return nvme_tcp_queue_id(queue) == 0;
1336}
1337
1338static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
1339{
1340 struct nvme_tcp_ctrl *ctrl = queue->ctrl;
1341 int qid = nvme_tcp_queue_id(queue);
1342
1343 return !nvme_tcp_admin_queue(queue) &&
1344 qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
1345}
1346
1347static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
1348{
1349 struct nvme_tcp_ctrl *ctrl = queue->ctrl;
1350 int qid = nvme_tcp_queue_id(queue);
1351
1352 return !nvme_tcp_admin_queue(queue) &&
1353 !nvme_tcp_default_queue(queue) &&
1354 qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
1355 ctrl->io_queues[HCTX_TYPE_READ];
1356}
1357
1358static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
1359{
1360 struct nvme_tcp_ctrl *ctrl = queue->ctrl;
1361 int qid = nvme_tcp_queue_id(queue);
1362
1363 return !nvme_tcp_admin_queue(queue) &&
1364 !nvme_tcp_default_queue(queue) &&
1365 !nvme_tcp_read_queue(queue) &&
1366 qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
1367 ctrl->io_queues[HCTX_TYPE_READ] +
1368 ctrl->io_queues[HCTX_TYPE_POLL];
1369}
1370
1371static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
1372{
1373 struct nvme_tcp_ctrl *ctrl = queue->ctrl;
1374 int qid = nvme_tcp_queue_id(queue);
1375 int n = 0;
1376
1377 if (nvme_tcp_default_queue(queue))
1378 n = qid - 1;
1379 else if (nvme_tcp_read_queue(queue))
1380 n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
1381 else if (nvme_tcp_poll_queue(queue))
1382 n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
1383 ctrl->io_queues[HCTX_TYPE_READ] - 1;
1384 queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
1385}
1386
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001387static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
1388 int qid, size_t queue_size)
1389{
1390 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1391 struct nvme_tcp_queue *queue = &ctrl->queues[qid];
Christoph Hellwig6ebf71b2020-05-28 07:12:26 +02001392 int ret, rcv_pdu_size;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001393
Chao Leng9ebbfe42021-01-14 17:09:26 +08001394 mutex_init(&queue->queue_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001395 queue->ctrl = ctrl;
Sagi Grimberg15ec9282020-06-18 17:30:22 -07001396 init_llist_head(&queue->req_list);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001397 INIT_LIST_HEAD(&queue->send_list);
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -07001398 mutex_init(&queue->send_mutex);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001399 INIT_WORK(&queue->io_work, nvme_tcp_io_work);
1400 queue->queue_size = queue_size;
1401
1402 if (qid > 0)
Israel Rukshin9924b032019-08-18 12:08:53 +03001403 queue->cmnd_capsule_len = nctrl->ioccsz * 16;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001404 else
1405 queue->cmnd_capsule_len = sizeof(struct nvme_command) +
1406 NVME_TCP_ADMIN_CCSZ;
1407
1408 ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
1409 IPPROTO_TCP, &queue->sock);
1410 if (ret) {
Israel Rukshin9924b032019-08-18 12:08:53 +03001411 dev_err(nctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001412 "failed to create socket: %d\n", ret);
Chao Leng9ebbfe42021-01-14 17:09:26 +08001413 goto err_destroy_mutex;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001414 }
1415
1416 /* Single syn retry */
Christoph Hellwig557eadf2020-05-28 07:12:21 +02001417 tcp_sock_set_syncnt(queue->sock->sk, 1);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001418
1419 /* Set TCP no delay */
Christoph Hellwig12abc5e2020-05-28 07:12:19 +02001420 tcp_sock_set_nodelay(queue->sock->sk);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001421
1422 /*
1423 * Cleanup whatever is sitting in the TCP transmit queue on socket
1424 * close. This is done to prevent stale data from being sent should
1425 * the network connection be restored before TCP times out.
1426 */
Christoph Hellwigc4335942020-05-28 07:12:10 +02001427 sock_no_linger(queue->sock->sk);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001428
Christoph Hellwig6e434962020-05-28 07:12:11 +02001429 if (so_priority > 0)
1430 sock_set_priority(queue->sock->sk, so_priority);
Wunderlich, Mark9912ade2020-01-16 00:46:12 +00001431
Israel Rukshinbb139852019-08-18 12:08:54 +03001432 /* Set socket type of service */
Christoph Hellwig6ebf71b2020-05-28 07:12:26 +02001433 if (nctrl->opts->tos >= 0)
1434 ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
Israel Rukshinbb139852019-08-18 12:08:54 +03001435
Sagi Grimbergadc99fd2020-07-23 16:42:26 -07001436 /* Set 10 seconds timeout for icresp recvmsg */
1437 queue->sock->sk->sk_rcvtimeo = 10 * HZ;
1438
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001439 queue->sock->sk->sk_allocation = GFP_ATOMIC;
Sagi Grimberg40510a62020-02-25 15:53:09 -08001440 nvme_tcp_set_queue_io_cpu(queue);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001441 queue->request = NULL;
1442 queue->data_remaining = 0;
1443 queue->ddgst_remaining = 0;
1444 queue->pdu_remaining = 0;
1445 queue->pdu_offset = 0;
1446 sk_set_memalloc(queue->sock->sk);
1447
Israel Rukshin9924b032019-08-18 12:08:53 +03001448 if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001449 ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
1450 sizeof(ctrl->src_addr));
1451 if (ret) {
Israel Rukshin9924b032019-08-18 12:08:53 +03001452 dev_err(nctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001453 "failed to bind queue %d socket %d\n",
1454 qid, ret);
1455 goto err_sock;
1456 }
1457 }
1458
Martin Belanger3ede8f72021-05-20 15:09:34 -04001459 if (nctrl->opts->mask & NVMF_OPT_HOST_IFACE) {
1460 char *iface = nctrl->opts->host_iface;
1461 sockptr_t optval = KERNEL_SOCKPTR(iface);
1462
1463 ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
1464 optval, strlen(iface));
1465 if (ret) {
1466 dev_err(nctrl->device,
1467 "failed to bind to interface %s queue %d err %d\n",
1468 iface, qid, ret);
1469 goto err_sock;
1470 }
1471 }
1472
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001473 queue->hdr_digest = nctrl->opts->hdr_digest;
1474 queue->data_digest = nctrl->opts->data_digest;
1475 if (queue->hdr_digest || queue->data_digest) {
1476 ret = nvme_tcp_alloc_crypto(queue);
1477 if (ret) {
Israel Rukshin9924b032019-08-18 12:08:53 +03001478 dev_err(nctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001479 "failed to allocate queue %d crypto\n", qid);
1480 goto err_sock;
1481 }
1482 }
1483
1484 rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
1485 nvme_tcp_hdgst_len(queue);
1486 queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
1487 if (!queue->pdu) {
1488 ret = -ENOMEM;
1489 goto err_crypto;
1490 }
1491
Israel Rukshin9924b032019-08-18 12:08:53 +03001492 dev_dbg(nctrl->device, "connecting queue %d\n",
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001493 nvme_tcp_queue_id(queue));
1494
1495 ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
1496 sizeof(ctrl->addr), 0);
1497 if (ret) {
Israel Rukshin9924b032019-08-18 12:08:53 +03001498 dev_err(nctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001499 "failed to connect socket: %d\n", ret);
1500 goto err_rcv_pdu;
1501 }
1502
1503 ret = nvme_tcp_init_connection(queue);
1504 if (ret)
1505 goto err_init_connect;
1506
1507 queue->rd_enabled = true;
1508 set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
1509 nvme_tcp_init_recv_ctx(queue);
1510
1511 write_lock_bh(&queue->sock->sk->sk_callback_lock);
1512 queue->sock->sk->sk_user_data = queue;
1513 queue->state_change = queue->sock->sk->sk_state_change;
1514 queue->data_ready = queue->sock->sk->sk_data_ready;
1515 queue->write_space = queue->sock->sk->sk_write_space;
1516 queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
1517 queue->sock->sk->sk_state_change = nvme_tcp_state_change;
1518 queue->sock->sk->sk_write_space = nvme_tcp_write_space;
Sebastian Andrzej Siewiorac1c4e12019-10-10 17:34:12 +02001519#ifdef CONFIG_NET_RX_BUSY_POLL
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001520 queue->sock->sk->sk_ll_usec = 1;
Sebastian Andrzej Siewiorac1c4e12019-10-10 17:34:12 +02001521#endif
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001522 write_unlock_bh(&queue->sock->sk->sk_callback_lock);
1523
1524 return 0;
1525
1526err_init_connect:
1527 kernel_sock_shutdown(queue->sock, SHUT_RDWR);
1528err_rcv_pdu:
1529 kfree(queue->pdu);
1530err_crypto:
1531 if (queue->hdr_digest || queue->data_digest)
1532 nvme_tcp_free_crypto(queue);
1533err_sock:
1534 sock_release(queue->sock);
1535 queue->sock = NULL;
Chao Leng9ebbfe42021-01-14 17:09:26 +08001536err_destroy_mutex:
1537 mutex_destroy(&queue->queue_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001538 return ret;
1539}
1540
1541static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
1542{
1543 struct socket *sock = queue->sock;
1544
1545 write_lock_bh(&sock->sk->sk_callback_lock);
1546 sock->sk->sk_user_data = NULL;
1547 sock->sk->sk_data_ready = queue->data_ready;
1548 sock->sk->sk_state_change = queue->state_change;
1549 sock->sk->sk_write_space = queue->write_space;
1550 write_unlock_bh(&sock->sk->sk_callback_lock);
1551}
1552
1553static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
1554{
1555 kernel_sock_shutdown(queue->sock, SHUT_RDWR);
1556 nvme_tcp_restore_sock_calls(queue);
1557 cancel_work_sync(&queue->io_work);
1558}
1559
1560static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
1561{
1562 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1563 struct nvme_tcp_queue *queue = &ctrl->queues[qid];
1564
Chao Leng9ebbfe42021-01-14 17:09:26 +08001565 mutex_lock(&queue->queue_lock);
1566 if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
1567 __nvme_tcp_stop_queue(queue);
1568 mutex_unlock(&queue->queue_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001569}
1570
1571static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
1572{
1573 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1574 int ret;
1575
1576 if (idx)
Sagi Grimberg26c68222018-12-14 11:06:08 -08001577 ret = nvmf_connect_io_queue(nctrl, idx, false);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001578 else
1579 ret = nvmf_connect_admin_queue(nctrl);
1580
1581 if (!ret) {
1582 set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
1583 } else {
Sagi Grimbergf34e2582019-04-29 16:25:48 -07001584 if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
1585 __nvme_tcp_stop_queue(&ctrl->queues[idx]);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001586 dev_err(nctrl->device,
1587 "failed to connect queue: %d ret=%d\n", idx, ret);
1588 }
1589 return ret;
1590}
1591
1592static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
1593 bool admin)
1594{
1595 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1596 struct blk_mq_tag_set *set;
1597 int ret;
1598
1599 if (admin) {
1600 set = &ctrl->admin_tag_set;
1601 memset(set, 0, sizeof(*set));
1602 set->ops = &nvme_tcp_admin_mq_ops;
1603 set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
Christoph Hellwiged01fee2021-03-03 13:28:22 +01001604 set->reserved_tags = NVMF_RESERVED_TAGS;
Max Gurtovoy610c8232020-06-16 12:34:24 +03001605 set->numa_node = nctrl->numa_node;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -07001606 set->flags = BLK_MQ_F_BLOCKING;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001607 set->cmd_size = sizeof(struct nvme_tcp_request);
1608 set->driver_data = ctrl;
1609 set->nr_hw_queues = 1;
Chaitanya Kulkarnidc96f932020-11-09 16:33:45 -08001610 set->timeout = NVME_ADMIN_TIMEOUT;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001611 } else {
1612 set = &ctrl->tag_set;
1613 memset(set, 0, sizeof(*set));
1614 set->ops = &nvme_tcp_mq_ops;
1615 set->queue_depth = nctrl->sqsize + 1;
Christoph Hellwiged01fee2021-03-03 13:28:22 +01001616 set->reserved_tags = NVMF_RESERVED_TAGS;
Max Gurtovoy610c8232020-06-16 12:34:24 +03001617 set->numa_node = nctrl->numa_node;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -07001618 set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001619 set->cmd_size = sizeof(struct nvme_tcp_request);
1620 set->driver_data = ctrl;
1621 set->nr_hw_queues = nctrl->queue_count - 1;
1622 set->timeout = NVME_IO_TIMEOUT;
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001623 set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001624 }
1625
1626 ret = blk_mq_alloc_tag_set(set);
1627 if (ret)
1628 return ERR_PTR(ret);
1629
1630 return set;
1631}
1632
1633static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
1634{
1635 if (to_tcp_ctrl(ctrl)->async_req.pdu) {
David Milburnceb1e082020-09-02 17:42:53 -05001636 cancel_work_sync(&ctrl->async_event_work);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001637 nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
1638 to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
1639 }
1640
1641 nvme_tcp_free_queue(ctrl, 0);
1642}
1643
1644static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl)
1645{
1646 int i;
1647
1648 for (i = 1; i < ctrl->queue_count; i++)
1649 nvme_tcp_free_queue(ctrl, i);
1650}
1651
1652static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
1653{
1654 int i;
1655
1656 for (i = 1; i < ctrl->queue_count; i++)
1657 nvme_tcp_stop_queue(ctrl, i);
1658}
1659
1660static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
1661{
1662 int i, ret = 0;
1663
1664 for (i = 1; i < ctrl->queue_count; i++) {
1665 ret = nvme_tcp_start_queue(ctrl, i);
1666 if (ret)
1667 goto out_stop_queues;
1668 }
1669
1670 return 0;
1671
1672out_stop_queues:
1673 for (i--; i >= 1; i--)
1674 nvme_tcp_stop_queue(ctrl, i);
1675 return ret;
1676}
1677
1678static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
1679{
1680 int ret;
1681
1682 ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
1683 if (ret)
1684 return ret;
1685
1686 ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
1687 if (ret)
1688 goto out_free_queue;
1689
1690 return 0;
1691
1692out_free_queue:
1693 nvme_tcp_free_queue(ctrl, 0);
1694 return ret;
1695}
1696
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001697static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001698{
1699 int i, ret;
1700
1701 for (i = 1; i < ctrl->queue_count; i++) {
1702 ret = nvme_tcp_alloc_queue(ctrl, i,
1703 ctrl->sqsize + 1);
1704 if (ret)
1705 goto out_free_queues;
1706 }
1707
1708 return 0;
1709
1710out_free_queues:
1711 for (i--; i >= 1; i--)
1712 nvme_tcp_free_queue(ctrl, i);
1713
1714 return ret;
1715}
1716
1717static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
1718{
Sagi Grimberg873946f2018-12-11 23:38:57 -08001719 unsigned int nr_io_queues;
1720
1721 nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
1722 nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001723 nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());
Sagi Grimberg873946f2018-12-11 23:38:57 -08001724
1725 return nr_io_queues;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001726}
1727
Sagi Grimberg64861992019-05-28 22:49:05 -07001728static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
1729 unsigned int nr_io_queues)
1730{
1731 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1732 struct nvmf_ctrl_options *opts = nctrl->opts;
1733
1734 if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
1735 /*
1736 * separate read/write queues
1737 * hand out dedicated default queues only after we have
1738 * sufficient read queues.
1739 */
1740 ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
1741 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
1742 ctrl->io_queues[HCTX_TYPE_DEFAULT] =
1743 min(opts->nr_write_queues, nr_io_queues);
1744 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
1745 } else {
1746 /*
1747 * shared read/write queues
1748 * either no write queues were requested, or we don't have
1749 * sufficient queue count to have dedicated default queues.
1750 */
1751 ctrl->io_queues[HCTX_TYPE_DEFAULT] =
1752 min(opts->nr_io_queues, nr_io_queues);
1753 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
1754 }
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001755
1756 if (opts->nr_poll_queues && nr_io_queues) {
1757 /* map dedicated poll queues only if we have queues left */
1758 ctrl->io_queues[HCTX_TYPE_POLL] =
1759 min(opts->nr_poll_queues, nr_io_queues);
1760 }
Sagi Grimberg64861992019-05-28 22:49:05 -07001761}
1762
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001763static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001764{
1765 unsigned int nr_io_queues;
1766 int ret;
1767
1768 nr_io_queues = nvme_tcp_nr_io_queues(ctrl);
1769 ret = nvme_set_queue_count(ctrl, &nr_io_queues);
1770 if (ret)
1771 return ret;
1772
1773 ctrl->queue_count = nr_io_queues + 1;
Sagi Grimberg72f57242021-03-15 14:04:26 -07001774 if (ctrl->queue_count < 2) {
1775 dev_err(ctrl->device,
1776 "unable to set any I/O queues\n");
1777 return -ENOMEM;
1778 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001779
1780 dev_info(ctrl->device,
1781 "creating %d I/O queues.\n", nr_io_queues);
1782
Sagi Grimberg64861992019-05-28 22:49:05 -07001783 nvme_tcp_set_io_queues(ctrl, nr_io_queues);
1784
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001785 return __nvme_tcp_alloc_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001786}
1787
1788static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
1789{
1790 nvme_tcp_stop_io_queues(ctrl);
1791 if (remove) {
Sagi Grimberge85037a2018-12-31 23:58:30 -08001792 blk_cleanup_queue(ctrl->connect_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001793 blk_mq_free_tag_set(ctrl->tagset);
1794 }
1795 nvme_tcp_free_io_queues(ctrl);
1796}
1797
1798static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
1799{
1800 int ret;
1801
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001802 ret = nvme_tcp_alloc_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001803 if (ret)
1804 return ret;
1805
1806 if (new) {
1807 ctrl->tagset = nvme_tcp_alloc_tagset(ctrl, false);
1808 if (IS_ERR(ctrl->tagset)) {
1809 ret = PTR_ERR(ctrl->tagset);
1810 goto out_free_io_queues;
1811 }
1812
Sagi Grimberge85037a2018-12-31 23:58:30 -08001813 ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
1814 if (IS_ERR(ctrl->connect_q)) {
1815 ret = PTR_ERR(ctrl->connect_q);
1816 goto out_free_tag_set;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001817 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001818 }
1819
1820 ret = nvme_tcp_start_io_queues(ctrl);
1821 if (ret)
1822 goto out_cleanup_connect_q;
1823
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001824 if (!new) {
1825 nvme_start_queues(ctrl);
Sagi Grimberge5c01f42020-07-30 13:25:34 -07001826 if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
1827 /*
1828 * If we timed out waiting for freeze we are likely to
1829 * be stuck. Fail the controller initialization just
1830 * to be safe.
1831 */
1832 ret = -ENODEV;
1833 goto out_wait_freeze_timed_out;
1834 }
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001835 blk_mq_update_nr_hw_queues(ctrl->tagset,
1836 ctrl->queue_count - 1);
1837 nvme_unfreeze(ctrl);
1838 }
1839
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001840 return 0;
1841
Sagi Grimberge5c01f42020-07-30 13:25:34 -07001842out_wait_freeze_timed_out:
1843 nvme_stop_queues(ctrl);
Chao Leng70a99572021-01-21 11:32:38 +08001844 nvme_sync_io_queues(ctrl);
Sagi Grimberge5c01f42020-07-30 13:25:34 -07001845 nvme_tcp_stop_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001846out_cleanup_connect_q:
Chao Leng70a99572021-01-21 11:32:38 +08001847 nvme_cancel_tagset(ctrl);
Sagi Grimberge85037a2018-12-31 23:58:30 -08001848 if (new)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001849 blk_cleanup_queue(ctrl->connect_q);
1850out_free_tag_set:
1851 if (new)
1852 blk_mq_free_tag_set(ctrl->tagset);
1853out_free_io_queues:
1854 nvme_tcp_free_io_queues(ctrl);
1855 return ret;
1856}
1857
1858static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
1859{
1860 nvme_tcp_stop_queue(ctrl, 0);
1861 if (remove) {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001862 blk_cleanup_queue(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001863 blk_cleanup_queue(ctrl->fabrics_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001864 blk_mq_free_tag_set(ctrl->admin_tagset);
1865 }
1866 nvme_tcp_free_admin_queue(ctrl);
1867}
1868
1869static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
1870{
1871 int error;
1872
1873 error = nvme_tcp_alloc_admin_queue(ctrl);
1874 if (error)
1875 return error;
1876
1877 if (new) {
1878 ctrl->admin_tagset = nvme_tcp_alloc_tagset(ctrl, true);
1879 if (IS_ERR(ctrl->admin_tagset)) {
1880 error = PTR_ERR(ctrl->admin_tagset);
1881 goto out_free_queue;
1882 }
1883
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001884 ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset);
1885 if (IS_ERR(ctrl->fabrics_q)) {
1886 error = PTR_ERR(ctrl->fabrics_q);
1887 goto out_free_tagset;
1888 }
1889
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001890 ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset);
1891 if (IS_ERR(ctrl->admin_q)) {
1892 error = PTR_ERR(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001893 goto out_cleanup_fabrics_q;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001894 }
1895 }
1896
1897 error = nvme_tcp_start_queue(ctrl, 0);
1898 if (error)
1899 goto out_cleanup_queue;
1900
Sagi Grimbergc0f2f452019-07-22 17:06:53 -07001901 error = nvme_enable_ctrl(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001902 if (error)
1903 goto out_stop_queue;
1904
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001905 blk_mq_unquiesce_queue(ctrl->admin_q);
1906
Chaitanya Kulkarnif21c47692021-02-28 18:06:04 -08001907 error = nvme_init_ctrl_finish(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001908 if (error)
Chao Leng70a99572021-01-21 11:32:38 +08001909 goto out_quiesce_queue;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001910
1911 return 0;
1912
Chao Leng70a99572021-01-21 11:32:38 +08001913out_quiesce_queue:
1914 blk_mq_quiesce_queue(ctrl->admin_q);
1915 blk_sync_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001916out_stop_queue:
1917 nvme_tcp_stop_queue(ctrl, 0);
Chao Leng70a99572021-01-21 11:32:38 +08001918 nvme_cancel_admin_tagset(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001919out_cleanup_queue:
1920 if (new)
1921 blk_cleanup_queue(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001922out_cleanup_fabrics_q:
1923 if (new)
1924 blk_cleanup_queue(ctrl->fabrics_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001925out_free_tagset:
1926 if (new)
1927 blk_mq_free_tag_set(ctrl->admin_tagset);
1928out_free_queue:
1929 nvme_tcp_free_admin_queue(ctrl);
1930 return error;
1931}
1932
1933static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
1934 bool remove)
1935{
1936 blk_mq_quiesce_queue(ctrl->admin_q);
Chao Lengd6f66212020-10-22 10:15:15 +08001937 blk_sync_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001938 nvme_tcp_stop_queue(ctrl, 0);
Chao Leng563c8152021-01-21 11:32:40 +08001939 nvme_cancel_admin_tagset(ctrl);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001940 if (remove)
1941 blk_mq_unquiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001942 nvme_tcp_destroy_admin_queue(ctrl, remove);
1943}
1944
1945static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
1946 bool remove)
1947{
1948 if (ctrl->queue_count <= 1)
Chao Lengd6f66212020-10-22 10:15:15 +08001949 return;
Sagi Grimbergd4d61472020-08-05 18:13:48 -07001950 blk_mq_quiesce_queue(ctrl->admin_q);
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001951 nvme_start_freeze(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001952 nvme_stop_queues(ctrl);
Chao Lengd6f66212020-10-22 10:15:15 +08001953 nvme_sync_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001954 nvme_tcp_stop_io_queues(ctrl);
Chao Leng563c8152021-01-21 11:32:40 +08001955 nvme_cancel_tagset(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001956 if (remove)
1957 nvme_start_queues(ctrl);
1958 nvme_tcp_destroy_io_queues(ctrl, remove);
1959}
1960
1961static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
1962{
1963 /* If we are resetting/deleting then do nothing */
1964 if (ctrl->state != NVME_CTRL_CONNECTING) {
1965 WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
1966 ctrl->state == NVME_CTRL_LIVE);
1967 return;
1968 }
1969
1970 if (nvmf_should_reconnect(ctrl)) {
1971 dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
1972 ctrl->opts->reconnect_delay);
1973 queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
1974 ctrl->opts->reconnect_delay * HZ);
1975 } else {
1976 dev_info(ctrl->device, "Removing controller...\n");
1977 nvme_delete_ctrl(ctrl);
1978 }
1979}
1980
1981static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
1982{
1983 struct nvmf_ctrl_options *opts = ctrl->opts;
Colin Ian King312910f2019-09-05 15:34:35 +01001984 int ret;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001985
1986 ret = nvme_tcp_configure_admin_queue(ctrl, new);
1987 if (ret)
1988 return ret;
1989
1990 if (ctrl->icdoff) {
Dan Carpenter522af602021-06-05 15:48:16 +03001991 ret = -EOPNOTSUPP;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001992 dev_err(ctrl->device, "icdoff is not supported!\n");
1993 goto destroy_admin;
1994 }
1995
Max Gurtovoy73ffcef2021-03-30 23:01:19 +00001996 if (!(ctrl->sgls & ((1 << 0) | (1 << 1)))) {
Dan Carpenter522af602021-06-05 15:48:16 +03001997 ret = -EOPNOTSUPP;
Max Gurtovoy73ffcef2021-03-30 23:01:19 +00001998 dev_err(ctrl->device, "Mandatory sgls are not supported!\n");
1999 goto destroy_admin;
2000 }
2001
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002002 if (opts->queue_size > ctrl->sqsize + 1)
2003 dev_warn(ctrl->device,
2004 "queue_size %zu > ctrl sqsize %u, clamping down\n",
2005 opts->queue_size, ctrl->sqsize + 1);
2006
2007 if (ctrl->sqsize + 1 > ctrl->maxcmd) {
2008 dev_warn(ctrl->device,
2009 "sqsize %u > ctrl maxcmd %u, clamping down\n",
2010 ctrl->sqsize + 1, ctrl->maxcmd);
2011 ctrl->sqsize = ctrl->maxcmd - 1;
2012 }
2013
2014 if (ctrl->queue_count > 1) {
2015 ret = nvme_tcp_configure_io_queues(ctrl, new);
2016 if (ret)
2017 goto destroy_admin;
2018 }
2019
2020 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
Israel Rukshinbea54ef2020-03-24 17:29:45 +02002021 /*
Sagi Grimbergecca390e2020-07-22 16:32:19 -07002022 * state change failure is ok if we started ctrl delete,
Israel Rukshinbea54ef2020-03-24 17:29:45 +02002023 * unless we're during creation of a new controller to
2024 * avoid races with teardown flow.
2025 */
Sagi Grimbergecca390e2020-07-22 16:32:19 -07002026 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2027 ctrl->state != NVME_CTRL_DELETING_NOIO);
Israel Rukshinbea54ef2020-03-24 17:29:45 +02002028 WARN_ON_ONCE(new);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002029 ret = -EINVAL;
2030 goto destroy_io;
2031 }
2032
2033 nvme_start_ctrl(ctrl);
2034 return 0;
2035
2036destroy_io:
Chao Leng70a99572021-01-21 11:32:38 +08002037 if (ctrl->queue_count > 1) {
2038 nvme_stop_queues(ctrl);
2039 nvme_sync_io_queues(ctrl);
2040 nvme_tcp_stop_io_queues(ctrl);
2041 nvme_cancel_tagset(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002042 nvme_tcp_destroy_io_queues(ctrl, new);
Chao Leng70a99572021-01-21 11:32:38 +08002043 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002044destroy_admin:
Chao Leng70a99572021-01-21 11:32:38 +08002045 blk_mq_quiesce_queue(ctrl->admin_q);
2046 blk_sync_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002047 nvme_tcp_stop_queue(ctrl, 0);
Chao Leng70a99572021-01-21 11:32:38 +08002048 nvme_cancel_admin_tagset(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002049 nvme_tcp_destroy_admin_queue(ctrl, new);
2050 return ret;
2051}
2052
2053static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
2054{
2055 struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
2056 struct nvme_tcp_ctrl, connect_work);
2057 struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
2058
2059 ++ctrl->nr_reconnects;
2060
2061 if (nvme_tcp_setup_ctrl(ctrl, false))
2062 goto requeue;
2063
Colin Ian King56a77d22018-12-14 11:42:43 +00002064 dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002065 ctrl->nr_reconnects);
2066
2067 ctrl->nr_reconnects = 0;
2068
2069 return;
2070
2071requeue:
2072 dev_info(ctrl->device, "Failed reconnect attempt %d\n",
2073 ctrl->nr_reconnects);
2074 nvme_tcp_reconnect_or_remove(ctrl);
2075}
2076
2077static void nvme_tcp_error_recovery_work(struct work_struct *work)
2078{
2079 struct nvme_tcp_ctrl *tcp_ctrl = container_of(work,
2080 struct nvme_tcp_ctrl, err_work);
2081 struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
2082
2083 nvme_stop_keep_alive(ctrl);
2084 nvme_tcp_teardown_io_queues(ctrl, false);
2085 /* unquiesce to fail fast pending requests */
2086 nvme_start_queues(ctrl);
2087 nvme_tcp_teardown_admin_queue(ctrl, false);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07002088 blk_mq_unquiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002089
2090 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
Sagi Grimbergecca390e2020-07-22 16:32:19 -07002091 /* state change failure is ok if we started ctrl delete */
2092 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2093 ctrl->state != NVME_CTRL_DELETING_NOIO);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002094 return;
2095 }
2096
2097 nvme_tcp_reconnect_or_remove(ctrl);
2098}
2099
2100static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
2101{
Sagi Grimberg794a4cb2019-01-01 00:19:30 -08002102 cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
2103 cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
2104
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002105 nvme_tcp_teardown_io_queues(ctrl, shutdown);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07002106 blk_mq_quiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002107 if (shutdown)
2108 nvme_shutdown_ctrl(ctrl);
2109 else
Sagi Grimbergb5b05042019-07-22 17:06:54 -07002110 nvme_disable_ctrl(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002111 nvme_tcp_teardown_admin_queue(ctrl, shutdown);
2112}
2113
2114static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
2115{
2116 nvme_tcp_teardown_ctrl(ctrl, true);
2117}
2118
2119static void nvme_reset_ctrl_work(struct work_struct *work)
2120{
2121 struct nvme_ctrl *ctrl =
2122 container_of(work, struct nvme_ctrl, reset_work);
2123
2124 nvme_stop_ctrl(ctrl);
2125 nvme_tcp_teardown_ctrl(ctrl, false);
2126
2127 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
Sagi Grimbergecca390e2020-07-22 16:32:19 -07002128 /* state change failure is ok if we started ctrl delete */
2129 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2130 ctrl->state != NVME_CTRL_DELETING_NOIO);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002131 return;
2132 }
2133
2134 if (nvme_tcp_setup_ctrl(ctrl, false))
2135 goto out_fail;
2136
2137 return;
2138
2139out_fail:
2140 ++ctrl->nr_reconnects;
2141 nvme_tcp_reconnect_or_remove(ctrl);
2142}
2143
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002144static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
2145{
2146 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
2147
2148 if (list_empty(&ctrl->list))
2149 goto free_ctrl;
2150
2151 mutex_lock(&nvme_tcp_ctrl_mutex);
2152 list_del(&ctrl->list);
2153 mutex_unlock(&nvme_tcp_ctrl_mutex);
2154
2155 nvmf_free_options(nctrl->opts);
2156free_ctrl:
2157 kfree(ctrl->queues);
2158 kfree(ctrl);
2159}
2160
2161static void nvme_tcp_set_sg_null(struct nvme_command *c)
2162{
2163 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2164
2165 sg->addr = 0;
2166 sg->length = 0;
2167 sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
2168 NVME_SGL_FMT_TRANSPORT_A;
2169}
2170
2171static void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue,
2172 struct nvme_command *c, u32 data_len)
2173{
2174 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2175
2176 sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
2177 sg->length = cpu_to_le32(data_len);
2178 sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
2179}
2180
2181static void nvme_tcp_set_sg_host_data(struct nvme_command *c,
2182 u32 data_len)
2183{
2184 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2185
2186 sg->addr = 0;
2187 sg->length = cpu_to_le32(data_len);
2188 sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
2189 NVME_SGL_FMT_TRANSPORT_A;
2190}
2191
2192static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
2193{
2194 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg);
2195 struct nvme_tcp_queue *queue = &ctrl->queues[0];
2196 struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu;
2197 struct nvme_command *cmd = &pdu->cmd;
2198 u8 hdgst = nvme_tcp_hdgst_len(queue);
2199
2200 memset(pdu, 0, sizeof(*pdu));
2201 pdu->hdr.type = nvme_tcp_cmd;
2202 if (queue->hdr_digest)
2203 pdu->hdr.flags |= NVME_TCP_F_HDGST;
2204 pdu->hdr.hlen = sizeof(*pdu);
2205 pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
2206
2207 cmd->common.opcode = nvme_admin_async_event;
2208 cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
2209 cmd->common.flags |= NVME_CMD_SGL_METABUF;
2210 nvme_tcp_set_sg_null(cmd);
2211
2212 ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU;
2213 ctrl->async_req.offset = 0;
2214 ctrl->async_req.curr_bio = NULL;
2215 ctrl->async_req.data_len = 0;
2216
Sagi Grimberg86f03482020-06-18 17:30:23 -07002217 nvme_tcp_queue_request(&ctrl->async_req, true, true);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002218}
2219
Sagi Grimberg236187c2020-07-28 13:16:36 -07002220static void nvme_tcp_complete_timed_out(struct request *rq)
2221{
2222 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2223 struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
2224
Sagi Grimberg236187c2020-07-28 13:16:36 -07002225 nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
Sagi Grimberg0a8a2c852020-10-22 10:15:31 +08002226 if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
Sagi Grimberg236187c2020-07-28 13:16:36 -07002227 nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
2228 blk_mq_complete_request(rq);
2229 }
Sagi Grimberg236187c2020-07-28 13:16:36 -07002230}
2231
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002232static enum blk_eh_timer_return
2233nvme_tcp_timeout(struct request *rq, bool reserved)
2234{
2235 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
Sagi Grimberg236187c2020-07-28 13:16:36 -07002236 struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002237 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2238
Sagi Grimberg236187c2020-07-28 13:16:36 -07002239 dev_warn(ctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002240 "queue %d: timeout request %#x type %d\n",
Sagi Grimberg39d57752019-01-08 01:01:30 -08002241 nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002242
Sagi Grimberg236187c2020-07-28 13:16:36 -07002243 if (ctrl->state != NVME_CTRL_LIVE) {
Sagi Grimberg39d57752019-01-08 01:01:30 -08002244 /*
Sagi Grimberg236187c2020-07-28 13:16:36 -07002245 * If we are resetting, connecting or deleting we should
2246 * complete immediately because we may block controller
2247 * teardown or setup sequence
2248 * - ctrl disable/shutdown fabrics requests
2249 * - connect requests
2250 * - initialization admin requests
2251 * - I/O requests that entered after unquiescing and
2252 * the controller stopped responding
2253 *
2254 * All other requests should be cancelled by the error
2255 * recovery work, so it's fine that we fail it here.
Sagi Grimberg39d57752019-01-08 01:01:30 -08002256 */
Sagi Grimberg236187c2020-07-28 13:16:36 -07002257 nvme_tcp_complete_timed_out(rq);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002258 return BLK_EH_DONE;
2259 }
2260
Sagi Grimberg236187c2020-07-28 13:16:36 -07002261 /*
2262 * LIVE state should trigger the normal error recovery which will
2263 * handle completing this request.
2264 */
2265 nvme_tcp_error_recovery(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002266 return BLK_EH_RESET_TIMER;
2267}
2268
2269static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
2270 struct request *rq)
2271{
2272 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2273 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2274 struct nvme_command *c = &pdu->cmd;
2275
2276 c->common.flags |= NVME_CMD_SGL_METABUF;
2277
Sagi Grimberg25e5cb72020-03-23 15:06:30 -07002278 if (!blk_rq_nr_phys_segments(rq))
2279 nvme_tcp_set_sg_null(c);
2280 else if (rq_data_dir(rq) == WRITE &&
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002281 req->data_len <= nvme_tcp_inline_data_size(queue))
2282 nvme_tcp_set_sg_inline(queue, c, req->data_len);
2283 else
2284 nvme_tcp_set_sg_host_data(c, req->data_len);
2285
2286 return 0;
2287}
2288
2289static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
2290 struct request *rq)
2291{
2292 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2293 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2294 struct nvme_tcp_queue *queue = req->queue;
2295 u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
2296 blk_status_t ret;
2297
Keith Buschf4b9e6c2021-03-17 13:37:03 -07002298 ret = nvme_setup_cmd(ns, rq);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002299 if (ret)
2300 return ret;
2301
2302 req->state = NVME_TCP_SEND_CMD_PDU;
2303 req->offset = 0;
2304 req->data_sent = 0;
2305 req->pdu_len = 0;
2306 req->pdu_sent = 0;
Sagi Grimberg25e5cb72020-03-23 15:06:30 -07002307 req->data_len = blk_rq_nr_phys_segments(rq) ?
2308 blk_rq_payload_bytes(rq) : 0;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002309 req->curr_bio = rq->bio;
Sagi Grimberge11e5112021-02-10 14:04:00 -08002310 if (req->curr_bio && req->data_len)
Sagi Grimbergcb9b8702021-01-14 13:15:24 -08002311 nvme_tcp_init_iter(req, rq_data_dir(rq));
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002312
2313 if (rq_data_dir(rq) == WRITE &&
2314 req->data_len <= nvme_tcp_inline_data_size(queue))
2315 req->pdu_len = req->data_len;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002316
2317 pdu->hdr.type = nvme_tcp_cmd;
2318 pdu->hdr.flags = 0;
2319 if (queue->hdr_digest)
2320 pdu->hdr.flags |= NVME_TCP_F_HDGST;
2321 if (queue->data_digest && req->pdu_len) {
2322 pdu->hdr.flags |= NVME_TCP_F_DDGST;
2323 ddgst = nvme_tcp_ddgst_len(queue);
2324 }
2325 pdu->hdr.hlen = sizeof(*pdu);
2326 pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0;
2327 pdu->hdr.plen =
2328 cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst);
2329
2330 ret = nvme_tcp_map_data(queue, rq);
2331 if (unlikely(ret)) {
Max Gurtovoy28a4cac2019-10-13 19:57:38 +03002332 nvme_cleanup_cmd(rq);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002333 dev_err(queue->ctrl->ctrl.device,
2334 "Failed to map data (%d)\n", ret);
2335 return ret;
2336 }
2337
2338 return 0;
2339}
2340
Sagi Grimberg86f03482020-06-18 17:30:23 -07002341static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
2342{
2343 struct nvme_tcp_queue *queue = hctx->driver_data;
2344
2345 if (!llist_empty(&queue->req_list))
2346 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
2347}
2348
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002349static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
2350 const struct blk_mq_queue_data *bd)
2351{
2352 struct nvme_ns *ns = hctx->queue->queuedata;
2353 struct nvme_tcp_queue *queue = hctx->driver_data;
2354 struct request *rq = bd->rq;
2355 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2356 bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
2357 blk_status_t ret;
2358
Tao Chiua9715742021-04-26 10:53:10 +08002359 if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
2360 return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002361
2362 ret = nvme_tcp_setup_cmd_pdu(ns, rq);
2363 if (unlikely(ret))
2364 return ret;
2365
2366 blk_mq_start_request(rq);
2367
Sagi Grimberg86f03482020-06-18 17:30:23 -07002368 nvme_tcp_queue_request(req, true, bd->last);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002369
2370 return BLK_STS_OK;
2371}
2372
Sagi Grimberg873946f2018-12-11 23:38:57 -08002373static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
2374{
2375 struct nvme_tcp_ctrl *ctrl = set->driver_data;
Sagi Grimberg64861992019-05-28 22:49:05 -07002376 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
Sagi Grimberg873946f2018-12-11 23:38:57 -08002377
Sagi Grimberg64861992019-05-28 22:49:05 -07002378 if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
Sagi Grimberg873946f2018-12-11 23:38:57 -08002379 /* separate read/write queues */
2380 set->map[HCTX_TYPE_DEFAULT].nr_queues =
Sagi Grimberg64861992019-05-28 22:49:05 -07002381 ctrl->io_queues[HCTX_TYPE_DEFAULT];
2382 set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
2383 set->map[HCTX_TYPE_READ].nr_queues =
2384 ctrl->io_queues[HCTX_TYPE_READ];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002385 set->map[HCTX_TYPE_READ].queue_offset =
Sagi Grimberg64861992019-05-28 22:49:05 -07002386 ctrl->io_queues[HCTX_TYPE_DEFAULT];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002387 } else {
Sagi Grimberg64861992019-05-28 22:49:05 -07002388 /* shared read/write queues */
Sagi Grimberg873946f2018-12-11 23:38:57 -08002389 set->map[HCTX_TYPE_DEFAULT].nr_queues =
Sagi Grimberg64861992019-05-28 22:49:05 -07002390 ctrl->io_queues[HCTX_TYPE_DEFAULT];
2391 set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
2392 set->map[HCTX_TYPE_READ].nr_queues =
2393 ctrl->io_queues[HCTX_TYPE_DEFAULT];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002394 set->map[HCTX_TYPE_READ].queue_offset = 0;
2395 }
2396 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
2397 blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
Sagi Grimberg64861992019-05-28 22:49:05 -07002398
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002399 if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
2400 /* map dedicated poll queues only if we have queues left */
2401 set->map[HCTX_TYPE_POLL].nr_queues =
2402 ctrl->io_queues[HCTX_TYPE_POLL];
2403 set->map[HCTX_TYPE_POLL].queue_offset =
2404 ctrl->io_queues[HCTX_TYPE_DEFAULT] +
2405 ctrl->io_queues[HCTX_TYPE_READ];
2406 blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
2407 }
2408
Sagi Grimberg64861992019-05-28 22:49:05 -07002409 dev_info(ctrl->ctrl.device,
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002410 "mapped %d/%d/%d default/read/poll queues.\n",
Sagi Grimberg64861992019-05-28 22:49:05 -07002411 ctrl->io_queues[HCTX_TYPE_DEFAULT],
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002412 ctrl->io_queues[HCTX_TYPE_READ],
2413 ctrl->io_queues[HCTX_TYPE_POLL]);
Sagi Grimberg64861992019-05-28 22:49:05 -07002414
Sagi Grimberg873946f2018-12-11 23:38:57 -08002415 return 0;
2416}
2417
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002418static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
2419{
2420 struct nvme_tcp_queue *queue = hctx->driver_data;
2421 struct sock *sk = queue->sock->sk;
2422
Sagi Grimbergf86e5bf2020-03-23 16:43:52 -07002423 if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
2424 return 0;
2425
Sagi Grimberg72e5d752020-05-01 14:25:44 -07002426 set_bit(NVME_TCP_Q_POLLING, &queue->flags);
Eric Dumazet3f926af2019-10-23 22:44:51 -07002427 if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002428 sk_busy_loop(sk, true);
2429 nvme_tcp_try_recv(queue);
Sagi Grimberg72e5d752020-05-01 14:25:44 -07002430 clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002431 return queue->nr_cqe;
2432}
2433
Rikard Falkeborn6acbd962020-05-29 00:25:07 +02002434static const struct blk_mq_ops nvme_tcp_mq_ops = {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002435 .queue_rq = nvme_tcp_queue_rq,
Sagi Grimberg86f03482020-06-18 17:30:23 -07002436 .commit_rqs = nvme_tcp_commit_rqs,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002437 .complete = nvme_complete_rq,
2438 .init_request = nvme_tcp_init_request,
2439 .exit_request = nvme_tcp_exit_request,
2440 .init_hctx = nvme_tcp_init_hctx,
2441 .timeout = nvme_tcp_timeout,
Sagi Grimberg873946f2018-12-11 23:38:57 -08002442 .map_queues = nvme_tcp_map_queues,
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002443 .poll = nvme_tcp_poll,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002444};
2445
Rikard Falkeborn6acbd962020-05-29 00:25:07 +02002446static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002447 .queue_rq = nvme_tcp_queue_rq,
2448 .complete = nvme_complete_rq,
2449 .init_request = nvme_tcp_init_request,
2450 .exit_request = nvme_tcp_exit_request,
2451 .init_hctx = nvme_tcp_init_admin_hctx,
2452 .timeout = nvme_tcp_timeout,
2453};
2454
2455static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
2456 .name = "tcp",
2457 .module = THIS_MODULE,
2458 .flags = NVME_F_FABRICS,
2459 .reg_read32 = nvmf_reg_read32,
2460 .reg_read64 = nvmf_reg_read64,
2461 .reg_write32 = nvmf_reg_write32,
2462 .free_ctrl = nvme_tcp_free_ctrl,
2463 .submit_async_event = nvme_tcp_submit_async_event,
2464 .delete_ctrl = nvme_tcp_delete_ctrl,
2465 .get_address = nvmf_get_address,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002466};
2467
2468static bool
2469nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
2470{
2471 struct nvme_tcp_ctrl *ctrl;
2472 bool found = false;
2473
2474 mutex_lock(&nvme_tcp_ctrl_mutex);
2475 list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) {
2476 found = nvmf_ip_options_match(&ctrl->ctrl, opts);
2477 if (found)
2478 break;
2479 }
2480 mutex_unlock(&nvme_tcp_ctrl_mutex);
2481
2482 return found;
2483}
2484
2485static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
2486 struct nvmf_ctrl_options *opts)
2487{
2488 struct nvme_tcp_ctrl *ctrl;
2489 int ret;
2490
2491 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
2492 if (!ctrl)
2493 return ERR_PTR(-ENOMEM);
2494
2495 INIT_LIST_HEAD(&ctrl->list);
2496 ctrl->ctrl.opts = opts;
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002497 ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
2498 opts->nr_poll_queues + 1;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002499 ctrl->ctrl.sqsize = opts->queue_size - 1;
2500 ctrl->ctrl.kato = opts->kato;
2501
2502 INIT_DELAYED_WORK(&ctrl->connect_work,
2503 nvme_tcp_reconnect_ctrl_work);
2504 INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
2505 INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
2506
2507 if (!(opts->mask & NVMF_OPT_TRSVCID)) {
2508 opts->trsvcid =
2509 kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
2510 if (!opts->trsvcid) {
2511 ret = -ENOMEM;
2512 goto out_free_ctrl;
2513 }
2514 opts->mask |= NVMF_OPT_TRSVCID;
2515 }
2516
2517 ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
2518 opts->traddr, opts->trsvcid, &ctrl->addr);
2519 if (ret) {
2520 pr_err("malformed address passed: %s:%s\n",
2521 opts->traddr, opts->trsvcid);
2522 goto out_free_ctrl;
2523 }
2524
2525 if (opts->mask & NVMF_OPT_HOST_TRADDR) {
2526 ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
2527 opts->host_traddr, NULL, &ctrl->src_addr);
2528 if (ret) {
2529 pr_err("malformed src address passed: %s\n",
2530 opts->host_traddr);
2531 goto out_free_ctrl;
2532 }
2533 }
2534
Martin Belanger3ede8f72021-05-20 15:09:34 -04002535 if (opts->mask & NVMF_OPT_HOST_IFACE) {
2536 ctrl->ndev = dev_get_by_name(&init_net, opts->host_iface);
2537 if (!ctrl->ndev) {
2538 pr_err("invalid interface passed: %s\n",
2539 opts->host_iface);
2540 ret = -ENODEV;
2541 goto out_free_ctrl;
2542 }
2543 }
2544
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002545 if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
2546 ret = -EALREADY;
2547 goto out_free_ctrl;
2548 }
2549
Sagi Grimberg873946f2018-12-11 23:38:57 -08002550 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002551 GFP_KERNEL);
2552 if (!ctrl->queues) {
2553 ret = -ENOMEM;
2554 goto out_free_ctrl;
2555 }
2556
2557 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
2558 if (ret)
2559 goto out_kfree_queues;
2560
2561 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
2562 WARN_ON_ONCE(1);
2563 ret = -EINTR;
2564 goto out_uninit_ctrl;
2565 }
2566
2567 ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true);
2568 if (ret)
2569 goto out_uninit_ctrl;
2570
2571 dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
2572 ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
2573
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002574 mutex_lock(&nvme_tcp_ctrl_mutex);
2575 list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
2576 mutex_unlock(&nvme_tcp_ctrl_mutex);
2577
2578 return &ctrl->ctrl;
2579
2580out_uninit_ctrl:
2581 nvme_uninit_ctrl(&ctrl->ctrl);
2582 nvme_put_ctrl(&ctrl->ctrl);
2583 if (ret > 0)
2584 ret = -EIO;
2585 return ERR_PTR(ret);
2586out_kfree_queues:
2587 kfree(ctrl->queues);
2588out_free_ctrl:
2589 kfree(ctrl);
2590 return ERR_PTR(ret);
2591}
2592
2593static struct nvmf_transport_ops nvme_tcp_transport = {
2594 .name = "tcp",
2595 .module = THIS_MODULE,
2596 .required_opts = NVMF_OPT_TRADDR,
2597 .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
2598 NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
Sagi Grimberg873946f2018-12-11 23:38:57 -08002599 NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
Israel Rukshinbb139852019-08-18 12:08:54 +03002600 NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
Martin Belanger3ede8f72021-05-20 15:09:34 -04002601 NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002602 .create_ctrl = nvme_tcp_create_ctrl,
2603};
2604
2605static int __init nvme_tcp_init_module(void)
2606{
2607 nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
2608 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2609 if (!nvme_tcp_wq)
2610 return -ENOMEM;
2611
2612 nvmf_register_transport(&nvme_tcp_transport);
2613 return 0;
2614}
2615
2616static void __exit nvme_tcp_cleanup_module(void)
2617{
2618 struct nvme_tcp_ctrl *ctrl;
2619
2620 nvmf_unregister_transport(&nvme_tcp_transport);
2621
2622 mutex_lock(&nvme_tcp_ctrl_mutex);
2623 list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list)
2624 nvme_delete_ctrl(&ctrl->ctrl);
2625 mutex_unlock(&nvme_tcp_ctrl_mutex);
2626 flush_workqueue(nvme_delete_wq);
2627
2628 destroy_workqueue(nvme_tcp_wq);
2629}
2630
2631module_init(nvme_tcp_init_module);
2632module_exit(nvme_tcp_cleanup_module);
2633
2634MODULE_LICENSE("GPL v2");