// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics TCP host.
 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/nvme-tcp.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
#include <net/busy_poll.h>

#include "nvme.h"
#include "fabrics.h"

struct nvme_tcp_queue;

/* Define the socket priority to use for connections where it is desirable
 * that the NIC consider performing optimized packet processing or filtering.
 * A non-zero value is sufficient to indicate general consideration of any
 * possible optimization. Making it a module param allows for alternative
 * values that may be unique to some NIC implementations.
 */
static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");

enum nvme_tcp_send_state {
	NVME_TCP_SEND_CMD_PDU = 0,
	NVME_TCP_SEND_H2C_PDU,
	NVME_TCP_SEND_DATA,
	NVME_TCP_SEND_DDGST,
};

struct nvme_tcp_request {
	struct nvme_request	req;
	void			*pdu;
	struct nvme_tcp_queue	*queue;
	u32			data_len;
	u32			pdu_len;
	u32			pdu_sent;
	u16			ttag;
	struct list_head	entry;
	struct llist_node	lentry;
	__le32			ddgst;

	struct bio		*curr_bio;
	struct iov_iter		iter;

	/* send state */
	size_t			offset;
	size_t			data_sent;
	enum nvme_tcp_send_state state;
};

enum nvme_tcp_queue_flags {
	NVME_TCP_Q_ALLOCATED	= 0,
	NVME_TCP_Q_LIVE		= 1,
	NVME_TCP_Q_POLLING	= 2,
};

enum nvme_tcp_recv_state {
	NVME_TCP_RECV_PDU = 0,
	NVME_TCP_RECV_DATA,
	NVME_TCP_RECV_DDGST,
};

struct nvme_tcp_ctrl;
struct nvme_tcp_queue {
	struct socket		*sock;
	struct work_struct	io_work;
	int			io_cpu;

	struct mutex		queue_lock;
	struct mutex		send_mutex;
	struct llist_head	req_list;
	struct list_head	send_list;
	bool			more_requests;

	/* recv state */
	void			*pdu;
	int			pdu_remaining;
	int			pdu_offset;
	size_t			data_remaining;
	size_t			ddgst_remaining;
	unsigned int		nr_cqe;

	/* send state */
	struct nvme_tcp_request *request;

	int			queue_size;
	size_t			cmnd_capsule_len;
	struct nvme_tcp_ctrl	*ctrl;
	unsigned long		flags;
	bool			rd_enabled;

	bool			hdr_digest;
	bool			data_digest;
	struct ahash_request	*rcv_hash;
	struct ahash_request	*snd_hash;
	__le32			exp_ddgst;
	__le32			recv_ddgst;

	struct page_frag_cache	pf_cache;

	void (*state_change)(struct sock *);
	void (*data_ready)(struct sock *);
	void (*write_space)(struct sock *);
};

struct nvme_tcp_ctrl {
	/* read only in the hot path */
	struct nvme_tcp_queue	*queues;
	struct blk_mq_tag_set	tag_set;

	/* other member variables */
	struct list_head	list;
	struct blk_mq_tag_set	admin_tag_set;
	struct sockaddr_storage addr;
	struct sockaddr_storage src_addr;
	struct nvme_ctrl	ctrl;

	struct work_struct	err_work;
	struct delayed_work	connect_work;
	struct nvme_tcp_request async_req;
	u32			io_queues[HCTX_MAX_TYPES];
};

static LIST_HEAD(nvme_tcp_ctrl_list);
static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
static struct workqueue_struct *nvme_tcp_wq;
static const struct blk_mq_ops nvme_tcp_mq_ops;
static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);

static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_tcp_ctrl, ctrl);
}

static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
{
	return queue - queue->ctrl->queues;
}

static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
{
	u32 queue_idx = nvme_tcp_queue_id(queue);

	if (queue_idx == 0)
		return queue->ctrl->admin_tag_set.tags[queue_idx];
	return queue->ctrl->tag_set.tags[queue_idx - 1];
}

static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
{
	return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
{
	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
{
	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
}

static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
{
	return req == &req->queue->ctrl->async_req;
}

static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
{
	struct request *rq;

	if (unlikely(nvme_tcp_async_req(req)))
		return false; /* async events don't have a request */

	rq = blk_mq_rq_from_pdu(req);

	return rq_data_dir(rq) == WRITE && req->data_len &&
		req->data_len <= nvme_tcp_inline_data_size(req->queue);
}

static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
{
	return req->iter.bvec->bv_page;
}

static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req)
{
	return req->iter.bvec->bv_offset + req->iter.iov_offset;
}

static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
{
	return min_t(size_t, iov_iter_single_seg_count(&req->iter),
			req->pdu_len - req->pdu_sent);
}

static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req)
{
	return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ?
			req->pdu_len - req->pdu_sent : 0;
}

static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
		int len)
{
	return nvme_tcp_pdu_data_left(req) <= len;
}
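
/*
 * Build an iov_iter over the request payload: either the single special
 * payload vector (e.g. a discard) or the bvecs of the current bio.
 */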
static void nvme_tcp_init_iter(struct nvme_tcp_request *req,
		unsigned int dir)
{
	struct request *rq = blk_mq_rq_from_pdu(req);
	struct bio_vec *vec;
	unsigned int size;
	int nr_bvec;
	size_t offset;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		vec = &rq->special_vec;
		nr_bvec = 1;
		size = blk_rq_payload_bytes(rq);
		offset = 0;
	} else {
		struct bio *bio = req->curr_bio;
		struct bvec_iter bi;
		struct bio_vec bv;

		vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
		nr_bvec = 0;
		bio_for_each_bvec(bv, bio, bi) {
			nr_bvec++;
		}
		size = bio->bi_iter.bi_size;
		offset = bio->bi_iter.bi_bvec_done;
	}

	iov_iter_bvec(&req->iter, dir, vec, nr_bvec, size);
	req->iter.iov_offset = offset;
}

static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
		int len)
{
	req->data_sent += len;
	req->pdu_sent += len;
	iov_iter_advance(&req->iter, len);
	if (!iov_iter_count(&req->iter) &&
	    req->data_sent < req->data_len) {
		req->curr_bio = req->curr_bio->bi_next;
		nvme_tcp_init_iter(req, WRITE);
	}
}

static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
{
	int ret;

	/* drain the send queue as much as we can... */
	do {
		ret = nvme_tcp_try_send(queue);
	} while (ret > 0);
}

static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
		bool sync, bool last)
{
	struct nvme_tcp_queue *queue = req->queue;
	bool empty;

	empty = llist_add(&req->lentry, &queue->req_list) &&
		list_empty(&queue->send_list) && !queue->request;

	/*
	 * If we're the first on the send_list and we can try to send
	 * directly, do so; otherwise queue io_work. Also, only do that if we
	 * are on the same cpu, so we don't introduce contention.
	 */
	if (queue->io_cpu == __smp_processor_id() &&
	    sync && empty && mutex_trylock(&queue->send_mutex)) {
		queue->more_requests = !last;
		nvme_tcp_send_all(queue);
		queue->more_requests = false;
		mutex_unlock(&queue->send_mutex);
	} else if (last) {
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	}
}
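
/*
 * Splice the lockless req_list onto send_list. llist_del_all() returns the
 * entries newest-first, so adding each one at the head of send_list restores
 * submission order.
 */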
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;
	struct llist_node *node;

	for (node = llist_del_all(&queue->req_list); node; node = node->next) {
		req = llist_entry(node, struct nvme_tcp_request, lentry);
		list_add(&req->entry, &queue->send_list);
	}
}

static inline struct nvme_tcp_request *
nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;

	req = list_first_entry_or_null(&queue->send_list,
			struct nvme_tcp_request, entry);
	if (!req) {
		nvme_tcp_process_req_list(queue);
		req = list_first_entry_or_null(&queue->send_list,
				struct nvme_tcp_request, entry);
		if (unlikely(!req))
			return NULL;
	}

	list_del(&req->entry);
	return req;
}

static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
		__le32 *dgst)
{
	ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
	crypto_ahash_final(hash);
}

static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
		struct page *page, off_t off, size_t len)
{
	struct scatterlist sg;

	sg_init_marker(&sg, 1);
	sg_set_page(&sg, page, len, off);
	ahash_request_set_crypt(hash, &sg, NULL, len);
	crypto_ahash_update(hash);
}

static inline void nvme_tcp_hdgst(struct ahash_request *hash,
		void *pdu, size_t len)
{
	struct scatterlist sg;

	sg_init_one(&sg, pdu, len);
	ahash_request_set_crypt(hash, &sg, pdu + len, len);
	crypto_ahash_digest(hash);
}

static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
		void *pdu, size_t pdu_len)
{
	struct nvme_tcp_hdr *hdr = pdu;
	__le32 recv_digest;
	__le32 exp_digest;

	if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d: header digest flag is cleared\n",
			nvme_tcp_queue_id(queue));
		return -EPROTO;
	}

	recv_digest = *(__le32 *)(pdu + hdr->hlen);
	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
	exp_digest = *(__le32 *)(pdu + hdr->hlen);
	if (recv_digest != exp_digest) {
		dev_err(queue->ctrl->ctrl.device,
			"header digest error: recv %#x expected %#x\n",
			le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
		return -EIO;
	}

	return 0;
}

static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
{
	struct nvme_tcp_hdr *hdr = pdu;
	u8 digest_len = nvme_tcp_hdgst_len(queue);
	u32 len;

	len = le32_to_cpu(hdr->plen) - hdr->hlen -
		((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0);

	if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d: data digest flag is cleared\n",
			nvme_tcp_queue_id(queue));
		return -EPROTO;
	}
	crypto_ahash_init(queue->rcv_hash);

	return 0;
}

static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx)
{
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);

	page_frag_free(req->pdu);
}

static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx,
		unsigned int numa_node)
{
	struct nvme_tcp_ctrl *ctrl = set->driver_data;
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
	struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx];
	u8 hdgst = nvme_tcp_hdgst_len(queue);

	req->pdu = page_frag_alloc(&queue->pf_cache,
		sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
		GFP_KERNEL | __GFP_ZERO);
	if (!req->pdu)
		return -ENOMEM;

	req->queue = queue;
	nvme_req(rq)->ctrl = &ctrl->ctrl;

	return 0;
}

static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_tcp_ctrl *ctrl = data;
	struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];

	hctx->driver_data = queue;
	return 0;
}

static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_tcp_ctrl *ctrl = data;
	struct nvme_tcp_queue *queue = &ctrl->queues[0];

	hctx->driver_data = queue;
	return 0;
}
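
/*
 * The receive state is derived from the remaining byte counters: a PDU
 * header is consumed first, then (optionally) data and a trailing data
 * digest.
 */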
static enum nvme_tcp_recv_state
nvme_tcp_recv_state(struct nvme_tcp_queue *queue)
{
	return  (queue->pdu_remaining) ? NVME_TCP_RECV_PDU :
		(queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST :
		NVME_TCP_RECV_DATA;
}

static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
{
	queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) +
				nvme_tcp_hdgst_len(queue);
	queue->pdu_offset = 0;
	queue->data_remaining = -1;
	queue->ddgst_remaining = 0;
}

static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
		return;

	dev_warn(ctrl->device, "starting error recovery\n");
	queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}

static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
		struct nvme_completion *cqe)
{
	struct request *rq;

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag 0x%x not found\n",
			nvme_tcp_queue_id(queue), cqe->command_id);
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		return -EINVAL;
	}

	if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
		nvme_complete_rq(rq);
	queue->nr_cqe++;

	return 0;
}

static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
		struct nvme_tcp_data_pdu *pdu)
{
	struct request *rq;

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x not found\n",
			nvme_tcp_queue_id(queue), pdu->command_id);
		return -ENOENT;
	}

	if (!blk_rq_payload_bytes(rq)) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x unexpected data\n",
			nvme_tcp_queue_id(queue), rq->tag);
		return -EIO;
	}

	queue->data_remaining = le32_to_cpu(pdu->data_length);

	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS &&
	    unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x SUCCESS set but not last PDU\n",
			nvme_tcp_queue_id(queue), rq->tag);
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		return -EPROTO;
	}

	return 0;
}

static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
		struct nvme_tcp_rsp_pdu *pdu)
{
	struct nvme_completion *cqe = &pdu->cqe;
	int ret = 0;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts. We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
				     cqe->command_id)))
		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
				&cqe->result);
	else
		ret = nvme_tcp_process_nvme_cqe(queue, cqe);

	return ret;
}
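
/*
 * Prepare the H2CData PDU that answers a controller R2T: validate the
 * requested range against what the host has already sent, then fill in the
 * header for the next data transfer.
 */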
static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
		struct nvme_tcp_r2t_pdu *pdu)
{
	struct nvme_tcp_data_pdu *data = req->pdu;
	struct nvme_tcp_queue *queue = req->queue;
	struct request *rq = blk_mq_rq_from_pdu(req);
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	u8 ddgst = nvme_tcp_ddgst_len(queue);

	req->pdu_len = le32_to_cpu(pdu->r2t_length);
	req->pdu_sent = 0;

	if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
			rq->tag, req->pdu_len, req->data_len,
			req->data_sent);
		return -EPROTO;
	}

	if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d unexpected r2t offset %u (expected %zu)\n",
			rq->tag, le32_to_cpu(pdu->r2t_offset),
			req->data_sent);
		return -EPROTO;
	}

	memset(data, 0, sizeof(*data));
	data->hdr.type = nvme_tcp_h2c_data;
	data->hdr.flags = NVME_TCP_F_DATA_LAST;
	if (queue->hdr_digest)
		data->hdr.flags |= NVME_TCP_F_HDGST;
	if (queue->data_digest)
		data->hdr.flags |= NVME_TCP_F_DDGST;
	data->hdr.hlen = sizeof(*data);
	data->hdr.pdo = data->hdr.hlen + hdgst;
	data->hdr.plen =
		cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
	data->ttag = pdu->ttag;
	data->command_id = rq->tag;
	data->data_offset = cpu_to_le32(req->data_sent);
	data->data_length = cpu_to_le32(req->pdu_len);
	return 0;
}

static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
		struct nvme_tcp_r2t_pdu *pdu)
{
	struct nvme_tcp_request *req;
	struct request *rq;
	int ret;

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x not found\n",
			nvme_tcp_queue_id(queue), pdu->command_id);
		return -ENOENT;
	}
	req = blk_mq_rq_to_pdu(rq);

	ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
	if (unlikely(ret))
		return ret;

	req->state = NVME_TCP_SEND_H2C_PDU;
	req->offset = 0;

	nvme_tcp_queue_request(req, false, true);

	return 0;
}

static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
		unsigned int *offset, size_t *len)
{
	struct nvme_tcp_hdr *hdr;
	char *pdu = queue->pdu;
	size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining);
	int ret;

	ret = skb_copy_bits(skb, *offset,
		&pdu[queue->pdu_offset], rcv_len);
	if (unlikely(ret))
		return ret;

	queue->pdu_remaining -= rcv_len;
	queue->pdu_offset += rcv_len;
	*offset += rcv_len;
	*len -= rcv_len;
	if (queue->pdu_remaining)
		return 0;

	hdr = queue->pdu;
	if (queue->hdr_digest) {
		ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
		if (unlikely(ret))
			return ret;
	}

	if (queue->data_digest) {
		ret = nvme_tcp_check_ddgst(queue, queue->pdu);
		if (unlikely(ret))
			return ret;
	}

	switch (hdr->type) {
	case nvme_tcp_c2h_data:
		return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
	case nvme_tcp_rsp:
		nvme_tcp_init_recv_ctx(queue);
		return nvme_tcp_handle_comp(queue, (void *)queue->pdu);
	case nvme_tcp_r2t:
		nvme_tcp_init_recv_ctx(queue);
		return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
	default:
		dev_err(queue->ctrl->ctrl.device,
			"unsupported pdu type (%d)\n", hdr->type);
		return -EINVAL;
	}
}

static inline void nvme_tcp_end_request(struct request *rq, u16 status)
{
	union nvme_result res = {};

	if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
		nvme_complete_rq(rq);
}

static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
		unsigned int *offset, size_t *len)
{
	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
	struct nvme_tcp_request *req;
	struct request *rq;

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x not found\n",
			nvme_tcp_queue_id(queue), pdu->command_id);
		return -ENOENT;
	}
	req = blk_mq_rq_to_pdu(rq);

	while (true) {
		int recv_len, ret;

		recv_len = min_t(size_t, *len, queue->data_remaining);
		if (!recv_len)
			break;

		if (!iov_iter_count(&req->iter)) {
			req->curr_bio = req->curr_bio->bi_next;

			/*
			 * If we don't have any bios it means that the
			 * controller sent more data than we requested,
			 * hence error
			 */
			if (!req->curr_bio) {
				dev_err(queue->ctrl->ctrl.device,
					"queue %d no space in request %#x",
					nvme_tcp_queue_id(queue), rq->tag);
				nvme_tcp_init_recv_ctx(queue);
				return -EIO;
			}
			nvme_tcp_init_iter(req, READ);
		}

		/* we can read only from what is left in this bio */
		recv_len = min_t(size_t, recv_len,
				iov_iter_count(&req->iter));

		if (queue->data_digest)
			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
				&req->iter, recv_len, queue->rcv_hash);
		else
			ret = skb_copy_datagram_iter(skb, *offset,
					&req->iter, recv_len);
		if (ret) {
			dev_err(queue->ctrl->ctrl.device,
				"queue %d failed to copy request %#x data",
				nvme_tcp_queue_id(queue), rq->tag);
			return ret;
		}

		*len -= recv_len;
		*offset += recv_len;
		queue->data_remaining -= recv_len;
	}

	if (!queue->data_remaining) {
		if (queue->data_digest) {
			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
		} else {
			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
				nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
				queue->nr_cqe++;
			}
			nvme_tcp_init_recv_ctx(queue);
		}
	}

	return 0;
}

static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
		struct sk_buff *skb, unsigned int *offset, size_t *len)
{
	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
	char *ddgst = (char *)&queue->recv_ddgst;
	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
	int ret;

	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
	if (unlikely(ret))
		return ret;

	queue->ddgst_remaining -= recv_len;
	*offset += recv_len;
	*len -= recv_len;
	if (queue->ddgst_remaining)
		return 0;

	if (queue->recv_ddgst != queue->exp_ddgst) {
		dev_err(queue->ctrl->ctrl.device,
			"data digest error: recv %#x expected %#x\n",
			le32_to_cpu(queue->recv_ddgst),
			le32_to_cpu(queue->exp_ddgst));
		return -EIO;
	}

	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
		struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
						pdu->command_id);

		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
		queue->nr_cqe++;
	}

	nvme_tcp_init_recv_ctx(queue);
	return 0;
}

static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
			     unsigned int offset, size_t len)
{
	struct nvme_tcp_queue *queue = desc->arg.data;
	size_t consumed = len;
	int result;

	while (len) {
		switch (nvme_tcp_recv_state(queue)) {
		case NVME_TCP_RECV_PDU:
			result = nvme_tcp_recv_pdu(queue, skb, &offset, &len);
			break;
		case NVME_TCP_RECV_DATA:
			result = nvme_tcp_recv_data(queue, skb, &offset, &len);
			break;
		case NVME_TCP_RECV_DDGST:
			result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len);
			break;
		default:
			result = -EFAULT;
		}
		if (result) {
			dev_err(queue->ctrl->ctrl.device,
				"receive failed: %d\n", result);
			queue->rd_enabled = false;
			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
			return result;
		}
	}

	return consumed;
}

static void nvme_tcp_data_ready(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (likely(queue && queue->rd_enabled) &&
	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvme_tcp_write_space(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (likely(queue && sk_stream_is_writeable(sk))) {
		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	}
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvme_tcp_state_change(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	read_lock(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (!queue)
		goto done;

	switch (sk->sk_state) {
	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
	case TCP_LAST_ACK:
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		break;
	default:
		dev_info(queue->ctrl->ctrl.device,
			"queue %d socket state %d\n",
			nvme_tcp_queue_id(queue), sk->sk_state);
	}

	queue->state_change(sk);
done:
	read_unlock(&sk->sk_callback_lock);
}
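
/*
 * More sends are pending if either submission list still has entries or an
 * inline send marked more_requests; used below to decide whether to set
 * MSG_MORE/MSG_SENDPAGE_NOTLAST instead of MSG_EOR on the socket.
 */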
static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
	return !list_empty(&queue->send_list) ||
		!llist_empty(&queue->req_list) || queue->more_requests;
}

static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{
	queue->request = NULL;
}

static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
{
	nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
}

static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;

	while (true) {
		struct page *page = nvme_tcp_req_cur_page(req);
		size_t offset = nvme_tcp_req_cur_offset(req);
		size_t len = nvme_tcp_req_cur_length(req);
		bool last = nvme_tcp_pdu_last_send(req, len);
		int ret, flags = MSG_DONTWAIT;

		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
			flags |= MSG_EOR;
		else
			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;

		if (sendpage_ok(page)) {
			ret = kernel_sendpage(queue->sock, page, offset, len,
					flags);
		} else {
			ret = sock_no_sendpage(queue->sock, page, offset, len,
					flags);
		}
		if (ret <= 0)
			return ret;

		nvme_tcp_advance_req(req, ret);
		if (queue->data_digest)
			nvme_tcp_ddgst_update(queue->snd_hash, page,
					offset, ret);

		/* fully successful last write */
		if (last && ret == len) {
			if (queue->data_digest) {
				nvme_tcp_ddgst_final(queue->snd_hash,
					&req->ddgst);
				req->state = NVME_TCP_SEND_DDGST;
				req->offset = 0;
			} else {
				nvme_tcp_done_send_req(queue);
			}
			return 1;
		}
	}
	return -EAGAIN;
}

static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
	bool inline_data = nvme_tcp_has_inline_data(req);
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	int len = sizeof(*pdu) + hdgst - req->offset;
	int flags = MSG_DONTWAIT;
	int ret;

	if (inline_data || nvme_tcp_queue_more(queue))
		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
	else
		flags |= MSG_EOR;

	if (queue->hdr_digest && !req->offset)
		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));

	ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
			offset_in_page(pdu) + req->offset, len, flags);
	if (unlikely(ret <= 0))
		return ret;

	len -= ret;
	if (!len) {
		if (inline_data) {
			req->state = NVME_TCP_SEND_DATA;
			if (queue->data_digest)
				crypto_ahash_init(queue->snd_hash);
		} else {
			nvme_tcp_done_send_req(queue);
		}
		return 1;
	}
	req->offset += ret;

	return -EAGAIN;
}

static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	struct nvme_tcp_data_pdu *pdu = req->pdu;
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	int len = sizeof(*pdu) - req->offset + hdgst;
	int ret;

	if (queue->hdr_digest && !req->offset)
		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));

	ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
			offset_in_page(pdu) + req->offset, len,
			MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
	if (unlikely(ret <= 0))
		return ret;

	len -= ret;
	if (!len) {
		req->state = NVME_TCP_SEND_DATA;
		if (queue->data_digest)
			crypto_ahash_init(queue->snd_hash);
		return 1;
	}
	req->offset += ret;

	return -EAGAIN;
}

static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	int ret;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec iov = {
		.iov_base = &req->ddgst + req->offset,
		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
	};

	if (nvme_tcp_queue_more(queue))
		msg.msg_flags |= MSG_MORE;
	else
		msg.msg_flags |= MSG_EOR;

	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (unlikely(ret <= 0))
		return ret;

	if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) {
		nvme_tcp_done_send_req(queue);
		return 1;
	}

	req->offset += ret;
	return -EAGAIN;
}
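
/*
 * Walk the per-request send state machine: command PDU first, then (for
 * R2T-driven writes) the H2CData PDU, then payload data and finally the
 * data digest.
 */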
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;
	int ret = 1;

	if (!queue->request) {
		queue->request = nvme_tcp_fetch_request(queue);
		if (!queue->request)
			return 0;
	}
	req = queue->request;

	if (req->state == NVME_TCP_SEND_CMD_PDU) {
		ret = nvme_tcp_try_send_cmd_pdu(req);
		if (ret <= 0)
			goto done;
		if (!nvme_tcp_has_inline_data(req))
			return ret;
	}

	if (req->state == NVME_TCP_SEND_H2C_PDU) {
		ret = nvme_tcp_try_send_data_pdu(req);
		if (ret <= 0)
			goto done;
	}

	if (req->state == NVME_TCP_SEND_DATA) {
		ret = nvme_tcp_try_send_data(req);
		if (ret <= 0)
			goto done;
	}

	if (req->state == NVME_TCP_SEND_DDGST)
		ret = nvme_tcp_try_send_ddgst(req);
done:
	if (ret == -EAGAIN) {
		ret = 0;
	} else if (ret < 0) {
		dev_err(queue->ctrl->ctrl.device,
			"failed to send request %d\n", ret);
		if (ret != -EPIPE && ret != -ECONNRESET)
			nvme_tcp_fail_request(queue->request);
		nvme_tcp_done_send_req(queue);
	}
	return ret;
}

static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
{
	struct socket *sock = queue->sock;
	struct sock *sk = sock->sk;
	read_descriptor_t rd_desc;
	int consumed;

	rd_desc.arg.data = queue;
	rd_desc.count = 1;
	lock_sock(sk);
	queue->nr_cqe = 0;
	consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
	release_sock(sk);
	return consumed;
}
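
/*
 * I/O worker: alternate between sending and receiving on the queue for at
 * most ~1ms per invocation, then re-queue itself if there is still work
 * pending.
 */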
static void nvme_tcp_io_work(struct work_struct *w)
{
	struct nvme_tcp_queue *queue =
		container_of(w, struct nvme_tcp_queue, io_work);
	unsigned long deadline = jiffies + msecs_to_jiffies(1);

	do {
		bool pending = false;
		int result;

		if (mutex_trylock(&queue->send_mutex)) {
			result = nvme_tcp_try_send(queue);
			mutex_unlock(&queue->send_mutex);
			if (result > 0)
				pending = true;
			else if (unlikely(result < 0))
				break;
		}

		result = nvme_tcp_try_recv(queue);
		if (result > 0)
			pending = true;
		else if (unlikely(result < 0))
			return;

		if (!pending)
			return;

	} while (!time_after(jiffies, deadline)); /* quota is exhausted */

	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}

static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);

	ahash_request_free(queue->rcv_hash);
	ahash_request_free(queue->snd_hash);
	crypto_free_ahash(tfm);
}

static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
{
	struct crypto_ahash *tfm;

	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->snd_hash)
		goto free_tfm;
	ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);

	queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->rcv_hash)
		goto free_snd_hash;
	ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);

	return 0;
free_snd_hash:
	ahash_request_free(queue->snd_hash);
free_tfm:
	crypto_free_ahash(tfm);
	return -ENOMEM;
}

static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
{
	struct nvme_tcp_request *async = &ctrl->async_req;

	page_frag_free(async->pdu);
}

static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
{
	struct nvme_tcp_queue *queue = &ctrl->queues[0];
	struct nvme_tcp_request *async = &ctrl->async_req;
	u8 hdgst = nvme_tcp_hdgst_len(queue);

	async->pdu = page_frag_alloc(&queue->pf_cache,
		sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
		GFP_KERNEL | __GFP_ZERO);
	if (!async->pdu)
		return -ENOMEM;

	async->queue = &ctrl->queues[0];
	return 0;
}

static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct nvme_tcp_queue *queue = &ctrl->queues[qid];

	if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
		return;

	if (queue->hdr_digest || queue->data_digest)
		nvme_tcp_free_crypto(queue);

	sock_release(queue->sock);
	kfree(queue->pdu);
	mutex_destroy(&queue->queue_lock);
}
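
/*
 * NVMe/TCP connection establishment: send an ICReq, read back the ICResp and
 * verify that PFV, PDU length, CPDA and the negotiated digest settings match
 * what the host asked for.
 */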
static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_icreq_pdu *icreq;
	struct nvme_tcp_icresp_pdu *icresp;
	struct msghdr msg = {};
	struct kvec iov;
	bool ctrl_hdgst, ctrl_ddgst;
	int ret;

	icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
	if (!icreq)
		return -ENOMEM;

	icresp = kzalloc(sizeof(*icresp), GFP_KERNEL);
	if (!icresp) {
		ret = -ENOMEM;
		goto free_icreq;
	}

	icreq->hdr.type = nvme_tcp_icreq;
	icreq->hdr.hlen = sizeof(*icreq);
	icreq->hdr.pdo = 0;
	icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen);
	icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
	icreq->maxr2t = 0; /* single inflight r2t supported */
	icreq->hpda = 0; /* no alignment constraint */
	if (queue->hdr_digest)
		icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
	if (queue->data_digest)
		icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE;

	iov.iov_base = icreq;
	iov.iov_len = sizeof(*icreq);
	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (ret < 0)
		goto free_icresp;

	memset(&msg, 0, sizeof(msg));
	iov.iov_base = icresp;
	iov.iov_len = sizeof(*icresp);
	ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
			iov.iov_len, msg.msg_flags);
	if (ret < 0)
		goto free_icresp;

	ret = -EINVAL;
	if (icresp->hdr.type != nvme_tcp_icresp) {
		pr_err("queue %d: bad type returned %d\n",
			nvme_tcp_queue_id(queue), icresp->hdr.type);
		goto free_icresp;
	}

	if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) {
		pr_err("queue %d: bad pdu length returned %d\n",
			nvme_tcp_queue_id(queue), icresp->hdr.plen);
		goto free_icresp;
	}

	if (icresp->pfv != NVME_TCP_PFV_1_0) {
		pr_err("queue %d: bad pfv returned %d\n",
			nvme_tcp_queue_id(queue), icresp->pfv);
		goto free_icresp;
	}

	ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE);
	if ((queue->data_digest && !ctrl_ddgst) ||
	    (!queue->data_digest && ctrl_ddgst)) {
		pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n",
			nvme_tcp_queue_id(queue),
			queue->data_digest ? "enabled" : "disabled",
			ctrl_ddgst ? "enabled" : "disabled");
		goto free_icresp;
	}

	ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE);
	if ((queue->hdr_digest && !ctrl_hdgst) ||
	    (!queue->hdr_digest && ctrl_hdgst)) {
		pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n",
			nvme_tcp_queue_id(queue),
			queue->hdr_digest ? "enabled" : "disabled",
			ctrl_hdgst ? "enabled" : "disabled");
		goto free_icresp;
	}

	if (icresp->cpda != 0) {
		pr_err("queue %d: unsupported cpda returned %d\n",
			nvme_tcp_queue_id(queue), icresp->cpda);
		goto free_icresp;
	}

	ret = 0;
free_icresp:
	kfree(icresp);
free_icreq:
	kfree(icreq);
	return ret;
}

static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
{
	return nvme_tcp_queue_id(queue) == 0;
}

static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	return !nvme_tcp_admin_queue(queue) &&
		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
}

static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	return !nvme_tcp_admin_queue(queue) &&
		!nvme_tcp_default_queue(queue) &&
		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
				ctrl->io_queues[HCTX_TYPE_READ];
}

static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	return !nvme_tcp_admin_queue(queue) &&
		!nvme_tcp_default_queue(queue) &&
		!nvme_tcp_read_queue(queue) &&
		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
				ctrl->io_queues[HCTX_TYPE_READ] +
				ctrl->io_queues[HCTX_TYPE_POLL];
}
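
/*
 * Pin each queue's io_work to a CPU: queues of the same type (default, read,
 * poll) are spread round-robin over the online CPUs.
 */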
static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);
	int n = 0;

	if (nvme_tcp_default_queue(queue))
		n = qid - 1;
	else if (nvme_tcp_read_queue(queue))
		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
	else if (nvme_tcp_poll_queue(queue))
		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
				ctrl->io_queues[HCTX_TYPE_READ] - 1;
	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
}

static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
		int qid, size_t queue_size)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
	int ret, rcv_pdu_size;

	mutex_init(&queue->queue_lock);
	queue->ctrl = ctrl;
	init_llist_head(&queue->req_list);
	INIT_LIST_HEAD(&queue->send_list);
	mutex_init(&queue->send_mutex);
	INIT_WORK(&queue->io_work, nvme_tcp_io_work);
	queue->queue_size = queue_size;

	if (qid > 0)
		queue->cmnd_capsule_len = nctrl->ioccsz * 16;
	else
		queue->cmnd_capsule_len = sizeof(struct nvme_command) +
						NVME_TCP_ADMIN_CCSZ;

	ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
			IPPROTO_TCP, &queue->sock);
	if (ret) {
		dev_err(nctrl->device,
			"failed to create socket: %d\n", ret);
		goto err_destroy_mutex;
	}

	/* Single syn retry */
	tcp_sock_set_syncnt(queue->sock->sk, 1);

	/* Set TCP no delay */
	tcp_sock_set_nodelay(queue->sock->sk);

	/*
	 * Cleanup whatever is sitting in the TCP transmit queue on socket
	 * close. This is done to prevent stale data from being sent should
	 * the network connection be restored before TCP times out.
	 */
	sock_no_linger(queue->sock->sk);

	if (so_priority > 0)
		sock_set_priority(queue->sock->sk, so_priority);

	/* Set socket type of service */
	if (nctrl->opts->tos >= 0)
		ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);

	/* Set 10 seconds timeout for icresp recvmsg */
	queue->sock->sk->sk_rcvtimeo = 10 * HZ;

	queue->sock->sk->sk_allocation = GFP_ATOMIC;
	nvme_tcp_set_queue_io_cpu(queue);
	queue->request = NULL;
	queue->data_remaining = 0;
	queue->ddgst_remaining = 0;
	queue->pdu_remaining = 0;
	queue->pdu_offset = 0;
	sk_set_memalloc(queue->sock->sk);

	if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
		ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
			sizeof(ctrl->src_addr));
		if (ret) {
			dev_err(nctrl->device,
				"failed to bind queue %d socket %d\n",
				qid, ret);
			goto err_sock;
		}
	}

	queue->hdr_digest = nctrl->opts->hdr_digest;
	queue->data_digest = nctrl->opts->data_digest;
	if (queue->hdr_digest || queue->data_digest) {
		ret = nvme_tcp_alloc_crypto(queue);
		if (ret) {
			dev_err(nctrl->device,
				"failed to allocate queue %d crypto\n", qid);
			goto err_sock;
		}
	}

	rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
			nvme_tcp_hdgst_len(queue);
	queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
	if (!queue->pdu) {
		ret = -ENOMEM;
		goto err_crypto;
	}

	dev_dbg(nctrl->device, "connecting queue %d\n",
			nvme_tcp_queue_id(queue));

	ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
		sizeof(ctrl->addr), 0);
	if (ret) {
		dev_err(nctrl->device,
			"failed to connect socket: %d\n", ret);
		goto err_rcv_pdu;
	}

	ret = nvme_tcp_init_connection(queue);
	if (ret)
		goto err_init_connect;

	queue->rd_enabled = true;
	set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
	nvme_tcp_init_recv_ctx(queue);

	write_lock_bh(&queue->sock->sk->sk_callback_lock);
	queue->sock->sk->sk_user_data = queue;
	queue->state_change = queue->sock->sk->sk_state_change;
	queue->data_ready = queue->sock->sk->sk_data_ready;
	queue->write_space = queue->sock->sk->sk_write_space;
	queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
	queue->sock->sk->sk_state_change = nvme_tcp_state_change;
	queue->sock->sk->sk_write_space = nvme_tcp_write_space;
#ifdef CONFIG_NET_RX_BUSY_POLL
	queue->sock->sk->sk_ll_usec = 1;
#endif
	write_unlock_bh(&queue->sock->sk->sk_callback_lock);

	return 0;

err_init_connect:
	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
err_rcv_pdu:
	kfree(queue->pdu);
err_crypto:
	if (queue->hdr_digest || queue->data_digest)
		nvme_tcp_free_crypto(queue);
err_sock:
	sock_release(queue->sock);
	queue->sock = NULL;
err_destroy_mutex:
	mutex_destroy(&queue->queue_lock);
	return ret;
}

static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
{
	struct socket *sock = queue->sock;

	write_lock_bh(&sock->sk->sk_callback_lock);
	sock->sk->sk_user_data = NULL;
	sock->sk->sk_data_ready = queue->data_ready;
	sock->sk->sk_state_change = queue->state_change;
	sock->sk->sk_write_space = queue->write_space;
	write_unlock_bh(&sock->sk->sk_callback_lock);
}

static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
{
	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	nvme_tcp_restore_sock_calls(queue);
	cancel_work_sync(&queue->io_work);
}

static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct nvme_tcp_queue *queue = &ctrl->queues[qid];

	mutex_lock(&queue->queue_lock);
	if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
		__nvme_tcp_stop_queue(queue);
	mutex_unlock(&queue->queue_lock);
}

static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	int ret;

	if (idx)
		ret = nvmf_connect_io_queue(nctrl, idx, false);
	else
		ret = nvmf_connect_admin_queue(nctrl);

	if (!ret) {
		set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
	} else {
		if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
			__nvme_tcp_stop_queue(&ctrl->queues[idx]);
		dev_err(nctrl->device,
			"failed to connect queue: %d ret=%d\n", idx, ret);
	}
	return ret;
}

static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
		bool admin)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct blk_mq_tag_set *set;
	int ret;

	if (admin) {
		set = &ctrl->admin_tag_set;
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_tcp_admin_mq_ops;
		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		set->reserved_tags = 2; /* connect + keep-alive */
		set->numa_node = nctrl->numa_node;
		set->flags = BLK_MQ_F_BLOCKING;
		set->cmd_size = sizeof(struct nvme_tcp_request);
		set->driver_data = ctrl;
		set->nr_hw_queues = 1;
		set->timeout = NVME_ADMIN_TIMEOUT;
	} else {
		set = &ctrl->tag_set;
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_tcp_mq_ops;
		set->queue_depth = nctrl->sqsize + 1;
		set->reserved_tags = 1; /* fabric connect */
		set->numa_node = nctrl->numa_node;
		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
		set->cmd_size = sizeof(struct nvme_tcp_request);
		set->driver_data = ctrl;
		set->nr_hw_queues = nctrl->queue_count - 1;
		set->timeout = NVME_IO_TIMEOUT;
		set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
	}

	ret = blk_mq_alloc_tag_set(set);
	if (ret)
		return ERR_PTR(ret);

	return set;
}

static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
{
	if (to_tcp_ctrl(ctrl)->async_req.pdu) {
		cancel_work_sync(&ctrl->async_event_work);
		nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
		to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
	}

	nvme_tcp_free_queue(ctrl, 0);
}

static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->queue_count; i++)
		nvme_tcp_free_queue(ctrl, i);
}

static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->queue_count; i++)
		nvme_tcp_stop_queue(ctrl, i);
}

static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
{
	int i, ret = 0;

	for (i = 1; i < ctrl->queue_count; i++) {
		ret = nvme_tcp_start_queue(ctrl, i);
		if (ret)
			goto out_stop_queues;
	}

	return 0;

out_stop_queues:
	for (i--; i >= 1; i--)
		nvme_tcp_stop_queue(ctrl, i);
	return ret;
}

static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
{
	int ret;

	ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
	if (ret)
		return ret;

	ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
	if (ret)
		goto out_free_queue;

	return 0;

out_free_queue:
	nvme_tcp_free_queue(ctrl, 0);
	return ret;
}

static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
{
	int i, ret;

	for (i = 1; i < ctrl->queue_count; i++) {
		ret = nvme_tcp_alloc_queue(ctrl, i,
				ctrl->sqsize + 1);
		if (ret)
			goto out_free_queues;
	}

	return 0;

out_free_queues:
	for (i--; i >= 1; i--)
		nvme_tcp_free_queue(ctrl, i);

	return ret;
}
1691static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
1692{
Sagi Grimberg873946f2018-12-11 23:38:57 -08001693 unsigned int nr_io_queues;
1694
1695 nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
1696 nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001697 nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());
Sagi Grimberg873946f2018-12-11 23:38:57 -08001698
1699 return nr_io_queues;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001700}
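/*
 * For illustration (numbers assumed, not from the source): on a host with
 * 8 online CPUs and connect options nr_io_queues=4, nr_write_queues=4,
 * nr_poll_queues=2, this requests 4 + 4 + 2 = 10 queues.  Each class is
 * capped individually at num_online_cpus(), and the controller may still
 * grant fewer via nvme_set_queue_count() in the caller.
 */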
1701
Sagi Grimberg64861992019-05-28 22:49:05 -07001702static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
1703 unsigned int nr_io_queues)
1704{
1705 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1706 struct nvmf_ctrl_options *opts = nctrl->opts;
1707
1708 if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
1709 /*
1710 * separate read/write queues
1711 * hand out dedicated default queues only after we have
1712 * sufficient read queues.
1713 */
1714 ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
1715 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
1716 ctrl->io_queues[HCTX_TYPE_DEFAULT] =
1717 min(opts->nr_write_queues, nr_io_queues);
1718 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
1719 } else {
1720 /*
1721 * shared read/write queues
1722 * either no write queues were requested, or we don't have
1723 * sufficient queue count to have dedicated default queues.
1724 */
1725 ctrl->io_queues[HCTX_TYPE_DEFAULT] =
1726 min(opts->nr_io_queues, nr_io_queues);
1727 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
1728 }
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001729
1730 if (opts->nr_poll_queues && nr_io_queues) {
1731 /* map dedicated poll queues only if we have queues left */
1732 ctrl->io_queues[HCTX_TYPE_POLL] =
1733 min(opts->nr_poll_queues, nr_io_queues);
1734 }
Sagi Grimberg64861992019-05-28 22:49:05 -07001735}
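/*
 * Worked example (assumed numbers): with nr_io_queues=4, nr_write_queues=2,
 * nr_poll_queues=2 and 8 queues granted, the first branch is taken because
 * write queues were requested and 4 < 8: HCTX_TYPE_READ gets 4,
 * HCTX_TYPE_DEFAULT gets min(2, 4) = 2, and HCTX_TYPE_POLL gets the
 * remaining min(2, 2) = 2.  Had the controller granted only 4 queues,
 * the shared branch would be taken instead and no dedicated default
 * queues would be handed out.
 */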
1736
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001737static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001738{
1739 unsigned int nr_io_queues;
1740 int ret;
1741
1742 nr_io_queues = nvme_tcp_nr_io_queues(ctrl);
1743 ret = nvme_set_queue_count(ctrl, &nr_io_queues);
1744 if (ret)
1745 return ret;
1746
1747 ctrl->queue_count = nr_io_queues + 1;
1748 if (ctrl->queue_count < 2)
1749 return 0;
1750
1751 dev_info(ctrl->device,
1752 "creating %d I/O queues.\n", nr_io_queues);
1753
Sagi Grimberg64861992019-05-28 22:49:05 -07001754 nvme_tcp_set_io_queues(ctrl, nr_io_queues);
1755
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001756 return __nvme_tcp_alloc_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001757}
1758
1759static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
1760{
1761 nvme_tcp_stop_io_queues(ctrl);
1762 if (remove) {
Sagi Grimberge85037a2018-12-31 23:58:30 -08001763 blk_cleanup_queue(ctrl->connect_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001764 blk_mq_free_tag_set(ctrl->tagset);
1765 }
1766 nvme_tcp_free_io_queues(ctrl);
1767}
1768
1769static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
1770{
1771 int ret;
1772
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001773 ret = nvme_tcp_alloc_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001774 if (ret)
1775 return ret;
1776
1777 if (new) {
1778 ctrl->tagset = nvme_tcp_alloc_tagset(ctrl, false);
1779 if (IS_ERR(ctrl->tagset)) {
1780 ret = PTR_ERR(ctrl->tagset);
1781 goto out_free_io_queues;
1782 }
1783
Sagi Grimberge85037a2018-12-31 23:58:30 -08001784 ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
1785 if (IS_ERR(ctrl->connect_q)) {
1786 ret = PTR_ERR(ctrl->connect_q);
1787 goto out_free_tag_set;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001788 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001789 }
1790
1791 ret = nvme_tcp_start_io_queues(ctrl);
1792 if (ret)
1793 goto out_cleanup_connect_q;
1794
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001795 if (!new) {
1796 nvme_start_queues(ctrl);
Sagi Grimberge5c01f42020-07-30 13:25:34 -07001797 if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
1798 /*
1799 * If we timed out waiting for freeze we are likely to
1800 * be stuck. Fail the controller initialization just
1801 * to be safe.
1802 */
1803 ret = -ENODEV;
1804 goto out_wait_freeze_timed_out;
1805 }
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001806 blk_mq_update_nr_hw_queues(ctrl->tagset,
1807 ctrl->queue_count - 1);
1808 nvme_unfreeze(ctrl);
1809 }
1810
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001811 return 0;
1812
Sagi Grimberge5c01f42020-07-30 13:25:34 -07001813out_wait_freeze_timed_out:
1814 nvme_stop_queues(ctrl);
Chao Leng70a99572021-01-21 11:32:38 +08001815 nvme_sync_io_queues(ctrl);
Sagi Grimberge5c01f42020-07-30 13:25:34 -07001816 nvme_tcp_stop_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001817out_cleanup_connect_q:
Chao Leng70a99572021-01-21 11:32:38 +08001818 nvme_cancel_tagset(ctrl);
Sagi Grimberge85037a2018-12-31 23:58:30 -08001819 if (new)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001820 blk_cleanup_queue(ctrl->connect_q);
1821out_free_tag_set:
1822 if (new)
1823 blk_mq_free_tag_set(ctrl->tagset);
1824out_free_io_queues:
1825 nvme_tcp_free_io_queues(ctrl);
1826 return ret;
1827}
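/*
 * On the non-new (reset/reconnect) path the queues were frozen in
 * nvme_tcp_teardown_io_queues(); waiting for that freeze to complete before
 * blk_mq_update_nr_hw_queues() ensures no requests are in flight while the
 * hardware queue count may change, and nvme_unfreeze() then releases the
 * queues.  The timeout guards against a controller that never completes the
 * requests that already entered the queues.
 */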
1828
1829static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
1830{
1831 nvme_tcp_stop_queue(ctrl, 0);
1832 if (remove) {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001833 blk_cleanup_queue(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001834 blk_cleanup_queue(ctrl->fabrics_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001835 blk_mq_free_tag_set(ctrl->admin_tagset);
1836 }
1837 nvme_tcp_free_admin_queue(ctrl);
1838}
1839
1840static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
1841{
1842 int error;
1843
1844 error = nvme_tcp_alloc_admin_queue(ctrl);
1845 if (error)
1846 return error;
1847
1848 if (new) {
1849 ctrl->admin_tagset = nvme_tcp_alloc_tagset(ctrl, true);
1850 if (IS_ERR(ctrl->admin_tagset)) {
1851 error = PTR_ERR(ctrl->admin_tagset);
1852 goto out_free_queue;
1853 }
1854
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001855 ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset);
1856 if (IS_ERR(ctrl->fabrics_q)) {
1857 error = PTR_ERR(ctrl->fabrics_q);
1858 goto out_free_tagset;
1859 }
1860
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001861 ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset);
1862 if (IS_ERR(ctrl->admin_q)) {
1863 error = PTR_ERR(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001864 goto out_cleanup_fabrics_q;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001865 }
1866 }
1867
1868 error = nvme_tcp_start_queue(ctrl, 0);
1869 if (error)
1870 goto out_cleanup_queue;
1871
Sagi Grimbergc0f2f452019-07-22 17:06:53 -07001872 error = nvme_enable_ctrl(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001873 if (error)
1874 goto out_stop_queue;
1875
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001876 blk_mq_unquiesce_queue(ctrl->admin_q);
1877
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001878 error = nvme_init_identify(ctrl);
1879 if (error)
Chao Leng70a99572021-01-21 11:32:38 +08001880 goto out_quiesce_queue;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001881
1882 return 0;
1883
Chao Leng70a99572021-01-21 11:32:38 +08001884out_quiesce_queue:
1885 blk_mq_quiesce_queue(ctrl->admin_q);
1886 blk_sync_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001887out_stop_queue:
1888 nvme_tcp_stop_queue(ctrl, 0);
Chao Leng70a99572021-01-21 11:32:38 +08001889 nvme_cancel_admin_tagset(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001890out_cleanup_queue:
1891 if (new)
1892 blk_cleanup_queue(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001893out_cleanup_fabrics_q:
1894 if (new)
1895 blk_cleanup_queue(ctrl->fabrics_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001896out_free_tagset:
1897 if (new)
1898 blk_mq_free_tag_set(ctrl->admin_tagset);
1899out_free_queue:
1900 nvme_tcp_free_admin_queue(ctrl);
1901 return error;
1902}
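/*
 * Admin bring-up order: allocate the queue and the AEN request, create the
 * tag set plus the fabrics_q/admin_q request queues (only for a brand new
 * controller), start the TCP queue (which issues the fabrics connect),
 * enable the controller, and finally identify it.  The error labels unwind
 * in reverse order; on a reconnect (new == false) the tag set and request
 * queues already exist and are reused, only the TCP queue itself is
 * re-created.
 */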
1903
1904static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
1905 bool remove)
1906{
1907 blk_mq_quiesce_queue(ctrl->admin_q);
Chao Lengd6f66212020-10-22 10:15:15 +08001908 blk_sync_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001909 nvme_tcp_stop_queue(ctrl, 0);
Chao Leng563c8152021-01-21 11:32:40 +08001910 nvme_cancel_admin_tagset(ctrl);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001911 if (remove)
1912 blk_mq_unquiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001913 nvme_tcp_destroy_admin_queue(ctrl, remove);
1914}
1915
1916static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
1917 bool remove)
1918{
1919 if (ctrl->queue_count <= 1)
Chao Lengd6f66212020-10-22 10:15:15 +08001920 return;
Sagi Grimbergd4d61472020-08-05 18:13:48 -07001921 blk_mq_quiesce_queue(ctrl->admin_q);
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001922 nvme_start_freeze(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001923 nvme_stop_queues(ctrl);
Chao Lengd6f66212020-10-22 10:15:15 +08001924 nvme_sync_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001925 nvme_tcp_stop_io_queues(ctrl);
Chao Leng563c8152021-01-21 11:32:40 +08001926 nvme_cancel_tagset(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001927 if (remove)
1928 nvme_start_queues(ctrl);
1929 nvme_tcp_destroy_io_queues(ctrl, remove);
1930}
1931
1932static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
1933{
1934 /* If we are resetting/deleting then do nothing */
1935 if (ctrl->state != NVME_CTRL_CONNECTING) {
1936 WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
1937 ctrl->state == NVME_CTRL_LIVE);
1938 return;
1939 }
1940
1941 if (nvmf_should_reconnect(ctrl)) {
1942 dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
1943 ctrl->opts->reconnect_delay);
1944 queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
1945 ctrl->opts->reconnect_delay * HZ);
1946 } else {
1947 dev_info(ctrl->device, "Removing controller...\n");
1948 nvme_delete_ctrl(ctrl);
1949 }
1950}
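/*
 * Illustrative numbers: with the default reconnect_delay of 10s and
 * ctrl_loss_tmo of 600s, nvmf_should_reconnect() permits roughly 60
 * attempts before the controller is removed; a ctrl_loss_tmo of -1
 * retries forever.
 */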
1951
1952static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
1953{
1954 struct nvmf_ctrl_options *opts = ctrl->opts;
Colin Ian King312910f2019-09-05 15:34:35 +01001955 int ret;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001956
1957 ret = nvme_tcp_configure_admin_queue(ctrl, new);
1958 if (ret)
1959 return ret;
1960
1961 if (ctrl->icdoff) {
1962 dev_err(ctrl->device, "icdoff is not supported!\n");
1963		ret = -EOPNOTSUPP; /* don't return 0 from this error path */
		goto destroy_admin;
1964 }
1965
1966 if (opts->queue_size > ctrl->sqsize + 1)
1967 dev_warn(ctrl->device,
1968 "queue_size %zu > ctrl sqsize %u, clamping down\n",
1969 opts->queue_size, ctrl->sqsize + 1);
1970
1971 if (ctrl->sqsize + 1 > ctrl->maxcmd) {
1972 dev_warn(ctrl->device,
1973 "sqsize %u > ctrl maxcmd %u, clamping down\n",
1974 ctrl->sqsize + 1, ctrl->maxcmd);
1975 ctrl->sqsize = ctrl->maxcmd - 1;
1976 }
1977
1978 if (ctrl->queue_count > 1) {
1979 ret = nvme_tcp_configure_io_queues(ctrl, new);
1980 if (ret)
1981 goto destroy_admin;
1982 }
1983
1984 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
Israel Rukshinbea54ef2020-03-24 17:29:45 +02001985 /*
Sagi Grimbergecca390e2020-07-22 16:32:19 -07001986 * state change failure is ok if we started ctrl delete,
Israel Rukshinbea54ef2020-03-24 17:29:45 +02001987	 * unless we are in the middle of creating a new controller,
1988	 * to avoid races with the teardown flow.
1989 */
Sagi Grimbergecca390e2020-07-22 16:32:19 -07001990 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
1991 ctrl->state != NVME_CTRL_DELETING_NOIO);
Israel Rukshinbea54ef2020-03-24 17:29:45 +02001992 WARN_ON_ONCE(new);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001993 ret = -EINVAL;
1994 goto destroy_io;
1995 }
1996
1997 nvme_start_ctrl(ctrl);
1998 return 0;
1999
2000destroy_io:
Chao Leng70a99572021-01-21 11:32:38 +08002001 if (ctrl->queue_count > 1) {
2002 nvme_stop_queues(ctrl);
2003 nvme_sync_io_queues(ctrl);
2004 nvme_tcp_stop_io_queues(ctrl);
2005 nvme_cancel_tagset(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002006 nvme_tcp_destroy_io_queues(ctrl, new);
Chao Leng70a99572021-01-21 11:32:38 +08002007 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002008destroy_admin:
Chao Leng70a99572021-01-21 11:32:38 +08002009 blk_mq_quiesce_queue(ctrl->admin_q);
2010 blk_sync_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002011 nvme_tcp_stop_queue(ctrl, 0);
Chao Leng70a99572021-01-21 11:32:38 +08002012 nvme_cancel_admin_tagset(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002013 nvme_tcp_destroy_admin_queue(ctrl, new);
2014 return ret;
2015}
2016
2017static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
2018{
2019 struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
2020 struct nvme_tcp_ctrl, connect_work);
2021 struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
2022
2023 ++ctrl->nr_reconnects;
2024
2025 if (nvme_tcp_setup_ctrl(ctrl, false))
2026 goto requeue;
2027
Colin Ian King56a77d22018-12-14 11:42:43 +00002028	dev_info(ctrl->device, "Successfully reconnected (attempt %d)\n",
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002029 ctrl->nr_reconnects);
2030
2031 ctrl->nr_reconnects = 0;
2032
2033 return;
2034
2035requeue:
2036 dev_info(ctrl->device, "Failed reconnect attempt %d\n",
2037 ctrl->nr_reconnects);
2038 nvme_tcp_reconnect_or_remove(ctrl);
2039}
2040
2041static void nvme_tcp_error_recovery_work(struct work_struct *work)
2042{
2043 struct nvme_tcp_ctrl *tcp_ctrl = container_of(work,
2044 struct nvme_tcp_ctrl, err_work);
2045 struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
2046
2047 nvme_stop_keep_alive(ctrl);
2048 nvme_tcp_teardown_io_queues(ctrl, false);
2049	/* unquiesce to fast-fail any pending requests */
2050 nvme_start_queues(ctrl);
2051 nvme_tcp_teardown_admin_queue(ctrl, false);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07002052 blk_mq_unquiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002053
2054 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
Sagi Grimbergecca390e2020-07-22 16:32:19 -07002055 /* state change failure is ok if we started ctrl delete */
2056 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2057 ctrl->state != NVME_CTRL_DELETING_NOIO);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002058 return;
2059 }
2060
2061 nvme_tcp_reconnect_or_remove(ctrl);
2062}
2063
2064static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
2065{
Sagi Grimberg794a4cb2019-01-01 00:19:30 -08002066 cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
2067 cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
2068
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002069 nvme_tcp_teardown_io_queues(ctrl, shutdown);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07002070 blk_mq_quiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002071 if (shutdown)
2072 nvme_shutdown_ctrl(ctrl);
2073 else
Sagi Grimbergb5b05042019-07-22 17:06:54 -07002074 nvme_disable_ctrl(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002075 nvme_tcp_teardown_admin_queue(ctrl, shutdown);
2076}
2077
2078static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
2079{
2080 nvme_tcp_teardown_ctrl(ctrl, true);
2081}
2082
2083static void nvme_reset_ctrl_work(struct work_struct *work)
2084{
2085 struct nvme_ctrl *ctrl =
2086 container_of(work, struct nvme_ctrl, reset_work);
2087
2088 nvme_stop_ctrl(ctrl);
2089 nvme_tcp_teardown_ctrl(ctrl, false);
2090
2091 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
Sagi Grimbergecca390e2020-07-22 16:32:19 -07002092 /* state change failure is ok if we started ctrl delete */
2093 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2094 ctrl->state != NVME_CTRL_DELETING_NOIO);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002095 return;
2096 }
2097
2098 if (nvme_tcp_setup_ctrl(ctrl, false))
2099 goto out_fail;
2100
2101 return;
2102
2103out_fail:
2104 ++ctrl->nr_reconnects;
2105 nvme_tcp_reconnect_or_remove(ctrl);
2106}
2107
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002108static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
2109{
2110 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
2111
2112 if (list_empty(&ctrl->list))
2113 goto free_ctrl;
2114
2115 mutex_lock(&nvme_tcp_ctrl_mutex);
2116 list_del(&ctrl->list);
2117 mutex_unlock(&nvme_tcp_ctrl_mutex);
2118
2119 nvmf_free_options(nctrl->opts);
2120free_ctrl:
2121 kfree(ctrl->queues);
2122 kfree(ctrl);
2123}
2124
2125static void nvme_tcp_set_sg_null(struct nvme_command *c)
2126{
2127 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2128
2129 sg->addr = 0;
2130 sg->length = 0;
2131 sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
2132 NVME_SGL_FMT_TRANSPORT_A;
2133}
2134
2135static void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue,
2136 struct nvme_command *c, u32 data_len)
2137{
2138 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2139
2140 sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
2141 sg->length = cpu_to_le32(data_len);
2142 sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
2143}
2144
2145static void nvme_tcp_set_sg_host_data(struct nvme_command *c,
2146 u32 data_len)
2147{
2148 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2149
2150 sg->addr = 0;
2151 sg->length = cpu_to_le32(data_len);
2152 sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
2153 NVME_SGL_FMT_TRANSPORT_A;
2154}
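/*
 * Three SGL shapes are used on the wire: a NULL transport SGL for commands
 * that carry no data, an inline (in-capsule) data descriptor for small
 * writes that fit in the command capsule, and a host-resident transport SGL
 * for everything else, where the payload moves in separate H2CData/C2HData
 * PDUs rather than inside the capsule.
 */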
2155
2156static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
2157{
2158 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg);
2159 struct nvme_tcp_queue *queue = &ctrl->queues[0];
2160 struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu;
2161 struct nvme_command *cmd = &pdu->cmd;
2162 u8 hdgst = nvme_tcp_hdgst_len(queue);
2163
2164 memset(pdu, 0, sizeof(*pdu));
2165 pdu->hdr.type = nvme_tcp_cmd;
2166 if (queue->hdr_digest)
2167 pdu->hdr.flags |= NVME_TCP_F_HDGST;
2168 pdu->hdr.hlen = sizeof(*pdu);
2169 pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
2170
2171 cmd->common.opcode = nvme_admin_async_event;
2172 cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
2173 cmd->common.flags |= NVME_CMD_SGL_METABUF;
2174 nvme_tcp_set_sg_null(cmd);
2175
2176 ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU;
2177 ctrl->async_req.offset = 0;
2178 ctrl->async_req.curr_bio = NULL;
2179 ctrl->async_req.data_len = 0;
2180
Sagi Grimberg86f03482020-06-18 17:30:23 -07002181 nvme_tcp_queue_request(&ctrl->async_req, true, true);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002182}
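/*
 * The AEN command is not backed by a blk-mq request; it reuses the
 * pre-allocated async_req and the reserved command id NVME_AQ_BLK_MQ_DEPTH,
 * which is how the core recognizes its completion as an async event rather
 * than a tagged request.
 */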
2183
Sagi Grimberg236187c2020-07-28 13:16:36 -07002184static void nvme_tcp_complete_timed_out(struct request *rq)
2185{
2186 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2187 struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
2188
Sagi Grimberg236187c2020-07-28 13:16:36 -07002189 nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
Sagi Grimberg0a8a2c852020-10-22 10:15:31 +08002190 if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
Sagi Grimberg236187c2020-07-28 13:16:36 -07002191 nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
2192 blk_mq_complete_request(rq);
2193 }
Sagi Grimberg236187c2020-07-28 13:16:36 -07002194}
2195
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002196static enum blk_eh_timer_return
2197nvme_tcp_timeout(struct request *rq, bool reserved)
2198{
2199 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
Sagi Grimberg236187c2020-07-28 13:16:36 -07002200 struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002201 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2202
Sagi Grimberg236187c2020-07-28 13:16:36 -07002203 dev_warn(ctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002204 "queue %d: timeout request %#x type %d\n",
Sagi Grimberg39d57752019-01-08 01:01:30 -08002205 nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002206
Sagi Grimberg236187c2020-07-28 13:16:36 -07002207 if (ctrl->state != NVME_CTRL_LIVE) {
Sagi Grimberg39d57752019-01-08 01:01:30 -08002208 /*
Sagi Grimberg236187c2020-07-28 13:16:36 -07002209	 * If we are resetting, connecting or deleting we should
2210	 * complete the request immediately because it may block the
2211	 * controller teardown or setup sequence, e.g.:
2212 * - ctrl disable/shutdown fabrics requests
2213 * - connect requests
2214 * - initialization admin requests
2215 * - I/O requests that entered after unquiescing and
2216 * the controller stopped responding
2217 *
2218 * All other requests should be cancelled by the error
2219 * recovery work, so it's fine that we fail it here.
Sagi Grimberg39d57752019-01-08 01:01:30 -08002220 */
Sagi Grimberg236187c2020-07-28 13:16:36 -07002221 nvme_tcp_complete_timed_out(rq);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002222 return BLK_EH_DONE;
2223 }
2224
Sagi Grimberg236187c2020-07-28 13:16:36 -07002225 /*
2226 * LIVE state should trigger the normal error recovery which will
2227 * handle completing this request.
2228 */
2229 nvme_tcp_error_recovery(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002230 return BLK_EH_RESET_TIMER;
2231}
2232
2233static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
2234 struct request *rq)
2235{
2236 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2237 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2238 struct nvme_command *c = &pdu->cmd;
2239
2240 c->common.flags |= NVME_CMD_SGL_METABUF;
2241
Sagi Grimberg25e5cb72020-03-23 15:06:30 -07002242 if (!blk_rq_nr_phys_segments(rq))
2243 nvme_tcp_set_sg_null(c);
2244 else if (rq_data_dir(rq) == WRITE &&
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002245 req->data_len <= nvme_tcp_inline_data_size(queue))
2246 nvme_tcp_set_sg_inline(queue, c, req->data_len);
2247 else
2248 nvme_tcp_set_sg_host_data(c, req->data_len);
2249
2250	return BLK_STS_OK;
2251}
2252
2253static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
2254 struct request *rq)
2255{
2256 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2257 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2258 struct nvme_tcp_queue *queue = req->queue;
2259 u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
2260 blk_status_t ret;
2261
2262 ret = nvme_setup_cmd(ns, rq, &pdu->cmd);
2263 if (ret)
2264 return ret;
2265
2266 req->state = NVME_TCP_SEND_CMD_PDU;
2267 req->offset = 0;
2268 req->data_sent = 0;
2269 req->pdu_len = 0;
2270 req->pdu_sent = 0;
Sagi Grimberg25e5cb72020-03-23 15:06:30 -07002271 req->data_len = blk_rq_nr_phys_segments(rq) ?
2272 blk_rq_payload_bytes(rq) : 0;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002273 req->curr_bio = rq->bio;
Sagi Grimbergcb9b8702021-01-14 13:15:24 -08002274 if (req->curr_bio)
2275 nvme_tcp_init_iter(req, rq_data_dir(rq));
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002276
2277 if (rq_data_dir(rq) == WRITE &&
2278 req->data_len <= nvme_tcp_inline_data_size(queue))
2279 req->pdu_len = req->data_len;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002280
2281 pdu->hdr.type = nvme_tcp_cmd;
2282 pdu->hdr.flags = 0;
2283 if (queue->hdr_digest)
2284 pdu->hdr.flags |= NVME_TCP_F_HDGST;
2285 if (queue->data_digest && req->pdu_len) {
2286 pdu->hdr.flags |= NVME_TCP_F_DDGST;
2287 ddgst = nvme_tcp_ddgst_len(queue);
2288 }
2289 pdu->hdr.hlen = sizeof(*pdu);
2290 pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0;
2291 pdu->hdr.plen =
2292 cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst);
2293
2294 ret = nvme_tcp_map_data(queue, rq);
2295 if (unlikely(ret)) {
Max Gurtovoy28a4cac2019-10-13 19:57:38 +03002296 nvme_cleanup_cmd(rq);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002297 dev_err(queue->ctrl->ctrl.device,
2298 "Failed to map data (%d)\n", ret);
2299 return ret;
2300 }
2301
2302 return 0;
2303}
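/*
 * Worked example (sizes assumed for illustration): for a 4KB inline write
 * with header and data digests enabled, and sizeof(struct nvme_tcp_cmd_pdu)
 * being 72 bytes (8-byte common header + 64-byte NVMe command), the header
 * ends up as hlen=72, pdo=76 (data starts right after the header digest),
 * and plen = 72 + 4 + 4096 + 4 = 4176 covering header, HDGST, payload and
 * DDGST.
 */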
2304
Sagi Grimberg86f03482020-06-18 17:30:23 -07002305static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
2306{
2307 struct nvme_tcp_queue *queue = hctx->driver_data;
2308
2309 if (!llist_empty(&queue->req_list))
2310 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
2311}
2312
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002313static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
2314 const struct blk_mq_queue_data *bd)
2315{
2316 struct nvme_ns *ns = hctx->queue->queuedata;
2317 struct nvme_tcp_queue *queue = hctx->driver_data;
2318 struct request *rq = bd->rq;
2319 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2320 bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
2321 blk_status_t ret;
2322
2323 if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
2324 return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
2325
2326 ret = nvme_tcp_setup_cmd_pdu(ns, rq);
2327 if (unlikely(ret))
2328 return ret;
2329
2330 blk_mq_start_request(rq);
2331
Sagi Grimberg86f03482020-06-18 17:30:23 -07002332 nvme_tcp_queue_request(req, true, bd->last);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002333
2334 return BLK_STS_OK;
2335}
2336
Sagi Grimberg873946f2018-12-11 23:38:57 -08002337static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
2338{
2339 struct nvme_tcp_ctrl *ctrl = set->driver_data;
Sagi Grimberg64861992019-05-28 22:49:05 -07002340 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
Sagi Grimberg873946f2018-12-11 23:38:57 -08002341
Sagi Grimberg64861992019-05-28 22:49:05 -07002342 if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
Sagi Grimberg873946f2018-12-11 23:38:57 -08002343 /* separate read/write queues */
2344 set->map[HCTX_TYPE_DEFAULT].nr_queues =
Sagi Grimberg64861992019-05-28 22:49:05 -07002345 ctrl->io_queues[HCTX_TYPE_DEFAULT];
2346 set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
2347 set->map[HCTX_TYPE_READ].nr_queues =
2348 ctrl->io_queues[HCTX_TYPE_READ];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002349 set->map[HCTX_TYPE_READ].queue_offset =
Sagi Grimberg64861992019-05-28 22:49:05 -07002350 ctrl->io_queues[HCTX_TYPE_DEFAULT];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002351 } else {
Sagi Grimberg64861992019-05-28 22:49:05 -07002352 /* shared read/write queues */
Sagi Grimberg873946f2018-12-11 23:38:57 -08002353 set->map[HCTX_TYPE_DEFAULT].nr_queues =
Sagi Grimberg64861992019-05-28 22:49:05 -07002354 ctrl->io_queues[HCTX_TYPE_DEFAULT];
2355 set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
2356 set->map[HCTX_TYPE_READ].nr_queues =
2357 ctrl->io_queues[HCTX_TYPE_DEFAULT];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002358 set->map[HCTX_TYPE_READ].queue_offset = 0;
2359 }
2360 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
2361 blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
Sagi Grimberg64861992019-05-28 22:49:05 -07002362
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002363 if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
2364 /* map dedicated poll queues only if we have queues left */
2365 set->map[HCTX_TYPE_POLL].nr_queues =
2366 ctrl->io_queues[HCTX_TYPE_POLL];
2367 set->map[HCTX_TYPE_POLL].queue_offset =
2368 ctrl->io_queues[HCTX_TYPE_DEFAULT] +
2369 ctrl->io_queues[HCTX_TYPE_READ];
2370 blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
2371 }
2372
Sagi Grimberg64861992019-05-28 22:49:05 -07002373 dev_info(ctrl->ctrl.device,
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002374 "mapped %d/%d/%d default/read/poll queues.\n",
Sagi Grimberg64861992019-05-28 22:49:05 -07002375 ctrl->io_queues[HCTX_TYPE_DEFAULT],
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002376 ctrl->io_queues[HCTX_TYPE_READ],
2377 ctrl->io_queues[HCTX_TYPE_POLL]);
Sagi Grimberg64861992019-05-28 22:49:05 -07002378
Sagi Grimberg873946f2018-12-11 23:38:57 -08002379 return 0;
2380}
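/*
 * Continuing the earlier example (assumed counts: 2 default, 4 read,
 * 2 poll): the default map covers hardware queues 0-1, the read map starts
 * at offset 2 and covers queues 2-5, and the poll map starts at offset
 * 2 + 4 = 6 covering queues 6-7, matching a "mapped 2/4/2
 * default/read/poll queues" message above.
 */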
2381
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002382static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
2383{
2384 struct nvme_tcp_queue *queue = hctx->driver_data;
2385 struct sock *sk = queue->sock->sk;
2386
Sagi Grimbergf86e5bf2020-03-23 16:43:52 -07002387 if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
2388 return 0;
2389
Sagi Grimberg72e5d752020-05-01 14:25:44 -07002390 set_bit(NVME_TCP_Q_POLLING, &queue->flags);
Eric Dumazet3f926af2019-10-23 22:44:51 -07002391 if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002392 sk_busy_loop(sk, true);
2393 nvme_tcp_try_recv(queue);
Sagi Grimberg72e5d752020-05-01 14:25:44 -07002394 clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002395 return queue->nr_cqe;
2396}
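/*
 * The poll path is entered by blk-mq for high-priority I/O on the dedicated
 * poll queues: it busy-loops on the socket when possible and then reaps
 * completions directly.  NVME_TCP_Q_POLLING tells data_ready not to kick
 * io_work in the meantime, so receives are not processed twice.
 */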
2397
Rikard Falkeborn6acbd962020-05-29 00:25:07 +02002398static const struct blk_mq_ops nvme_tcp_mq_ops = {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002399 .queue_rq = nvme_tcp_queue_rq,
Sagi Grimberg86f03482020-06-18 17:30:23 -07002400 .commit_rqs = nvme_tcp_commit_rqs,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002401 .complete = nvme_complete_rq,
2402 .init_request = nvme_tcp_init_request,
2403 .exit_request = nvme_tcp_exit_request,
2404 .init_hctx = nvme_tcp_init_hctx,
2405 .timeout = nvme_tcp_timeout,
Sagi Grimberg873946f2018-12-11 23:38:57 -08002406 .map_queues = nvme_tcp_map_queues,
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002407 .poll = nvme_tcp_poll,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002408};
2409
Rikard Falkeborn6acbd962020-05-29 00:25:07 +02002410static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002411 .queue_rq = nvme_tcp_queue_rq,
2412 .complete = nvme_complete_rq,
2413 .init_request = nvme_tcp_init_request,
2414 .exit_request = nvme_tcp_exit_request,
2415 .init_hctx = nvme_tcp_init_admin_hctx,
2416 .timeout = nvme_tcp_timeout,
2417};
2418
2419static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
2420 .name = "tcp",
2421 .module = THIS_MODULE,
2422 .flags = NVME_F_FABRICS,
2423 .reg_read32 = nvmf_reg_read32,
2424 .reg_read64 = nvmf_reg_read64,
2425 .reg_write32 = nvmf_reg_write32,
2426 .free_ctrl = nvme_tcp_free_ctrl,
2427 .submit_async_event = nvme_tcp_submit_async_event,
2428 .delete_ctrl = nvme_tcp_delete_ctrl,
2429 .get_address = nvmf_get_address,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002430};
2431
2432static bool
2433nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
2434{
2435 struct nvme_tcp_ctrl *ctrl;
2436 bool found = false;
2437
2438 mutex_lock(&nvme_tcp_ctrl_mutex);
2439 list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) {
2440 found = nvmf_ip_options_match(&ctrl->ctrl, opts);
2441 if (found)
2442 break;
2443 }
2444 mutex_unlock(&nvme_tcp_ctrl_mutex);
2445
2446 return found;
2447}
2448
2449static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
2450 struct nvmf_ctrl_options *opts)
2451{
2452 struct nvme_tcp_ctrl *ctrl;
2453 int ret;
2454
2455 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
2456 if (!ctrl)
2457 return ERR_PTR(-ENOMEM);
2458
2459 INIT_LIST_HEAD(&ctrl->list);
2460 ctrl->ctrl.opts = opts;
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002461 ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
2462 opts->nr_poll_queues + 1;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002463 ctrl->ctrl.sqsize = opts->queue_size - 1;
2464 ctrl->ctrl.kato = opts->kato;
2465
2466 INIT_DELAYED_WORK(&ctrl->connect_work,
2467 nvme_tcp_reconnect_ctrl_work);
2468 INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
2469 INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
2470
2471 if (!(opts->mask & NVMF_OPT_TRSVCID)) {
2472 opts->trsvcid =
2473 kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
2474 if (!opts->trsvcid) {
2475 ret = -ENOMEM;
2476 goto out_free_ctrl;
2477 }
2478 opts->mask |= NVMF_OPT_TRSVCID;
2479 }
2480
2481 ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
2482 opts->traddr, opts->trsvcid, &ctrl->addr);
2483 if (ret) {
2484 pr_err("malformed address passed: %s:%s\n",
2485 opts->traddr, opts->trsvcid);
2486 goto out_free_ctrl;
2487 }
2488
2489 if (opts->mask & NVMF_OPT_HOST_TRADDR) {
2490 ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
2491 opts->host_traddr, NULL, &ctrl->src_addr);
2492 if (ret) {
2493 pr_err("malformed src address passed: %s\n",
2494 opts->host_traddr);
2495 goto out_free_ctrl;
2496 }
2497 }
2498
2499 if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
2500 ret = -EALREADY;
2501 goto out_free_ctrl;
2502 }
2503
Sagi Grimberg873946f2018-12-11 23:38:57 -08002504 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002505 GFP_KERNEL);
2506 if (!ctrl->queues) {
2507 ret = -ENOMEM;
2508 goto out_free_ctrl;
2509 }
2510
2511 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
2512 if (ret)
2513 goto out_kfree_queues;
2514
2515 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
2516 WARN_ON_ONCE(1);
2517 ret = -EINTR;
2518 goto out_uninit_ctrl;
2519 }
2520
2521 ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true);
2522 if (ret)
2523 goto out_uninit_ctrl;
2524
2525 dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
2526 ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
2527
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002528 mutex_lock(&nvme_tcp_ctrl_mutex);
2529 list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
2530 mutex_unlock(&nvme_tcp_ctrl_mutex);
2531
2532 return &ctrl->ctrl;
2533
2534out_uninit_ctrl:
2535 nvme_uninit_ctrl(&ctrl->ctrl);
2536 nvme_put_ctrl(&ctrl->ctrl);
2537 if (ret > 0)
2538 ret = -EIO;
2539 return ERR_PTR(ret);
2540out_kfree_queues:
2541 kfree(ctrl->queues);
2542out_free_ctrl:
2543 kfree(ctrl);
2544 return ERR_PTR(ret);
2545}
2546
2547static struct nvmf_transport_ops nvme_tcp_transport = {
2548 .name = "tcp",
2549 .module = THIS_MODULE,
2550 .required_opts = NVMF_OPT_TRADDR,
2551 .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
2552 NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
Sagi Grimberg873946f2018-12-11 23:38:57 -08002553 NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
Israel Rukshinbb139852019-08-18 12:08:54 +03002554 NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
2555 NVMF_OPT_TOS,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002556 .create_ctrl = nvme_tcp_create_ctrl,
2557};
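/*
 * Typical use from userspace once this module is loaded (illustrative
 * command line, addresses assumed):
 *
 *   nvme connect -t tcp -a 192.168.1.10 -s 4420 -n <subsystem NQN> \
 *        --nr-write-queues=2 --nr-poll-queues=2 --hdr-digest
 *
 * which nvme-cli translates into the option string parsed by the fabrics
 * code and ultimately into nvme_tcp_create_ctrl().
 */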
2558
2559static int __init nvme_tcp_init_module(void)
2560{
2561 nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
2562 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2563 if (!nvme_tcp_wq)
2564 return -ENOMEM;
2565
2566 nvmf_register_transport(&nvme_tcp_transport);
2567 return 0;
2568}
2569
2570static void __exit nvme_tcp_cleanup_module(void)
2571{
2572 struct nvme_tcp_ctrl *ctrl;
2573
2574 nvmf_unregister_transport(&nvme_tcp_transport);
2575
2576 mutex_lock(&nvme_tcp_ctrl_mutex);
2577 list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list)
2578 nvme_delete_ctrl(&ctrl->ctrl);
2579 mutex_unlock(&nvme_tcp_ctrl_mutex);
2580 flush_workqueue(nvme_delete_wq);
2581
2582 destroy_workqueue(nvme_tcp_wq);
2583}
2584
2585module_init(nvme_tcp_init_module);
2586module_exit(nvme_tcp_cleanup_module);
2587
2588MODULE_LICENSE("GPL v2");