blob: 979ee31b8dd1c9c961ab27364d94ea972598e4b6 [file] [log] [blame]
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001// SPDX-License-Identifier: GPL-2.0
2/*
3 * NVMe over Fabrics TCP host.
4 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7#include <linux/module.h>
8#include <linux/init.h>
9#include <linux/slab.h>
10#include <linux/err.h>
11#include <linux/nvme-tcp.h>
12#include <net/sock.h>
13#include <net/tcp.h>
14#include <linux/blk-mq.h>
15#include <crypto/hash.h>
Sagi Grimberg1a9460c2019-07-03 14:08:04 -070016#include <net/busy_poll.h>
Sagi Grimberg3f2304f2018-12-03 17:52:17 -080017
18#include "nvme.h"
19#include "fabrics.h"
20
21struct nvme_tcp_queue;
22
Wunderlich, Mark9912ade2020-01-16 00:46:12 +000023/* Define the socket priority to use for connections were it is desirable
24 * that the NIC consider performing optimized packet processing or filtering.
25 * A non-zero value being sufficient to indicate general consideration of any
26 * possible optimization. Making it a module param allows for alternative
27 * values that may be unique for some NIC implementations.
28 */
29static int so_priority;
30module_param(so_priority, int, 0644);
31MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
32
/* Transmit stage of a request (stored in nvme_tcp_request.state). */
enum nvme_tcp_send_state {
	NVME_TCP_SEND_CMD_PDU	= 0,	/* sending the command PDU */
	NVME_TCP_SEND_H2C_PDU	= 1,	/* sending an H2C data PDU header */
	NVME_TCP_SEND_DATA	= 2,	/* sending the data payload */
	NVME_TCP_SEND_DDGST	= 3,	/* sending the data digest */
};
39
40struct nvme_tcp_request {
41 struct nvme_request req;
42 void *pdu;
43 struct nvme_tcp_queue *queue;
44 u32 data_len;
45 u32 pdu_len;
46 u32 pdu_sent;
47 u16 ttag;
48 struct list_head entry;
Sagi Grimberg15ec9282020-06-18 17:30:22 -070049 struct llist_node lentry;
Christoph Hellwiga7273d42018-12-13 09:46:59 +010050 __le32 ddgst;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -080051
52 struct bio *curr_bio;
53 struct iov_iter iter;
54
55 /* send state */
56 size_t offset;
57 size_t data_sent;
58 enum nvme_tcp_send_state state;
59};
60
/* Bit numbers used with the bitops helpers on nvme_tcp_queue.flags. */
enum nvme_tcp_queue_flags {
	NVME_TCP_Q_ALLOCATED	= 0,
	NVME_TCP_Q_LIVE		= 1,
	NVME_TCP_Q_POLLING	= 2,
};
66
/* What the receive path expects next on the byte stream. */
enum nvme_tcp_recv_state {
	NVME_TCP_RECV_PDU	= 0,	/* PDU header (plus header digest) */
	NVME_TCP_RECV_DATA	= 1,	/* PDU data payload */
	NVME_TCP_RECV_DDGST	= 2,	/* data digest */
};
72
73struct nvme_tcp_ctrl;
74struct nvme_tcp_queue {
75 struct socket *sock;
76 struct work_struct io_work;
77 int io_cpu;
78
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -070079 struct mutex send_mutex;
Sagi Grimberg15ec9282020-06-18 17:30:22 -070080 struct llist_head req_list;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -080081 struct list_head send_list;
Sagi Grimberg122e5b92020-06-18 17:30:24 -070082 bool more_requests;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -080083
84 /* recv state */
85 void *pdu;
86 int pdu_remaining;
87 int pdu_offset;
88 size_t data_remaining;
89 size_t ddgst_remaining;
Sagi Grimberg1a9460c2019-07-03 14:08:04 -070090 unsigned int nr_cqe;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -080091
92 /* send state */
93 struct nvme_tcp_request *request;
94
95 int queue_size;
96 size_t cmnd_capsule_len;
97 struct nvme_tcp_ctrl *ctrl;
98 unsigned long flags;
99 bool rd_enabled;
100
101 bool hdr_digest;
102 bool data_digest;
103 struct ahash_request *rcv_hash;
104 struct ahash_request *snd_hash;
105 __le32 exp_ddgst;
106 __le32 recv_ddgst;
107
108 struct page_frag_cache pf_cache;
109
110 void (*state_change)(struct sock *);
111 void (*data_ready)(struct sock *);
112 void (*write_space)(struct sock *);
113};
114
115struct nvme_tcp_ctrl {
116 /* read only in the hot path */
117 struct nvme_tcp_queue *queues;
118 struct blk_mq_tag_set tag_set;
119
120 /* other member variables */
121 struct list_head list;
122 struct blk_mq_tag_set admin_tag_set;
123 struct sockaddr_storage addr;
124 struct sockaddr_storage src_addr;
125 struct nvme_ctrl ctrl;
126
127 struct work_struct err_work;
128 struct delayed_work connect_work;
129 struct nvme_tcp_request async_req;
Sagi Grimberg64861992019-05-28 22:49:05 -0700130 u32 io_queues[HCTX_MAX_TYPES];
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800131};
132
133static LIST_HEAD(nvme_tcp_ctrl_list);
134static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
135static struct workqueue_struct *nvme_tcp_wq;
Rikard Falkeborn6acbd962020-05-29 00:25:07 +0200136static const struct blk_mq_ops nvme_tcp_mq_ops;
137static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700138static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800139
140static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
141{
142 return container_of(ctrl, struct nvme_tcp_ctrl, ctrl);
143}
144
145static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
146{
147 return queue - queue->ctrl->queues;
148}
149
150static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
151{
152 u32 queue_idx = nvme_tcp_queue_id(queue);
153
154 if (queue_idx == 0)
155 return queue->ctrl->admin_tag_set.tags[queue_idx];
156 return queue->ctrl->tag_set.tags[queue_idx - 1];
157}
158
159static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
160{
161 return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
162}
163
164static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
165{
166 return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
167}
168
169static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
170{
171 return queue->cmnd_capsule_len - sizeof(struct nvme_command);
172}
173
174static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
175{
176 return req == &req->queue->ctrl->async_req;
177}
178
179static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
180{
181 struct request *rq;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800182
183 if (unlikely(nvme_tcp_async_req(req)))
184 return false; /* async events don't have a request */
185
186 rq = blk_mq_rq_from_pdu(req);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800187
Sagi Grimberg25e5cb72020-03-23 15:06:30 -0700188 return rq_data_dir(rq) == WRITE && req->data_len &&
189 req->data_len <= nvme_tcp_inline_data_size(req->queue);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800190}
191
192static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
193{
194 return req->iter.bvec->bv_page;
195}
196
197static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req)
198{
199 return req->iter.bvec->bv_offset + req->iter.iov_offset;
200}
201
202static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
203{
204 return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset,
205 req->pdu_len - req->pdu_sent);
206}
207
208static inline size_t nvme_tcp_req_offset(struct nvme_tcp_request *req)
209{
210 return req->iter.iov_offset;
211}
212
213static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req)
214{
215 return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ?
216 req->pdu_len - req->pdu_sent : 0;
217}
218
219static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
220 int len)
221{
222 return nvme_tcp_pdu_data_left(req) <= len;
223}
224
225static void nvme_tcp_init_iter(struct nvme_tcp_request *req,
226 unsigned int dir)
227{
228 struct request *rq = blk_mq_rq_from_pdu(req);
229 struct bio_vec *vec;
230 unsigned int size;
231 int nsegs;
232 size_t offset;
233
234 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
235 vec = &rq->special_vec;
236 nsegs = 1;
237 size = blk_rq_payload_bytes(rq);
238 offset = 0;
239 } else {
240 struct bio *bio = req->curr_bio;
241
242 vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
243 nsegs = bio_segments(bio);
244 size = bio->bi_iter.bi_size;
245 offset = bio->bi_iter.bi_bvec_done;
246 }
247
248 iov_iter_bvec(&req->iter, dir, vec, nsegs, size);
249 req->iter.iov_offset = offset;
250}
251
252static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
253 int len)
254{
255 req->data_sent += len;
256 req->pdu_sent += len;
257 iov_iter_advance(&req->iter, len);
258 if (!iov_iter_count(&req->iter) &&
259 req->data_sent < req->data_len) {
260 req->curr_bio = req->curr_bio->bi_next;
261 nvme_tcp_init_iter(req, WRITE);
262 }
263}
264
/* Keep calling nvme_tcp_try_send() for as long as it reports progress (> 0). */
static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
{
	/* drain the send queue as much as we can... */
	while (nvme_tcp_try_send(queue) > 0)
		;
}
274
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700275static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
Sagi Grimberg86f03482020-06-18 17:30:23 -0700276 bool sync, bool last)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800277{
278 struct nvme_tcp_queue *queue = req->queue;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700279 bool empty;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800280
Sagi Grimberg15ec9282020-06-18 17:30:22 -0700281 empty = llist_add(&req->lentry, &queue->req_list) &&
282 list_empty(&queue->send_list) && !queue->request;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800283
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700284 /*
285 * if we're the first on the send_list and we can try to send
286 * directly, otherwise queue io_work. Also, only do that if we
287 * are on the same cpu, so we don't introduce contention.
288 */
289 if (queue->io_cpu == smp_processor_id() &&
290 sync && empty && mutex_trylock(&queue->send_mutex)) {
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700291 queue->more_requests = !last;
Sagi Grimberg5c11f7d2020-12-21 00:03:39 -0800292 nvme_tcp_send_all(queue);
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700293 queue->more_requests = false;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700294 mutex_unlock(&queue->send_mutex);
Sagi Grimberg86f03482020-06-18 17:30:23 -0700295 } else if (last) {
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -0700296 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
297 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800298}
299
Sagi Grimberg15ec9282020-06-18 17:30:22 -0700300static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
301{
302 struct nvme_tcp_request *req;
303 struct llist_node *node;
304
305 for (node = llist_del_all(&queue->req_list); node; node = node->next) {
306 req = llist_entry(node, struct nvme_tcp_request, lentry);
307 list_add(&req->entry, &queue->send_list);
308 }
309}
310
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800311static inline struct nvme_tcp_request *
312nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
313{
314 struct nvme_tcp_request *req;
315
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800316 req = list_first_entry_or_null(&queue->send_list,
317 struct nvme_tcp_request, entry);
Sagi Grimberg15ec9282020-06-18 17:30:22 -0700318 if (!req) {
319 nvme_tcp_process_req_list(queue);
320 req = list_first_entry_or_null(&queue->send_list,
321 struct nvme_tcp_request, entry);
322 if (unlikely(!req))
323 return NULL;
324 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800325
Sagi Grimberg15ec9282020-06-18 17:30:22 -0700326 list_del(&req->entry);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800327 return req;
328}
329
Christoph Hellwiga7273d42018-12-13 09:46:59 +0100330static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
331 __le32 *dgst)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800332{
333 ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
334 crypto_ahash_final(hash);
335}
336
337static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
338 struct page *page, off_t off, size_t len)
339{
340 struct scatterlist sg;
341
342 sg_init_marker(&sg, 1);
343 sg_set_page(&sg, page, len, off);
344 ahash_request_set_crypt(hash, &sg, NULL, len);
345 crypto_ahash_update(hash);
346}
347
348static inline void nvme_tcp_hdgst(struct ahash_request *hash,
349 void *pdu, size_t len)
350{
351 struct scatterlist sg;
352
353 sg_init_one(&sg, pdu, len);
354 ahash_request_set_crypt(hash, &sg, pdu + len, len);
355 crypto_ahash_digest(hash);
356}
357
358static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
359 void *pdu, size_t pdu_len)
360{
361 struct nvme_tcp_hdr *hdr = pdu;
362 __le32 recv_digest;
363 __le32 exp_digest;
364
365 if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
366 dev_err(queue->ctrl->ctrl.device,
367 "queue %d: header digest flag is cleared\n",
368 nvme_tcp_queue_id(queue));
369 return -EPROTO;
370 }
371
372 recv_digest = *(__le32 *)(pdu + hdr->hlen);
373 nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
374 exp_digest = *(__le32 *)(pdu + hdr->hlen);
375 if (recv_digest != exp_digest) {
376 dev_err(queue->ctrl->ctrl.device,
377 "header digest error: recv %#x expected %#x\n",
378 le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
379 return -EIO;
380 }
381
382 return 0;
383}
384
385static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
386{
387 struct nvme_tcp_hdr *hdr = pdu;
388 u8 digest_len = nvme_tcp_hdgst_len(queue);
389 u32 len;
390
391 len = le32_to_cpu(hdr->plen) - hdr->hlen -
392 ((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0);
393
394 if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
395 dev_err(queue->ctrl->ctrl.device,
396 "queue %d: data digest flag is cleared\n",
397 nvme_tcp_queue_id(queue));
398 return -EPROTO;
399 }
400 crypto_ahash_init(queue->rcv_hash);
401
402 return 0;
403}
404
405static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
406 struct request *rq, unsigned int hctx_idx)
407{
408 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
409
410 page_frag_free(req->pdu);
411}
412
413static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
414 struct request *rq, unsigned int hctx_idx,
415 unsigned int numa_node)
416{
417 struct nvme_tcp_ctrl *ctrl = set->driver_data;
418 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
419 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
420 struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx];
421 u8 hdgst = nvme_tcp_hdgst_len(queue);
422
423 req->pdu = page_frag_alloc(&queue->pf_cache,
424 sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
425 GFP_KERNEL | __GFP_ZERO);
426 if (!req->pdu)
427 return -ENOMEM;
428
429 req->queue = queue;
430 nvme_req(rq)->ctrl = &ctrl->ctrl;
431
432 return 0;
433}
434
435static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
436 unsigned int hctx_idx)
437{
438 struct nvme_tcp_ctrl *ctrl = data;
439 struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];
440
441 hctx->driver_data = queue;
442 return 0;
443}
444
445static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
446 unsigned int hctx_idx)
447{
448 struct nvme_tcp_ctrl *ctrl = data;
449 struct nvme_tcp_queue *queue = &ctrl->queues[0];
450
451 hctx->driver_data = queue;
452 return 0;
453}
454
455static enum nvme_tcp_recv_state
456nvme_tcp_recv_state(struct nvme_tcp_queue *queue)
457{
458 return (queue->pdu_remaining) ? NVME_TCP_RECV_PDU :
459 (queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST :
460 NVME_TCP_RECV_DATA;
461}
462
463static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
464{
465 queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) +
466 nvme_tcp_hdgst_len(queue);
467 queue->pdu_offset = 0;
468 queue->data_remaining = -1;
469 queue->ddgst_remaining = 0;
470}
471
472static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
473{
474 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
475 return;
476
Sagi Grimberg236187c2020-07-28 13:16:36 -0700477 dev_warn(ctrl->device, "starting error recovery\n");
Nigel Kirkland97b25122020-02-10 16:01:45 -0800478 queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800479}
480
481static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
482 struct nvme_completion *cqe)
483{
484 struct request *rq;
485
486 rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
487 if (!rq) {
488 dev_err(queue->ctrl->ctrl.device,
489 "queue %d tag 0x%x not found\n",
490 nvme_tcp_queue_id(queue), cqe->command_id);
491 nvme_tcp_error_recovery(&queue->ctrl->ctrl);
492 return -EINVAL;
493 }
494
Christoph Hellwig2eb81a32020-08-18 09:11:29 +0200495 if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
Christoph Hellwigff029452020-06-11 08:44:52 +0200496 nvme_complete_rq(rq);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -0700497 queue->nr_cqe++;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800498
499 return 0;
500}
501
502static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
503 struct nvme_tcp_data_pdu *pdu)
504{
505 struct request *rq;
506
507 rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
508 if (!rq) {
509 dev_err(queue->ctrl->ctrl.device,
510 "queue %d tag %#x not found\n",
511 nvme_tcp_queue_id(queue), pdu->command_id);
512 return -ENOENT;
513 }
514
515 if (!blk_rq_payload_bytes(rq)) {
516 dev_err(queue->ctrl->ctrl.device,
517 "queue %d tag %#x unexpected data\n",
518 nvme_tcp_queue_id(queue), rq->tag);
519 return -EIO;
520 }
521
522 queue->data_remaining = le32_to_cpu(pdu->data_length);
523
Sagi Grimberg602d6742019-03-13 18:55:10 +0100524 if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS &&
525 unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) {
526 dev_err(queue->ctrl->ctrl.device,
527 "queue %d tag %#x SUCCESS set but not last PDU\n",
528 nvme_tcp_queue_id(queue), rq->tag);
529 nvme_tcp_error_recovery(&queue->ctrl->ctrl);
530 return -EPROTO;
531 }
532
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800533 return 0;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800534}
535
536static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
537 struct nvme_tcp_rsp_pdu *pdu)
538{
539 struct nvme_completion *cqe = &pdu->cqe;
540 int ret = 0;
541
542 /*
543 * AEN requests are special as they don't time out and can
544 * survive any kind of queue freeze and often don't respond to
545 * aborts. We don't even bother to allocate a struct request
546 * for them but rather special case them here.
547 */
Israel Rukshin58a8df62019-10-13 19:57:31 +0300548 if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
549 cqe->command_id)))
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800550 nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
551 &cqe->result);
552 else
553 ret = nvme_tcp_process_nvme_cqe(queue, cqe);
554
555 return ret;
556}
557
558static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
559 struct nvme_tcp_r2t_pdu *pdu)
560{
561 struct nvme_tcp_data_pdu *data = req->pdu;
562 struct nvme_tcp_queue *queue = req->queue;
563 struct request *rq = blk_mq_rq_from_pdu(req);
564 u8 hdgst = nvme_tcp_hdgst_len(queue);
565 u8 ddgst = nvme_tcp_ddgst_len(queue);
566
567 req->pdu_len = le32_to_cpu(pdu->r2t_length);
568 req->pdu_sent = 0;
569
570 if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
571 dev_err(queue->ctrl->ctrl.device,
572 "req %d r2t len %u exceeded data len %u (%zu sent)\n",
573 rq->tag, req->pdu_len, req->data_len,
574 req->data_sent);
575 return -EPROTO;
576 }
577
578 if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
579 dev_err(queue->ctrl->ctrl.device,
580 "req %d unexpected r2t offset %u (expected %zu)\n",
581 rq->tag, le32_to_cpu(pdu->r2t_offset),
582 req->data_sent);
583 return -EPROTO;
584 }
585
586 memset(data, 0, sizeof(*data));
587 data->hdr.type = nvme_tcp_h2c_data;
588 data->hdr.flags = NVME_TCP_F_DATA_LAST;
589 if (queue->hdr_digest)
590 data->hdr.flags |= NVME_TCP_F_HDGST;
591 if (queue->data_digest)
592 data->hdr.flags |= NVME_TCP_F_DDGST;
593 data->hdr.hlen = sizeof(*data);
594 data->hdr.pdo = data->hdr.hlen + hdgst;
595 data->hdr.plen =
596 cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
597 data->ttag = pdu->ttag;
598 data->command_id = rq->tag;
599 data->data_offset = cpu_to_le32(req->data_sent);
600 data->data_length = cpu_to_le32(req->pdu_len);
601 return 0;
602}
603
604static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
605 struct nvme_tcp_r2t_pdu *pdu)
606{
607 struct nvme_tcp_request *req;
608 struct request *rq;
609 int ret;
610
611 rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
612 if (!rq) {
613 dev_err(queue->ctrl->ctrl.device,
614 "queue %d tag %#x not found\n",
615 nvme_tcp_queue_id(queue), pdu->command_id);
616 return -ENOENT;
617 }
618 req = blk_mq_rq_to_pdu(rq);
619
620 ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
621 if (unlikely(ret))
622 return ret;
623
624 req->state = NVME_TCP_SEND_H2C_PDU;
625 req->offset = 0;
626
Sagi Grimberg86f03482020-06-18 17:30:23 -0700627 nvme_tcp_queue_request(req, false, true);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800628
629 return 0;
630}
631
632static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
633 unsigned int *offset, size_t *len)
634{
635 struct nvme_tcp_hdr *hdr;
636 char *pdu = queue->pdu;
637 size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining);
638 int ret;
639
640 ret = skb_copy_bits(skb, *offset,
641 &pdu[queue->pdu_offset], rcv_len);
642 if (unlikely(ret))
643 return ret;
644
645 queue->pdu_remaining -= rcv_len;
646 queue->pdu_offset += rcv_len;
647 *offset += rcv_len;
648 *len -= rcv_len;
649 if (queue->pdu_remaining)
650 return 0;
651
652 hdr = queue->pdu;
653 if (queue->hdr_digest) {
654 ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
655 if (unlikely(ret))
656 return ret;
657 }
658
659
660 if (queue->data_digest) {
661 ret = nvme_tcp_check_ddgst(queue, queue->pdu);
662 if (unlikely(ret))
663 return ret;
664 }
665
666 switch (hdr->type) {
667 case nvme_tcp_c2h_data:
Sagi Grimberg6be18262019-07-19 12:46:46 -0700668 return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800669 case nvme_tcp_rsp:
670 nvme_tcp_init_recv_ctx(queue);
Sagi Grimberg6be18262019-07-19 12:46:46 -0700671 return nvme_tcp_handle_comp(queue, (void *)queue->pdu);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800672 case nvme_tcp_r2t:
673 nvme_tcp_init_recv_ctx(queue);
Sagi Grimberg6be18262019-07-19 12:46:46 -0700674 return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800675 default:
676 dev_err(queue->ctrl->ctrl.device,
677 "unsupported pdu type (%d)\n", hdr->type);
678 return -EINVAL;
679 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800680}
681
Christoph Hellwig988aef9e2019-03-15 08:41:04 +0100682static inline void nvme_tcp_end_request(struct request *rq, u16 status)
Sagi Grimberg602d6742019-03-13 18:55:10 +0100683{
684 union nvme_result res = {};
685
Christoph Hellwig2eb81a32020-08-18 09:11:29 +0200686 if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
Christoph Hellwigff029452020-06-11 08:44:52 +0200687 nvme_complete_rq(rq);
Sagi Grimberg602d6742019-03-13 18:55:10 +0100688}
689
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800690static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
691 unsigned int *offset, size_t *len)
692{
693 struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
694 struct nvme_tcp_request *req;
695 struct request *rq;
696
697 rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
698 if (!rq) {
699 dev_err(queue->ctrl->ctrl.device,
700 "queue %d tag %#x not found\n",
701 nvme_tcp_queue_id(queue), pdu->command_id);
702 return -ENOENT;
703 }
704 req = blk_mq_rq_to_pdu(rq);
705
706 while (true) {
707 int recv_len, ret;
708
709 recv_len = min_t(size_t, *len, queue->data_remaining);
710 if (!recv_len)
711 break;
712
713 if (!iov_iter_count(&req->iter)) {
714 req->curr_bio = req->curr_bio->bi_next;
715
716 /*
717 * If we don`t have any bios it means that controller
718 * sent more data than we requested, hence error
719 */
720 if (!req->curr_bio) {
721 dev_err(queue->ctrl->ctrl.device,
722 "queue %d no space in request %#x",
723 nvme_tcp_queue_id(queue), rq->tag);
724 nvme_tcp_init_recv_ctx(queue);
725 return -EIO;
726 }
727 nvme_tcp_init_iter(req, READ);
728 }
729
730 /* we can read only from what is left in this bio */
731 recv_len = min_t(size_t, recv_len,
732 iov_iter_count(&req->iter));
733
734 if (queue->data_digest)
735 ret = skb_copy_and_hash_datagram_iter(skb, *offset,
736 &req->iter, recv_len, queue->rcv_hash);
737 else
738 ret = skb_copy_datagram_iter(skb, *offset,
739 &req->iter, recv_len);
740 if (ret) {
741 dev_err(queue->ctrl->ctrl.device,
742 "queue %d failed to copy request %#x data",
743 nvme_tcp_queue_id(queue), rq->tag);
744 return ret;
745 }
746
747 *len -= recv_len;
748 *offset += recv_len;
749 queue->data_remaining -= recv_len;
750 }
751
752 if (!queue->data_remaining) {
753 if (queue->data_digest) {
754 nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
755 queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
756 } else {
Sagi Grimberg1a9460c2019-07-03 14:08:04 -0700757 if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
Sagi Grimberg602d6742019-03-13 18:55:10 +0100758 nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -0700759 queue->nr_cqe++;
760 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800761 nvme_tcp_init_recv_ctx(queue);
762 }
763 }
764
765 return 0;
766}
767
768static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
769 struct sk_buff *skb, unsigned int *offset, size_t *len)
770{
Sagi Grimberg602d6742019-03-13 18:55:10 +0100771 struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800772 char *ddgst = (char *)&queue->recv_ddgst;
773 size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
774 off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
775 int ret;
776
777 ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
778 if (unlikely(ret))
779 return ret;
780
781 queue->ddgst_remaining -= recv_len;
782 *offset += recv_len;
783 *len -= recv_len;
784 if (queue->ddgst_remaining)
785 return 0;
786
787 if (queue->recv_ddgst != queue->exp_ddgst) {
788 dev_err(queue->ctrl->ctrl.device,
789 "data digest error: recv %#x expected %#x\n",
790 le32_to_cpu(queue->recv_ddgst),
791 le32_to_cpu(queue->exp_ddgst));
792 return -EIO;
793 }
794
Sagi Grimberg602d6742019-03-13 18:55:10 +0100795 if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
796 struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
797 pdu->command_id);
798
799 nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -0700800 queue->nr_cqe++;
Sagi Grimberg602d6742019-03-13 18:55:10 +0100801 }
802
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800803 nvme_tcp_init_recv_ctx(queue);
804 return 0;
805}
806
807static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
808 unsigned int offset, size_t len)
809{
810 struct nvme_tcp_queue *queue = desc->arg.data;
811 size_t consumed = len;
812 int result;
813
814 while (len) {
815 switch (nvme_tcp_recv_state(queue)) {
816 case NVME_TCP_RECV_PDU:
817 result = nvme_tcp_recv_pdu(queue, skb, &offset, &len);
818 break;
819 case NVME_TCP_RECV_DATA:
820 result = nvme_tcp_recv_data(queue, skb, &offset, &len);
821 break;
822 case NVME_TCP_RECV_DDGST:
823 result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len);
824 break;
825 default:
826 result = -EFAULT;
827 }
828 if (result) {
829 dev_err(queue->ctrl->ctrl.device,
830 "receive failed: %d\n", result);
831 queue->rd_enabled = false;
832 nvme_tcp_error_recovery(&queue->ctrl->ctrl);
833 return result;
834 }
835 }
836
837 return consumed;
838}
839
840static void nvme_tcp_data_ready(struct sock *sk)
841{
842 struct nvme_tcp_queue *queue;
843
Sagi Grimberg386e5e62020-04-30 13:59:32 -0700844 read_lock_bh(&sk->sk_callback_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800845 queue = sk->sk_user_data;
Sagi Grimberg72e5d752020-05-01 14:25:44 -0700846 if (likely(queue && queue->rd_enabled) &&
847 !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800848 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
Sagi Grimberg386e5e62020-04-30 13:59:32 -0700849 read_unlock_bh(&sk->sk_callback_lock);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800850}
851
852static void nvme_tcp_write_space(struct sock *sk)
853{
854 struct nvme_tcp_queue *queue;
855
856 read_lock_bh(&sk->sk_callback_lock);
857 queue = sk->sk_user_data;
858 if (likely(queue && sk_stream_is_writeable(sk))) {
859 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
860 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
861 }
862 read_unlock_bh(&sk->sk_callback_lock);
863}
864
865static void nvme_tcp_state_change(struct sock *sk)
866{
867 struct nvme_tcp_queue *queue;
868
869 read_lock(&sk->sk_callback_lock);
870 queue = sk->sk_user_data;
871 if (!queue)
872 goto done;
873
874 switch (sk->sk_state) {
875 case TCP_CLOSE:
876 case TCP_CLOSE_WAIT:
877 case TCP_LAST_ACK:
878 case TCP_FIN_WAIT1:
879 case TCP_FIN_WAIT2:
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800880 nvme_tcp_error_recovery(&queue->ctrl->ctrl);
881 break;
882 default:
883 dev_info(queue->ctrl->ctrl.device,
884 "queue %d socket state %d\n",
885 nvme_tcp_queue_id(queue), sk->sk_state);
886 }
887
888 queue->state_change(sk);
889done:
890 read_unlock(&sk->sk_callback_lock);
891}
892
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700893static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
894{
895 return !list_empty(&queue->send_list) ||
896 !llist_empty(&queue->req_list) || queue->more_requests;
897}
898
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800899static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
900{
901 queue->request = NULL;
902}
903
904static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
905{
Sagi Grimberg16686012019-08-02 18:17:52 -0700906 nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800907}
908
909static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
910{
911 struct nvme_tcp_queue *queue = req->queue;
912
913 while (true) {
914 struct page *page = nvme_tcp_req_cur_page(req);
915 size_t offset = nvme_tcp_req_cur_offset(req);
916 size_t len = nvme_tcp_req_cur_length(req);
917 bool last = nvme_tcp_pdu_last_send(req, len);
918 int ret, flags = MSG_DONTWAIT;
919
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700920 if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800921 flags |= MSG_EOR;
922 else
Sagi Grimberg5bb052d2020-05-04 22:20:01 -0700923 flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800924
Coly Li7d4194a2020-10-02 16:27:30 +0800925 if (sendpage_ok(page)) {
926 ret = kernel_sendpage(queue->sock, page, offset, len,
Mikhail Skorzhinskii37c15212019-07-08 12:31:29 +0200927 flags);
928 } else {
Coly Li7d4194a2020-10-02 16:27:30 +0800929 ret = sock_no_sendpage(queue->sock, page, offset, len,
Mikhail Skorzhinskii37c15212019-07-08 12:31:29 +0200930 flags);
931 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800932 if (ret <= 0)
933 return ret;
934
935 nvme_tcp_advance_req(req, ret);
936 if (queue->data_digest)
937 nvme_tcp_ddgst_update(queue->snd_hash, page,
938 offset, ret);
939
940 /* fully successful last write*/
941 if (last && ret == len) {
942 if (queue->data_digest) {
943 nvme_tcp_ddgst_final(queue->snd_hash,
944 &req->ddgst);
945 req->state = NVME_TCP_SEND_DDGST;
946 req->offset = 0;
947 } else {
948 nvme_tcp_done_send_req(queue);
949 }
950 return 1;
951 }
952 }
953 return -EAGAIN;
954}
955
956static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
957{
958 struct nvme_tcp_queue *queue = req->queue;
959 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
960 bool inline_data = nvme_tcp_has_inline_data(req);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800961 u8 hdgst = nvme_tcp_hdgst_len(queue);
962 int len = sizeof(*pdu) + hdgst - req->offset;
Sagi Grimberg5bb052d2020-05-04 22:20:01 -0700963 int flags = MSG_DONTWAIT;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800964 int ret;
965
Sagi Grimberg122e5b92020-06-18 17:30:24 -0700966 if (inline_data || nvme_tcp_queue_more(queue))
Sagi Grimberg5bb052d2020-05-04 22:20:01 -0700967 flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
968 else
969 flags |= MSG_EOR;
970
Sagi Grimberg3f2304f2018-12-03 17:52:17 -0800971 if (queue->hdr_digest && !req->offset)
972 nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
973
974 ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
975 offset_in_page(pdu) + req->offset, len, flags);
976 if (unlikely(ret <= 0))
977 return ret;
978
979 len -= ret;
980 if (!len) {
981 if (inline_data) {
982 req->state = NVME_TCP_SEND_DATA;
983 if (queue->data_digest)
984 crypto_ahash_init(queue->snd_hash);
985 nvme_tcp_init_iter(req, WRITE);
986 } else {
987 nvme_tcp_done_send_req(queue);
988 }
989 return 1;
990 }
991 req->offset += ret;
992
993 return -EAGAIN;
994}
995
996static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
997{
998 struct nvme_tcp_queue *queue = req->queue;
999 struct nvme_tcp_data_pdu *pdu = req->pdu;
1000 u8 hdgst = nvme_tcp_hdgst_len(queue);
1001 int len = sizeof(*pdu) - req->offset + hdgst;
1002 int ret;
1003
1004 if (queue->hdr_digest && !req->offset)
1005 nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
1006
1007 ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
1008 offset_in_page(pdu) + req->offset, len,
Sagi Grimberg5bb052d2020-05-04 22:20:01 -07001009 MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001010 if (unlikely(ret <= 0))
1011 return ret;
1012
1013 len -= ret;
1014 if (!len) {
1015 req->state = NVME_TCP_SEND_DATA;
1016 if (queue->data_digest)
1017 crypto_ahash_init(queue->snd_hash);
1018 if (!req->data_sent)
1019 nvme_tcp_init_iter(req, WRITE);
1020 return 1;
1021 }
1022 req->offset += ret;
1023
1024 return -EAGAIN;
1025}
1026
1027static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
1028{
1029 struct nvme_tcp_queue *queue = req->queue;
1030 int ret;
Sagi Grimberg122e5b92020-06-18 17:30:24 -07001031 struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001032 struct kvec iov = {
1033 .iov_base = &req->ddgst + req->offset,
1034 .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
1035 };
1036
Sagi Grimberg122e5b92020-06-18 17:30:24 -07001037 if (nvme_tcp_queue_more(queue))
1038 msg.msg_flags |= MSG_MORE;
1039 else
1040 msg.msg_flags |= MSG_EOR;
1041
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001042 ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
1043 if (unlikely(ret <= 0))
1044 return ret;
1045
1046 if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) {
1047 nvme_tcp_done_send_req(queue);
1048 return 1;
1049 }
1050
1051 req->offset += ret;
1052 return -EAGAIN;
1053}
1054
1055static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
1056{
1057 struct nvme_tcp_request *req;
1058 int ret = 1;
1059
1060 if (!queue->request) {
1061 queue->request = nvme_tcp_fetch_request(queue);
1062 if (!queue->request)
1063 return 0;
1064 }
1065 req = queue->request;
1066
1067 if (req->state == NVME_TCP_SEND_CMD_PDU) {
1068 ret = nvme_tcp_try_send_cmd_pdu(req);
1069 if (ret <= 0)
1070 goto done;
1071 if (!nvme_tcp_has_inline_data(req))
1072 return ret;
1073 }
1074
1075 if (req->state == NVME_TCP_SEND_H2C_PDU) {
1076 ret = nvme_tcp_try_send_data_pdu(req);
1077 if (ret <= 0)
1078 goto done;
1079 }
1080
1081 if (req->state == NVME_TCP_SEND_DATA) {
1082 ret = nvme_tcp_try_send_data(req);
1083 if (ret <= 0)
1084 goto done;
1085 }
1086
1087 if (req->state == NVME_TCP_SEND_DDGST)
1088 ret = nvme_tcp_try_send_ddgst(req);
1089done:
Sagi Grimberg5ff4e112020-02-25 16:43:23 -08001090 if (ret == -EAGAIN) {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001091 ret = 0;
Sagi Grimberg5ff4e112020-02-25 16:43:23 -08001092 } else if (ret < 0) {
1093 dev_err(queue->ctrl->ctrl.device,
1094 "failed to send request %d\n", ret);
1095 if (ret != -EPIPE && ret != -ECONNRESET)
1096 nvme_tcp_fail_request(queue->request);
1097 nvme_tcp_done_send_req(queue);
1098 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001099 return ret;
1100}
1101
1102static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
1103{
Potnuri Bharat Teja10407ec2019-07-08 15:22:00 +05301104 struct socket *sock = queue->sock;
1105 struct sock *sk = sock->sk;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001106 read_descriptor_t rd_desc;
1107 int consumed;
1108
1109 rd_desc.arg.data = queue;
1110 rd_desc.count = 1;
1111 lock_sock(sk);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001112 queue->nr_cqe = 0;
Potnuri Bharat Teja10407ec2019-07-08 15:22:00 +05301113 consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001114 release_sock(sk);
1115 return consumed;
1116}
1117
1118static void nvme_tcp_io_work(struct work_struct *w)
1119{
1120 struct nvme_tcp_queue *queue =
1121 container_of(w, struct nvme_tcp_queue, io_work);
Wunderlich, Markddef2952019-09-18 23:36:37 +00001122 unsigned long deadline = jiffies + msecs_to_jiffies(1);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001123
1124 do {
1125 bool pending = false;
1126 int result;
1127
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -07001128 if (mutex_trylock(&queue->send_mutex)) {
1129 result = nvme_tcp_try_send(queue);
1130 mutex_unlock(&queue->send_mutex);
1131 if (result > 0)
1132 pending = true;
1133 else if (unlikely(result < 0))
1134 break;
1135 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001136
1137 result = nvme_tcp_try_recv(queue);
1138 if (result > 0)
1139 pending = true;
Sagi Grimberg761ad262020-02-25 16:43:24 -08001140 else if (unlikely(result < 0))
Sagi Grimberg39d06079a2020-03-31 22:44:23 -07001141 return;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001142
1143 if (!pending)
1144 return;
1145
Wunderlich, Markddef2952019-09-18 23:36:37 +00001146 } while (!time_after(jiffies, deadline)); /* quota is exhausted */
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001147
1148 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
1149}
1150
1151static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
1152{
1153 struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
1154
1155 ahash_request_free(queue->rcv_hash);
1156 ahash_request_free(queue->snd_hash);
1157 crypto_free_ahash(tfm);
1158}
1159
1160static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
1161{
1162 struct crypto_ahash *tfm;
1163
1164 tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
1165 if (IS_ERR(tfm))
1166 return PTR_ERR(tfm);
1167
1168 queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
1169 if (!queue->snd_hash)
1170 goto free_tfm;
1171 ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
1172
1173 queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
1174 if (!queue->rcv_hash)
1175 goto free_snd_hash;
1176 ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
1177
1178 return 0;
1179free_snd_hash:
1180 ahash_request_free(queue->snd_hash);
1181free_tfm:
1182 crypto_free_ahash(tfm);
1183 return -ENOMEM;
1184}
1185
1186static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
1187{
1188 struct nvme_tcp_request *async = &ctrl->async_req;
1189
1190 page_frag_free(async->pdu);
1191}
1192
1193static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
1194{
1195 struct nvme_tcp_queue *queue = &ctrl->queues[0];
1196 struct nvme_tcp_request *async = &ctrl->async_req;
1197 u8 hdgst = nvme_tcp_hdgst_len(queue);
1198
1199 async->pdu = page_frag_alloc(&queue->pf_cache,
1200 sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
1201 GFP_KERNEL | __GFP_ZERO);
1202 if (!async->pdu)
1203 return -ENOMEM;
1204
1205 async->queue = &ctrl->queues[0];
1206 return 0;
1207}
1208
1209static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
1210{
1211 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1212 struct nvme_tcp_queue *queue = &ctrl->queues[qid];
1213
1214 if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
1215 return;
1216
1217 if (queue->hdr_digest || queue->data_digest)
1218 nvme_tcp_free_crypto(queue);
1219
1220 sock_release(queue->sock);
1221 kfree(queue->pdu);
1222}
1223
1224static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
1225{
1226 struct nvme_tcp_icreq_pdu *icreq;
1227 struct nvme_tcp_icresp_pdu *icresp;
1228 struct msghdr msg = {};
1229 struct kvec iov;
1230 bool ctrl_hdgst, ctrl_ddgst;
1231 int ret;
1232
1233 icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
1234 if (!icreq)
1235 return -ENOMEM;
1236
1237 icresp = kzalloc(sizeof(*icresp), GFP_KERNEL);
1238 if (!icresp) {
1239 ret = -ENOMEM;
1240 goto free_icreq;
1241 }
1242
1243 icreq->hdr.type = nvme_tcp_icreq;
1244 icreq->hdr.hlen = sizeof(*icreq);
1245 icreq->hdr.pdo = 0;
1246 icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen);
1247 icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
1248 icreq->maxr2t = 0; /* single inflight r2t supported */
1249 icreq->hpda = 0; /* no alignment constraint */
1250 if (queue->hdr_digest)
1251 icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
1252 if (queue->data_digest)
1253 icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE;
1254
1255 iov.iov_base = icreq;
1256 iov.iov_len = sizeof(*icreq);
1257 ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
1258 if (ret < 0)
1259 goto free_icresp;
1260
1261 memset(&msg, 0, sizeof(msg));
1262 iov.iov_base = icresp;
1263 iov.iov_len = sizeof(*icresp);
1264 ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
1265 iov.iov_len, msg.msg_flags);
1266 if (ret < 0)
1267 goto free_icresp;
1268
1269 ret = -EINVAL;
1270 if (icresp->hdr.type != nvme_tcp_icresp) {
1271 pr_err("queue %d: bad type returned %d\n",
1272 nvme_tcp_queue_id(queue), icresp->hdr.type);
1273 goto free_icresp;
1274 }
1275
1276 if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) {
1277 pr_err("queue %d: bad pdu length returned %d\n",
1278 nvme_tcp_queue_id(queue), icresp->hdr.plen);
1279 goto free_icresp;
1280 }
1281
1282 if (icresp->pfv != NVME_TCP_PFV_1_0) {
1283 pr_err("queue %d: bad pfv returned %d\n",
1284 nvme_tcp_queue_id(queue), icresp->pfv);
1285 goto free_icresp;
1286 }
1287
1288 ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE);
1289 if ((queue->data_digest && !ctrl_ddgst) ||
1290 (!queue->data_digest && ctrl_ddgst)) {
1291 pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n",
1292 nvme_tcp_queue_id(queue),
1293 queue->data_digest ? "enabled" : "disabled",
1294 ctrl_ddgst ? "enabled" : "disabled");
1295 goto free_icresp;
1296 }
1297
1298 ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE);
1299 if ((queue->hdr_digest && !ctrl_hdgst) ||
1300 (!queue->hdr_digest && ctrl_hdgst)) {
1301 pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n",
1302 nvme_tcp_queue_id(queue),
1303 queue->hdr_digest ? "enabled" : "disabled",
1304 ctrl_hdgst ? "enabled" : "disabled");
1305 goto free_icresp;
1306 }
1307
1308 if (icresp->cpda != 0) {
1309 pr_err("queue %d: unsupported cpda returned %d\n",
1310 nvme_tcp_queue_id(queue), icresp->cpda);
1311 goto free_icresp;
1312 }
1313
1314 ret = 0;
1315free_icresp:
1316 kfree(icresp);
1317free_icreq:
1318 kfree(icreq);
1319 return ret;
1320}
1321
Sagi Grimberg40510a62020-02-25 15:53:09 -08001322static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
1323{
1324 return nvme_tcp_queue_id(queue) == 0;
1325}
1326
1327static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
1328{
1329 struct nvme_tcp_ctrl *ctrl = queue->ctrl;
1330 int qid = nvme_tcp_queue_id(queue);
1331
1332 return !nvme_tcp_admin_queue(queue) &&
1333 qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
1334}
1335
1336static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
1337{
1338 struct nvme_tcp_ctrl *ctrl = queue->ctrl;
1339 int qid = nvme_tcp_queue_id(queue);
1340
1341 return !nvme_tcp_admin_queue(queue) &&
1342 !nvme_tcp_default_queue(queue) &&
1343 qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
1344 ctrl->io_queues[HCTX_TYPE_READ];
1345}
1346
1347static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
1348{
1349 struct nvme_tcp_ctrl *ctrl = queue->ctrl;
1350 int qid = nvme_tcp_queue_id(queue);
1351
1352 return !nvme_tcp_admin_queue(queue) &&
1353 !nvme_tcp_default_queue(queue) &&
1354 !nvme_tcp_read_queue(queue) &&
1355 qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
1356 ctrl->io_queues[HCTX_TYPE_READ] +
1357 ctrl->io_queues[HCTX_TYPE_POLL];
1358}
1359
1360static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
1361{
1362 struct nvme_tcp_ctrl *ctrl = queue->ctrl;
1363 int qid = nvme_tcp_queue_id(queue);
1364 int n = 0;
1365
1366 if (nvme_tcp_default_queue(queue))
1367 n = qid - 1;
1368 else if (nvme_tcp_read_queue(queue))
1369 n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
1370 else if (nvme_tcp_poll_queue(queue))
1371 n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
1372 ctrl->io_queues[HCTX_TYPE_READ] - 1;
1373 queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
1374}
1375
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001376static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
1377 int qid, size_t queue_size)
1378{
1379 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1380 struct nvme_tcp_queue *queue = &ctrl->queues[qid];
Christoph Hellwig6ebf71b2020-05-28 07:12:26 +02001381 int ret, rcv_pdu_size;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001382
1383 queue->ctrl = ctrl;
Sagi Grimberg15ec9282020-06-18 17:30:22 -07001384 init_llist_head(&queue->req_list);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001385 INIT_LIST_HEAD(&queue->send_list);
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -07001386 mutex_init(&queue->send_mutex);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001387 INIT_WORK(&queue->io_work, nvme_tcp_io_work);
1388 queue->queue_size = queue_size;
1389
1390 if (qid > 0)
Israel Rukshin9924b032019-08-18 12:08:53 +03001391 queue->cmnd_capsule_len = nctrl->ioccsz * 16;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001392 else
1393 queue->cmnd_capsule_len = sizeof(struct nvme_command) +
1394 NVME_TCP_ADMIN_CCSZ;
1395
1396 ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
1397 IPPROTO_TCP, &queue->sock);
1398 if (ret) {
Israel Rukshin9924b032019-08-18 12:08:53 +03001399 dev_err(nctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001400 "failed to create socket: %d\n", ret);
1401 return ret;
1402 }
1403
1404 /* Single syn retry */
Christoph Hellwig557eadf2020-05-28 07:12:21 +02001405 tcp_sock_set_syncnt(queue->sock->sk, 1);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001406
1407 /* Set TCP no delay */
Christoph Hellwig12abc5e2020-05-28 07:12:19 +02001408 tcp_sock_set_nodelay(queue->sock->sk);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001409
1410 /*
1411 * Cleanup whatever is sitting in the TCP transmit queue on socket
1412 * close. This is done to prevent stale data from being sent should
1413 * the network connection be restored before TCP times out.
1414 */
Christoph Hellwigc4335942020-05-28 07:12:10 +02001415 sock_no_linger(queue->sock->sk);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001416
Christoph Hellwig6e434962020-05-28 07:12:11 +02001417 if (so_priority > 0)
1418 sock_set_priority(queue->sock->sk, so_priority);
Wunderlich, Mark9912ade2020-01-16 00:46:12 +00001419
Israel Rukshinbb139852019-08-18 12:08:54 +03001420 /* Set socket type of service */
Christoph Hellwig6ebf71b2020-05-28 07:12:26 +02001421 if (nctrl->opts->tos >= 0)
1422 ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
Israel Rukshinbb139852019-08-18 12:08:54 +03001423
Sagi Grimbergadc99fd2020-07-23 16:42:26 -07001424 /* Set 10 seconds timeout for icresp recvmsg */
1425 queue->sock->sk->sk_rcvtimeo = 10 * HZ;
1426
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001427 queue->sock->sk->sk_allocation = GFP_ATOMIC;
Sagi Grimberg40510a62020-02-25 15:53:09 -08001428 nvme_tcp_set_queue_io_cpu(queue);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001429 queue->request = NULL;
1430 queue->data_remaining = 0;
1431 queue->ddgst_remaining = 0;
1432 queue->pdu_remaining = 0;
1433 queue->pdu_offset = 0;
1434 sk_set_memalloc(queue->sock->sk);
1435
Israel Rukshin9924b032019-08-18 12:08:53 +03001436 if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001437 ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
1438 sizeof(ctrl->src_addr));
1439 if (ret) {
Israel Rukshin9924b032019-08-18 12:08:53 +03001440 dev_err(nctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001441 "failed to bind queue %d socket %d\n",
1442 qid, ret);
1443 goto err_sock;
1444 }
1445 }
1446
1447 queue->hdr_digest = nctrl->opts->hdr_digest;
1448 queue->data_digest = nctrl->opts->data_digest;
1449 if (queue->hdr_digest || queue->data_digest) {
1450 ret = nvme_tcp_alloc_crypto(queue);
1451 if (ret) {
Israel Rukshin9924b032019-08-18 12:08:53 +03001452 dev_err(nctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001453 "failed to allocate queue %d crypto\n", qid);
1454 goto err_sock;
1455 }
1456 }
1457
1458 rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
1459 nvme_tcp_hdgst_len(queue);
1460 queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
1461 if (!queue->pdu) {
1462 ret = -ENOMEM;
1463 goto err_crypto;
1464 }
1465
Israel Rukshin9924b032019-08-18 12:08:53 +03001466 dev_dbg(nctrl->device, "connecting queue %d\n",
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001467 nvme_tcp_queue_id(queue));
1468
1469 ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
1470 sizeof(ctrl->addr), 0);
1471 if (ret) {
Israel Rukshin9924b032019-08-18 12:08:53 +03001472 dev_err(nctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001473 "failed to connect socket: %d\n", ret);
1474 goto err_rcv_pdu;
1475 }
1476
1477 ret = nvme_tcp_init_connection(queue);
1478 if (ret)
1479 goto err_init_connect;
1480
1481 queue->rd_enabled = true;
1482 set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
1483 nvme_tcp_init_recv_ctx(queue);
1484
1485 write_lock_bh(&queue->sock->sk->sk_callback_lock);
1486 queue->sock->sk->sk_user_data = queue;
1487 queue->state_change = queue->sock->sk->sk_state_change;
1488 queue->data_ready = queue->sock->sk->sk_data_ready;
1489 queue->write_space = queue->sock->sk->sk_write_space;
1490 queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
1491 queue->sock->sk->sk_state_change = nvme_tcp_state_change;
1492 queue->sock->sk->sk_write_space = nvme_tcp_write_space;
Sebastian Andrzej Siewiorac1c4e12019-10-10 17:34:12 +02001493#ifdef CONFIG_NET_RX_BUSY_POLL
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001494 queue->sock->sk->sk_ll_usec = 1;
Sebastian Andrzej Siewiorac1c4e12019-10-10 17:34:12 +02001495#endif
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001496 write_unlock_bh(&queue->sock->sk->sk_callback_lock);
1497
1498 return 0;
1499
1500err_init_connect:
1501 kernel_sock_shutdown(queue->sock, SHUT_RDWR);
1502err_rcv_pdu:
1503 kfree(queue->pdu);
1504err_crypto:
1505 if (queue->hdr_digest || queue->data_digest)
1506 nvme_tcp_free_crypto(queue);
1507err_sock:
1508 sock_release(queue->sock);
1509 queue->sock = NULL;
1510 return ret;
1511}
1512
1513static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
1514{
1515 struct socket *sock = queue->sock;
1516
1517 write_lock_bh(&sock->sk->sk_callback_lock);
1518 sock->sk->sk_user_data = NULL;
1519 sock->sk->sk_data_ready = queue->data_ready;
1520 sock->sk->sk_state_change = queue->state_change;
1521 sock->sk->sk_write_space = queue->write_space;
1522 write_unlock_bh(&sock->sk->sk_callback_lock);
1523}
1524
1525static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
1526{
1527 kernel_sock_shutdown(queue->sock, SHUT_RDWR);
1528 nvme_tcp_restore_sock_calls(queue);
1529 cancel_work_sync(&queue->io_work);
1530}
1531
1532static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
1533{
1534 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1535 struct nvme_tcp_queue *queue = &ctrl->queues[qid];
1536
1537 if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
1538 return;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001539 __nvme_tcp_stop_queue(queue);
1540}
1541
1542static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
1543{
1544 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1545 int ret;
1546
1547 if (idx)
Sagi Grimberg26c68222018-12-14 11:06:08 -08001548 ret = nvmf_connect_io_queue(nctrl, idx, false);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001549 else
1550 ret = nvmf_connect_admin_queue(nctrl);
1551
1552 if (!ret) {
1553 set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
1554 } else {
Sagi Grimbergf34e2582019-04-29 16:25:48 -07001555 if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
1556 __nvme_tcp_stop_queue(&ctrl->queues[idx]);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001557 dev_err(nctrl->device,
1558 "failed to connect queue: %d ret=%d\n", idx, ret);
1559 }
1560 return ret;
1561}
1562
1563static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
1564 bool admin)
1565{
1566 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1567 struct blk_mq_tag_set *set;
1568 int ret;
1569
1570 if (admin) {
1571 set = &ctrl->admin_tag_set;
1572 memset(set, 0, sizeof(*set));
1573 set->ops = &nvme_tcp_admin_mq_ops;
1574 set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
1575 set->reserved_tags = 2; /* connect + keep-alive */
Max Gurtovoy610c8232020-06-16 12:34:24 +03001576 set->numa_node = nctrl->numa_node;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -07001577 set->flags = BLK_MQ_F_BLOCKING;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001578 set->cmd_size = sizeof(struct nvme_tcp_request);
1579 set->driver_data = ctrl;
1580 set->nr_hw_queues = 1;
Chaitanya Kulkarnidc96f932020-11-09 16:33:45 -08001581 set->timeout = NVME_ADMIN_TIMEOUT;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001582 } else {
1583 set = &ctrl->tag_set;
1584 memset(set, 0, sizeof(*set));
1585 set->ops = &nvme_tcp_mq_ops;
1586 set->queue_depth = nctrl->sqsize + 1;
1587 set->reserved_tags = 1; /* fabric connect */
Max Gurtovoy610c8232020-06-16 12:34:24 +03001588 set->numa_node = nctrl->numa_node;
Sagi Grimbergdb5ad6b2020-05-01 14:25:45 -07001589 set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001590 set->cmd_size = sizeof(struct nvme_tcp_request);
1591 set->driver_data = ctrl;
1592 set->nr_hw_queues = nctrl->queue_count - 1;
1593 set->timeout = NVME_IO_TIMEOUT;
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001594 set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001595 }
1596
1597 ret = blk_mq_alloc_tag_set(set);
1598 if (ret)
1599 return ERR_PTR(ret);
1600
1601 return set;
1602}
1603
1604static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
1605{
1606 if (to_tcp_ctrl(ctrl)->async_req.pdu) {
David Milburnceb1e082020-09-02 17:42:53 -05001607 cancel_work_sync(&ctrl->async_event_work);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001608 nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
1609 to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
1610 }
1611
1612 nvme_tcp_free_queue(ctrl, 0);
1613}
1614
1615static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl)
1616{
1617 int i;
1618
1619 for (i = 1; i < ctrl->queue_count; i++)
1620 nvme_tcp_free_queue(ctrl, i);
1621}
1622
1623static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
1624{
1625 int i;
1626
1627 for (i = 1; i < ctrl->queue_count; i++)
1628 nvme_tcp_stop_queue(ctrl, i);
1629}
1630
1631static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl)
1632{
1633 int i, ret = 0;
1634
1635 for (i = 1; i < ctrl->queue_count; i++) {
1636 ret = nvme_tcp_start_queue(ctrl, i);
1637 if (ret)
1638 goto out_stop_queues;
1639 }
1640
1641 return 0;
1642
1643out_stop_queues:
1644 for (i--; i >= 1; i--)
1645 nvme_tcp_stop_queue(ctrl, i);
1646 return ret;
1647}
1648
1649static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
1650{
1651 int ret;
1652
1653 ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
1654 if (ret)
1655 return ret;
1656
1657 ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
1658 if (ret)
1659 goto out_free_queue;
1660
1661 return 0;
1662
1663out_free_queue:
1664 nvme_tcp_free_queue(ctrl, 0);
1665 return ret;
1666}
1667
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001668static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001669{
1670 int i, ret;
1671
1672 for (i = 1; i < ctrl->queue_count; i++) {
1673 ret = nvme_tcp_alloc_queue(ctrl, i,
1674 ctrl->sqsize + 1);
1675 if (ret)
1676 goto out_free_queues;
1677 }
1678
1679 return 0;
1680
1681out_free_queues:
1682 for (i--; i >= 1; i--)
1683 nvme_tcp_free_queue(ctrl, i);
1684
1685 return ret;
1686}
1687
1688static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
1689{
Sagi Grimberg873946f2018-12-11 23:38:57 -08001690 unsigned int nr_io_queues;
1691
1692 nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
1693 nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001694 nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());
Sagi Grimberg873946f2018-12-11 23:38:57 -08001695
1696 return nr_io_queues;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001697}
1698
Sagi Grimberg64861992019-05-28 22:49:05 -07001699static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
1700 unsigned int nr_io_queues)
1701{
1702 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
1703 struct nvmf_ctrl_options *opts = nctrl->opts;
1704
1705 if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
1706 /*
1707 * separate read/write queues
1708 * hand out dedicated default queues only after we have
1709 * sufficient read queues.
1710 */
1711 ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
1712 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
1713 ctrl->io_queues[HCTX_TYPE_DEFAULT] =
1714 min(opts->nr_write_queues, nr_io_queues);
1715 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
1716 } else {
1717 /*
1718 * shared read/write queues
1719 * either no write queues were requested, or we don't have
1720 * sufficient queue count to have dedicated default queues.
1721 */
1722 ctrl->io_queues[HCTX_TYPE_DEFAULT] =
1723 min(opts->nr_io_queues, nr_io_queues);
1724 nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
1725 }
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07001726
1727 if (opts->nr_poll_queues && nr_io_queues) {
1728 /* map dedicated poll queues only if we have queues left */
1729 ctrl->io_queues[HCTX_TYPE_POLL] =
1730 min(opts->nr_poll_queues, nr_io_queues);
1731 }
Sagi Grimberg64861992019-05-28 22:49:05 -07001732}
1733
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001734static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001735{
1736 unsigned int nr_io_queues;
1737 int ret;
1738
1739 nr_io_queues = nvme_tcp_nr_io_queues(ctrl);
1740 ret = nvme_set_queue_count(ctrl, &nr_io_queues);
1741 if (ret)
1742 return ret;
1743
1744 ctrl->queue_count = nr_io_queues + 1;
1745 if (ctrl->queue_count < 2)
1746 return 0;
1747
1748 dev_info(ctrl->device,
1749 "creating %d I/O queues.\n", nr_io_queues);
1750
Sagi Grimberg64861992019-05-28 22:49:05 -07001751 nvme_tcp_set_io_queues(ctrl, nr_io_queues);
1752
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001753 return __nvme_tcp_alloc_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001754}
1755
1756static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
1757{
1758 nvme_tcp_stop_io_queues(ctrl);
1759 if (remove) {
Sagi Grimberge85037a2018-12-31 23:58:30 -08001760 blk_cleanup_queue(ctrl->connect_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001761 blk_mq_free_tag_set(ctrl->tagset);
1762 }
1763 nvme_tcp_free_io_queues(ctrl);
1764}
1765
1766static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
1767{
1768 int ret;
1769
Sagi Grimbergefb973b2019-04-24 11:53:19 -07001770 ret = nvme_tcp_alloc_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001771 if (ret)
1772 return ret;
1773
1774 if (new) {
1775 ctrl->tagset = nvme_tcp_alloc_tagset(ctrl, false);
1776 if (IS_ERR(ctrl->tagset)) {
1777 ret = PTR_ERR(ctrl->tagset);
1778 goto out_free_io_queues;
1779 }
1780
Sagi Grimberge85037a2018-12-31 23:58:30 -08001781 ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
1782 if (IS_ERR(ctrl->connect_q)) {
1783 ret = PTR_ERR(ctrl->connect_q);
1784 goto out_free_tag_set;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001785 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001786 }
1787
1788 ret = nvme_tcp_start_io_queues(ctrl);
1789 if (ret)
1790 goto out_cleanup_connect_q;
1791
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001792 if (!new) {
1793 nvme_start_queues(ctrl);
Sagi Grimberge5c01f42020-07-30 13:25:34 -07001794 if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
1795 /*
1796 * If we timed out waiting for freeze we are likely to
1797 * be stuck. Fail the controller initialization just
1798 * to be safe.
1799 */
1800 ret = -ENODEV;
1801 goto out_wait_freeze_timed_out;
1802 }
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001803 blk_mq_update_nr_hw_queues(ctrl->tagset,
1804 ctrl->queue_count - 1);
1805 nvme_unfreeze(ctrl);
1806 }
1807
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001808 return 0;
1809
Sagi Grimberge5c01f42020-07-30 13:25:34 -07001810out_wait_freeze_timed_out:
1811 nvme_stop_queues(ctrl);
1812 nvme_tcp_stop_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001813out_cleanup_connect_q:
Sagi Grimberge85037a2018-12-31 23:58:30 -08001814 if (new)
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001815 blk_cleanup_queue(ctrl->connect_q);
1816out_free_tag_set:
1817 if (new)
1818 blk_mq_free_tag_set(ctrl->tagset);
1819out_free_io_queues:
1820 nvme_tcp_free_io_queues(ctrl);
1821 return ret;
1822}
1823
1824static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
1825{
1826 nvme_tcp_stop_queue(ctrl, 0);
1827 if (remove) {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001828 blk_cleanup_queue(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001829 blk_cleanup_queue(ctrl->fabrics_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001830 blk_mq_free_tag_set(ctrl->admin_tagset);
1831 }
1832 nvme_tcp_free_admin_queue(ctrl);
1833}
1834
1835static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
1836{
1837 int error;
1838
1839 error = nvme_tcp_alloc_admin_queue(ctrl);
1840 if (error)
1841 return error;
1842
1843 if (new) {
1844 ctrl->admin_tagset = nvme_tcp_alloc_tagset(ctrl, true);
1845 if (IS_ERR(ctrl->admin_tagset)) {
1846 error = PTR_ERR(ctrl->admin_tagset);
1847 goto out_free_queue;
1848 }
1849
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001850 ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset);
1851 if (IS_ERR(ctrl->fabrics_q)) {
1852 error = PTR_ERR(ctrl->fabrics_q);
1853 goto out_free_tagset;
1854 }
1855
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001856 ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset);
1857 if (IS_ERR(ctrl->admin_q)) {
1858 error = PTR_ERR(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001859 goto out_cleanup_fabrics_q;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001860 }
1861 }
1862
1863 error = nvme_tcp_start_queue(ctrl, 0);
1864 if (error)
1865 goto out_cleanup_queue;
1866
Sagi Grimbergc0f2f452019-07-22 17:06:53 -07001867 error = nvme_enable_ctrl(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001868 if (error)
1869 goto out_stop_queue;
1870
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001871 blk_mq_unquiesce_queue(ctrl->admin_q);
1872
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001873 error = nvme_init_identify(ctrl);
1874 if (error)
1875 goto out_stop_queue;
1876
1877 return 0;
1878
1879out_stop_queue:
1880 nvme_tcp_stop_queue(ctrl, 0);
1881out_cleanup_queue:
1882 if (new)
1883 blk_cleanup_queue(ctrl->admin_q);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001884out_cleanup_fabrics_q:
1885 if (new)
1886 blk_cleanup_queue(ctrl->fabrics_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001887out_free_tagset:
1888 if (new)
1889 blk_mq_free_tag_set(ctrl->admin_tagset);
1890out_free_queue:
1891 nvme_tcp_free_admin_queue(ctrl);
1892 return error;
1893}
1894
1895static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
1896 bool remove)
1897{
1898 blk_mq_quiesce_queue(ctrl->admin_q);
Chao Lengd6f66212020-10-22 10:15:15 +08001899 blk_sync_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001900 nvme_tcp_stop_queue(ctrl, 0);
Ming Lei622b8b62019-07-24 11:48:42 +08001901 if (ctrl->admin_tagset) {
Sagi Grimberg7a425892019-04-24 11:53:17 -07001902 blk_mq_tagset_busy_iter(ctrl->admin_tagset,
1903 nvme_cancel_request, ctrl);
Ming Lei622b8b62019-07-24 11:48:42 +08001904 blk_mq_tagset_wait_completed_request(ctrl->admin_tagset);
1905 }
Sagi Grimberge7832cb2019-08-02 19:33:59 -07001906 if (remove)
1907 blk_mq_unquiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001908 nvme_tcp_destroy_admin_queue(ctrl, remove);
1909}
1910
1911static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
1912 bool remove)
1913{
1914 if (ctrl->queue_count <= 1)
Chao Lengd6f66212020-10-22 10:15:15 +08001915 return;
Sagi Grimbergd4d61472020-08-05 18:13:48 -07001916 blk_mq_quiesce_queue(ctrl->admin_q);
Sagi Grimberg2875b0a2020-07-24 15:10:12 -07001917 nvme_start_freeze(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001918 nvme_stop_queues(ctrl);
Chao Lengd6f66212020-10-22 10:15:15 +08001919 nvme_sync_io_queues(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001920 nvme_tcp_stop_io_queues(ctrl);
Ming Lei622b8b62019-07-24 11:48:42 +08001921 if (ctrl->tagset) {
Sagi Grimberg7a425892019-04-24 11:53:17 -07001922 blk_mq_tagset_busy_iter(ctrl->tagset,
1923 nvme_cancel_request, ctrl);
Ming Lei622b8b62019-07-24 11:48:42 +08001924 blk_mq_tagset_wait_completed_request(ctrl->tagset);
1925 }
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001926 if (remove)
1927 nvme_start_queues(ctrl);
1928 nvme_tcp_destroy_io_queues(ctrl, remove);
1929}
1930
1931static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
1932{
1933 /* If we are resetting/deleting then do nothing */
1934 if (ctrl->state != NVME_CTRL_CONNECTING) {
1935 WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
1936 ctrl->state == NVME_CTRL_LIVE);
1937 return;
1938 }
1939
1940 if (nvmf_should_reconnect(ctrl)) {
1941 dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
1942 ctrl->opts->reconnect_delay);
1943 queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
1944 ctrl->opts->reconnect_delay * HZ);
1945 } else {
1946 dev_info(ctrl->device, "Removing controller...\n");
1947 nvme_delete_ctrl(ctrl);
1948 }
1949}
1950
1951static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
1952{
1953 struct nvmf_ctrl_options *opts = ctrl->opts;
Colin Ian King312910f2019-09-05 15:34:35 +01001954 int ret;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001955
1956 ret = nvme_tcp_configure_admin_queue(ctrl, new);
1957 if (ret)
1958 return ret;
1959
1960 if (ctrl->icdoff) {
1961 dev_err(ctrl->device, "icdoff is not supported!\n");
1962 goto destroy_admin;
1963 }
1964
1965 if (opts->queue_size > ctrl->sqsize + 1)
1966 dev_warn(ctrl->device,
1967 "queue_size %zu > ctrl sqsize %u, clamping down\n",
1968 opts->queue_size, ctrl->sqsize + 1);
1969
1970 if (ctrl->sqsize + 1 > ctrl->maxcmd) {
1971 dev_warn(ctrl->device,
1972 "sqsize %u > ctrl maxcmd %u, clamping down\n",
1973 ctrl->sqsize + 1, ctrl->maxcmd);
1974 ctrl->sqsize = ctrl->maxcmd - 1;
1975 }
1976
1977 if (ctrl->queue_count > 1) {
1978 ret = nvme_tcp_configure_io_queues(ctrl, new);
1979 if (ret)
1980 goto destroy_admin;
1981 }
1982
1983 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
Israel Rukshinbea54ef2020-03-24 17:29:45 +02001984 /*
Sagi Grimbergecca390e2020-07-22 16:32:19 -07001985 * state change failure is ok if we started ctrl delete,
Israel Rukshinbea54ef2020-03-24 17:29:45 +02001986 * unless we're during creation of a new controller to
1987 * avoid races with teardown flow.
1988 */
Sagi Grimbergecca390e2020-07-22 16:32:19 -07001989 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
1990 ctrl->state != NVME_CTRL_DELETING_NOIO);
Israel Rukshinbea54ef2020-03-24 17:29:45 +02001991 WARN_ON_ONCE(new);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08001992 ret = -EINVAL;
1993 goto destroy_io;
1994 }
1995
1996 nvme_start_ctrl(ctrl);
1997 return 0;
1998
1999destroy_io:
2000 if (ctrl->queue_count > 1)
2001 nvme_tcp_destroy_io_queues(ctrl, new);
2002destroy_admin:
2003 nvme_tcp_stop_queue(ctrl, 0);
2004 nvme_tcp_destroy_admin_queue(ctrl, new);
2005 return ret;
2006}
2007
2008static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
2009{
2010 struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
2011 struct nvme_tcp_ctrl, connect_work);
2012 struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
2013
2014 ++ctrl->nr_reconnects;
2015
2016 if (nvme_tcp_setup_ctrl(ctrl, false))
2017 goto requeue;
2018
Colin Ian King56a77d22018-12-14 11:42:43 +00002019 dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002020 ctrl->nr_reconnects);
2021
2022 ctrl->nr_reconnects = 0;
2023
2024 return;
2025
2026requeue:
2027 dev_info(ctrl->device, "Failed reconnect attempt %d\n",
2028 ctrl->nr_reconnects);
2029 nvme_tcp_reconnect_or_remove(ctrl);
2030}
2031
2032static void nvme_tcp_error_recovery_work(struct work_struct *work)
2033{
2034 struct nvme_tcp_ctrl *tcp_ctrl = container_of(work,
2035 struct nvme_tcp_ctrl, err_work);
2036 struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
2037
2038 nvme_stop_keep_alive(ctrl);
2039 nvme_tcp_teardown_io_queues(ctrl, false);
2040 /* unquiesce to fail fast pending requests */
2041 nvme_start_queues(ctrl);
2042 nvme_tcp_teardown_admin_queue(ctrl, false);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07002043 blk_mq_unquiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002044
2045 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
Sagi Grimbergecca390e2020-07-22 16:32:19 -07002046 /* state change failure is ok if we started ctrl delete */
2047 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2048 ctrl->state != NVME_CTRL_DELETING_NOIO);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002049 return;
2050 }
2051
2052 nvme_tcp_reconnect_or_remove(ctrl);
2053}
2054
2055static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
2056{
Sagi Grimberg794a4cb2019-01-01 00:19:30 -08002057 cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
2058 cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
2059
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002060 nvme_tcp_teardown_io_queues(ctrl, shutdown);
Sagi Grimberge7832cb2019-08-02 19:33:59 -07002061 blk_mq_quiesce_queue(ctrl->admin_q);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002062 if (shutdown)
2063 nvme_shutdown_ctrl(ctrl);
2064 else
Sagi Grimbergb5b05042019-07-22 17:06:54 -07002065 nvme_disable_ctrl(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002066 nvme_tcp_teardown_admin_queue(ctrl, shutdown);
2067}
2068
2069static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
2070{
2071 nvme_tcp_teardown_ctrl(ctrl, true);
2072}
2073
2074static void nvme_reset_ctrl_work(struct work_struct *work)
2075{
2076 struct nvme_ctrl *ctrl =
2077 container_of(work, struct nvme_ctrl, reset_work);
2078
2079 nvme_stop_ctrl(ctrl);
2080 nvme_tcp_teardown_ctrl(ctrl, false);
2081
2082 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
Sagi Grimbergecca390e2020-07-22 16:32:19 -07002083 /* state change failure is ok if we started ctrl delete */
2084 WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2085 ctrl->state != NVME_CTRL_DELETING_NOIO);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002086 return;
2087 }
2088
2089 if (nvme_tcp_setup_ctrl(ctrl, false))
2090 goto out_fail;
2091
2092 return;
2093
2094out_fail:
2095 ++ctrl->nr_reconnects;
2096 nvme_tcp_reconnect_or_remove(ctrl);
2097}
2098
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002099static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
2100{
2101 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
2102
2103 if (list_empty(&ctrl->list))
2104 goto free_ctrl;
2105
2106 mutex_lock(&nvme_tcp_ctrl_mutex);
2107 list_del(&ctrl->list);
2108 mutex_unlock(&nvme_tcp_ctrl_mutex);
2109
2110 nvmf_free_options(nctrl->opts);
2111free_ctrl:
2112 kfree(ctrl->queues);
2113 kfree(ctrl);
2114}
2115
2116static void nvme_tcp_set_sg_null(struct nvme_command *c)
2117{
2118 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2119
2120 sg->addr = 0;
2121 sg->length = 0;
2122 sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
2123 NVME_SGL_FMT_TRANSPORT_A;
2124}
2125
2126static void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue,
2127 struct nvme_command *c, u32 data_len)
2128{
2129 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2130
2131 sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
2132 sg->length = cpu_to_le32(data_len);
2133 sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
2134}
2135
2136static void nvme_tcp_set_sg_host_data(struct nvme_command *c,
2137 u32 data_len)
2138{
2139 struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
2140
2141 sg->addr = 0;
2142 sg->length = cpu_to_le32(data_len);
2143 sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
2144 NVME_SGL_FMT_TRANSPORT_A;
2145}
2146
2147static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
2148{
2149 struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg);
2150 struct nvme_tcp_queue *queue = &ctrl->queues[0];
2151 struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu;
2152 struct nvme_command *cmd = &pdu->cmd;
2153 u8 hdgst = nvme_tcp_hdgst_len(queue);
2154
2155 memset(pdu, 0, sizeof(*pdu));
2156 pdu->hdr.type = nvme_tcp_cmd;
2157 if (queue->hdr_digest)
2158 pdu->hdr.flags |= NVME_TCP_F_HDGST;
2159 pdu->hdr.hlen = sizeof(*pdu);
2160 pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
2161
2162 cmd->common.opcode = nvme_admin_async_event;
2163 cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
2164 cmd->common.flags |= NVME_CMD_SGL_METABUF;
2165 nvme_tcp_set_sg_null(cmd);
2166
2167 ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU;
2168 ctrl->async_req.offset = 0;
2169 ctrl->async_req.curr_bio = NULL;
2170 ctrl->async_req.data_len = 0;
2171
Sagi Grimberg86f03482020-06-18 17:30:23 -07002172 nvme_tcp_queue_request(&ctrl->async_req, true, true);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002173}
2174
Sagi Grimberg236187c2020-07-28 13:16:36 -07002175static void nvme_tcp_complete_timed_out(struct request *rq)
2176{
2177 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2178 struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
2179
Sagi Grimberg236187c2020-07-28 13:16:36 -07002180 nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
Sagi Grimberg0a8a2c852020-10-22 10:15:31 +08002181 if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
Sagi Grimberg236187c2020-07-28 13:16:36 -07002182 nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
2183 blk_mq_complete_request(rq);
2184 }
Sagi Grimberg236187c2020-07-28 13:16:36 -07002185}
2186
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002187static enum blk_eh_timer_return
2188nvme_tcp_timeout(struct request *rq, bool reserved)
2189{
2190 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
Sagi Grimberg236187c2020-07-28 13:16:36 -07002191 struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002192 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2193
Sagi Grimberg236187c2020-07-28 13:16:36 -07002194 dev_warn(ctrl->device,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002195 "queue %d: timeout request %#x type %d\n",
Sagi Grimberg39d57752019-01-08 01:01:30 -08002196 nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002197
Sagi Grimberg236187c2020-07-28 13:16:36 -07002198 if (ctrl->state != NVME_CTRL_LIVE) {
Sagi Grimberg39d57752019-01-08 01:01:30 -08002199 /*
Sagi Grimberg236187c2020-07-28 13:16:36 -07002200 * If we are resetting, connecting or deleting we should
2201 * complete immediately because we may block controller
2202 * teardown or setup sequence
2203 * - ctrl disable/shutdown fabrics requests
2204 * - connect requests
2205 * - initialization admin requests
2206 * - I/O requests that entered after unquiescing and
2207 * the controller stopped responding
2208 *
2209 * All other requests should be cancelled by the error
2210 * recovery work, so it's fine that we fail it here.
Sagi Grimberg39d57752019-01-08 01:01:30 -08002211 */
Sagi Grimberg236187c2020-07-28 13:16:36 -07002212 nvme_tcp_complete_timed_out(rq);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002213 return BLK_EH_DONE;
2214 }
2215
Sagi Grimberg236187c2020-07-28 13:16:36 -07002216 /*
2217 * LIVE state should trigger the normal error recovery which will
2218 * handle completing this request.
2219 */
2220 nvme_tcp_error_recovery(ctrl);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002221 return BLK_EH_RESET_TIMER;
2222}
2223
2224static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
2225 struct request *rq)
2226{
2227 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2228 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2229 struct nvme_command *c = &pdu->cmd;
2230
2231 c->common.flags |= NVME_CMD_SGL_METABUF;
2232
Sagi Grimberg25e5cb72020-03-23 15:06:30 -07002233 if (!blk_rq_nr_phys_segments(rq))
2234 nvme_tcp_set_sg_null(c);
2235 else if (rq_data_dir(rq) == WRITE &&
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002236 req->data_len <= nvme_tcp_inline_data_size(queue))
2237 nvme_tcp_set_sg_inline(queue, c, req->data_len);
2238 else
2239 nvme_tcp_set_sg_host_data(c, req->data_len);
2240
2241 return 0;
2242}
2243
2244static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
2245 struct request *rq)
2246{
2247 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2248 struct nvme_tcp_cmd_pdu *pdu = req->pdu;
2249 struct nvme_tcp_queue *queue = req->queue;
2250 u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
2251 blk_status_t ret;
2252
2253 ret = nvme_setup_cmd(ns, rq, &pdu->cmd);
2254 if (ret)
2255 return ret;
2256
2257 req->state = NVME_TCP_SEND_CMD_PDU;
2258 req->offset = 0;
2259 req->data_sent = 0;
2260 req->pdu_len = 0;
2261 req->pdu_sent = 0;
Sagi Grimberg25e5cb72020-03-23 15:06:30 -07002262 req->data_len = blk_rq_nr_phys_segments(rq) ?
2263 blk_rq_payload_bytes(rq) : 0;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002264 req->curr_bio = rq->bio;
2265
2266 if (rq_data_dir(rq) == WRITE &&
2267 req->data_len <= nvme_tcp_inline_data_size(queue))
2268 req->pdu_len = req->data_len;
2269 else if (req->curr_bio)
2270 nvme_tcp_init_iter(req, READ);
2271
2272 pdu->hdr.type = nvme_tcp_cmd;
2273 pdu->hdr.flags = 0;
2274 if (queue->hdr_digest)
2275 pdu->hdr.flags |= NVME_TCP_F_HDGST;
2276 if (queue->data_digest && req->pdu_len) {
2277 pdu->hdr.flags |= NVME_TCP_F_DDGST;
2278 ddgst = nvme_tcp_ddgst_len(queue);
2279 }
2280 pdu->hdr.hlen = sizeof(*pdu);
2281 pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0;
2282 pdu->hdr.plen =
2283 cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst);
2284
2285 ret = nvme_tcp_map_data(queue, rq);
2286 if (unlikely(ret)) {
Max Gurtovoy28a4cac2019-10-13 19:57:38 +03002287 nvme_cleanup_cmd(rq);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002288 dev_err(queue->ctrl->ctrl.device,
2289 "Failed to map data (%d)\n", ret);
2290 return ret;
2291 }
2292
2293 return 0;
2294}
2295
Sagi Grimberg86f03482020-06-18 17:30:23 -07002296static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
2297{
2298 struct nvme_tcp_queue *queue = hctx->driver_data;
2299
2300 if (!llist_empty(&queue->req_list))
2301 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
2302}
2303
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002304static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
2305 const struct blk_mq_queue_data *bd)
2306{
2307 struct nvme_ns *ns = hctx->queue->queuedata;
2308 struct nvme_tcp_queue *queue = hctx->driver_data;
2309 struct request *rq = bd->rq;
2310 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
2311 bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
2312 blk_status_t ret;
2313
2314 if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
2315 return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
2316
2317 ret = nvme_tcp_setup_cmd_pdu(ns, rq);
2318 if (unlikely(ret))
2319 return ret;
2320
2321 blk_mq_start_request(rq);
2322
Sagi Grimberg86f03482020-06-18 17:30:23 -07002323 nvme_tcp_queue_request(req, true, bd->last);
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002324
2325 return BLK_STS_OK;
2326}
2327
Sagi Grimberg873946f2018-12-11 23:38:57 -08002328static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
2329{
2330 struct nvme_tcp_ctrl *ctrl = set->driver_data;
Sagi Grimberg64861992019-05-28 22:49:05 -07002331 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
Sagi Grimberg873946f2018-12-11 23:38:57 -08002332
Sagi Grimberg64861992019-05-28 22:49:05 -07002333 if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
Sagi Grimberg873946f2018-12-11 23:38:57 -08002334 /* separate read/write queues */
2335 set->map[HCTX_TYPE_DEFAULT].nr_queues =
Sagi Grimberg64861992019-05-28 22:49:05 -07002336 ctrl->io_queues[HCTX_TYPE_DEFAULT];
2337 set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
2338 set->map[HCTX_TYPE_READ].nr_queues =
2339 ctrl->io_queues[HCTX_TYPE_READ];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002340 set->map[HCTX_TYPE_READ].queue_offset =
Sagi Grimberg64861992019-05-28 22:49:05 -07002341 ctrl->io_queues[HCTX_TYPE_DEFAULT];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002342 } else {
Sagi Grimberg64861992019-05-28 22:49:05 -07002343 /* shared read/write queues */
Sagi Grimberg873946f2018-12-11 23:38:57 -08002344 set->map[HCTX_TYPE_DEFAULT].nr_queues =
Sagi Grimberg64861992019-05-28 22:49:05 -07002345 ctrl->io_queues[HCTX_TYPE_DEFAULT];
2346 set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
2347 set->map[HCTX_TYPE_READ].nr_queues =
2348 ctrl->io_queues[HCTX_TYPE_DEFAULT];
Sagi Grimberg873946f2018-12-11 23:38:57 -08002349 set->map[HCTX_TYPE_READ].queue_offset = 0;
2350 }
2351 blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
2352 blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
Sagi Grimberg64861992019-05-28 22:49:05 -07002353
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002354 if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
2355 /* map dedicated poll queues only if we have queues left */
2356 set->map[HCTX_TYPE_POLL].nr_queues =
2357 ctrl->io_queues[HCTX_TYPE_POLL];
2358 set->map[HCTX_TYPE_POLL].queue_offset =
2359 ctrl->io_queues[HCTX_TYPE_DEFAULT] +
2360 ctrl->io_queues[HCTX_TYPE_READ];
2361 blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
2362 }
2363
Sagi Grimberg64861992019-05-28 22:49:05 -07002364 dev_info(ctrl->ctrl.device,
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002365 "mapped %d/%d/%d default/read/poll queues.\n",
Sagi Grimberg64861992019-05-28 22:49:05 -07002366 ctrl->io_queues[HCTX_TYPE_DEFAULT],
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002367 ctrl->io_queues[HCTX_TYPE_READ],
2368 ctrl->io_queues[HCTX_TYPE_POLL]);
Sagi Grimberg64861992019-05-28 22:49:05 -07002369
Sagi Grimberg873946f2018-12-11 23:38:57 -08002370 return 0;
2371}
2372
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002373static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
2374{
2375 struct nvme_tcp_queue *queue = hctx->driver_data;
2376 struct sock *sk = queue->sock->sk;
2377
Sagi Grimbergf86e5bf2020-03-23 16:43:52 -07002378 if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
2379 return 0;
2380
Sagi Grimberg72e5d752020-05-01 14:25:44 -07002381 set_bit(NVME_TCP_Q_POLLING, &queue->flags);
Eric Dumazet3f926af2019-10-23 22:44:51 -07002382 if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002383 sk_busy_loop(sk, true);
2384 nvme_tcp_try_recv(queue);
Sagi Grimberg72e5d752020-05-01 14:25:44 -07002385 clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002386 return queue->nr_cqe;
2387}
2388
Rikard Falkeborn6acbd962020-05-29 00:25:07 +02002389static const struct blk_mq_ops nvme_tcp_mq_ops = {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002390 .queue_rq = nvme_tcp_queue_rq,
Sagi Grimberg86f03482020-06-18 17:30:23 -07002391 .commit_rqs = nvme_tcp_commit_rqs,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002392 .complete = nvme_complete_rq,
2393 .init_request = nvme_tcp_init_request,
2394 .exit_request = nvme_tcp_exit_request,
2395 .init_hctx = nvme_tcp_init_hctx,
2396 .timeout = nvme_tcp_timeout,
Sagi Grimberg873946f2018-12-11 23:38:57 -08002397 .map_queues = nvme_tcp_map_queues,
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002398 .poll = nvme_tcp_poll,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002399};
2400
Rikard Falkeborn6acbd962020-05-29 00:25:07 +02002401static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002402 .queue_rq = nvme_tcp_queue_rq,
2403 .complete = nvme_complete_rq,
2404 .init_request = nvme_tcp_init_request,
2405 .exit_request = nvme_tcp_exit_request,
2406 .init_hctx = nvme_tcp_init_admin_hctx,
2407 .timeout = nvme_tcp_timeout,
2408};
2409
2410static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
2411 .name = "tcp",
2412 .module = THIS_MODULE,
2413 .flags = NVME_F_FABRICS,
2414 .reg_read32 = nvmf_reg_read32,
2415 .reg_read64 = nvmf_reg_read64,
2416 .reg_write32 = nvmf_reg_write32,
2417 .free_ctrl = nvme_tcp_free_ctrl,
2418 .submit_async_event = nvme_tcp_submit_async_event,
2419 .delete_ctrl = nvme_tcp_delete_ctrl,
2420 .get_address = nvmf_get_address,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002421};
2422
2423static bool
2424nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
2425{
2426 struct nvme_tcp_ctrl *ctrl;
2427 bool found = false;
2428
2429 mutex_lock(&nvme_tcp_ctrl_mutex);
2430 list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) {
2431 found = nvmf_ip_options_match(&ctrl->ctrl, opts);
2432 if (found)
2433 break;
2434 }
2435 mutex_unlock(&nvme_tcp_ctrl_mutex);
2436
2437 return found;
2438}
2439
2440static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
2441 struct nvmf_ctrl_options *opts)
2442{
2443 struct nvme_tcp_ctrl *ctrl;
2444 int ret;
2445
2446 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
2447 if (!ctrl)
2448 return ERR_PTR(-ENOMEM);
2449
2450 INIT_LIST_HEAD(&ctrl->list);
2451 ctrl->ctrl.opts = opts;
Sagi Grimberg1a9460c2019-07-03 14:08:04 -07002452 ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
2453 opts->nr_poll_queues + 1;
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002454 ctrl->ctrl.sqsize = opts->queue_size - 1;
2455 ctrl->ctrl.kato = opts->kato;
2456
2457 INIT_DELAYED_WORK(&ctrl->connect_work,
2458 nvme_tcp_reconnect_ctrl_work);
2459 INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
2460 INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
2461
2462 if (!(opts->mask & NVMF_OPT_TRSVCID)) {
2463 opts->trsvcid =
2464 kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
2465 if (!opts->trsvcid) {
2466 ret = -ENOMEM;
2467 goto out_free_ctrl;
2468 }
2469 opts->mask |= NVMF_OPT_TRSVCID;
2470 }
2471
2472 ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
2473 opts->traddr, opts->trsvcid, &ctrl->addr);
2474 if (ret) {
2475 pr_err("malformed address passed: %s:%s\n",
2476 opts->traddr, opts->trsvcid);
2477 goto out_free_ctrl;
2478 }
2479
2480 if (opts->mask & NVMF_OPT_HOST_TRADDR) {
2481 ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
2482 opts->host_traddr, NULL, &ctrl->src_addr);
2483 if (ret) {
2484 pr_err("malformed src address passed: %s\n",
2485 opts->host_traddr);
2486 goto out_free_ctrl;
2487 }
2488 }
2489
2490 if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
2491 ret = -EALREADY;
2492 goto out_free_ctrl;
2493 }
2494
Sagi Grimberg873946f2018-12-11 23:38:57 -08002495 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002496 GFP_KERNEL);
2497 if (!ctrl->queues) {
2498 ret = -ENOMEM;
2499 goto out_free_ctrl;
2500 }
2501
2502 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
2503 if (ret)
2504 goto out_kfree_queues;
2505
2506 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
2507 WARN_ON_ONCE(1);
2508 ret = -EINTR;
2509 goto out_uninit_ctrl;
2510 }
2511
2512 ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true);
2513 if (ret)
2514 goto out_uninit_ctrl;
2515
2516 dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
2517 ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
2518
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002519 mutex_lock(&nvme_tcp_ctrl_mutex);
2520 list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
2521 mutex_unlock(&nvme_tcp_ctrl_mutex);
2522
2523 return &ctrl->ctrl;
2524
2525out_uninit_ctrl:
2526 nvme_uninit_ctrl(&ctrl->ctrl);
2527 nvme_put_ctrl(&ctrl->ctrl);
2528 if (ret > 0)
2529 ret = -EIO;
2530 return ERR_PTR(ret);
2531out_kfree_queues:
2532 kfree(ctrl->queues);
2533out_free_ctrl:
2534 kfree(ctrl);
2535 return ERR_PTR(ret);
2536}
2537
2538static struct nvmf_transport_ops nvme_tcp_transport = {
2539 .name = "tcp",
2540 .module = THIS_MODULE,
2541 .required_opts = NVMF_OPT_TRADDR,
2542 .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
2543 NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
Sagi Grimberg873946f2018-12-11 23:38:57 -08002544 NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
Israel Rukshinbb139852019-08-18 12:08:54 +03002545 NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
2546 NVMF_OPT_TOS,
Sagi Grimberg3f2304f2018-12-03 17:52:17 -08002547 .create_ctrl = nvme_tcp_create_ctrl,
2548};
2549
2550static int __init nvme_tcp_init_module(void)
2551{
2552 nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
2553 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2554 if (!nvme_tcp_wq)
2555 return -ENOMEM;
2556
2557 nvmf_register_transport(&nvme_tcp_transport);
2558 return 0;
2559}
2560
2561static void __exit nvme_tcp_cleanup_module(void)
2562{
2563 struct nvme_tcp_ctrl *ctrl;
2564
2565 nvmf_unregister_transport(&nvme_tcp_transport);
2566
2567 mutex_lock(&nvme_tcp_ctrl_mutex);
2568 list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list)
2569 nvme_delete_ctrl(&ctrl->ctrl);
2570 mutex_unlock(&nvme_tcp_ctrl_mutex);
2571 flush_workqueue(nvme_delete_wq);
2572
2573 destroy_workqueue(nvme_tcp_wq);
2574}
2575
2576module_init(nvme_tcp_init_module);
2577module_exit(nvme_tcp_cleanup_module);
2578
2579MODULE_LICENSE("GPL v2");