\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
Alexey Dobriyana6b7a402011-06-06 10:43:46 +000050#include <linux/interrupt.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090051#include <linux/slab.h>
Chuck Levereba8ff62015-01-21 11:03:02 -050052#include <linux/prefetch.h>
Chuck Lever0dd39ca2015-03-30 14:33:43 -040053#include <linux/sunrpc/addr.h>
Chuck Lever65866f82014-05-28 10:33:59 -040054#include <asm/bitops.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040055
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040056#include "xprt_rdma.h"
57
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040058/*
59 * Globals/Macros
60 */
61
Jeff Laytonf895b252014-11-17 16:58:04 -050062#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040063# define RPCDBG_FACILITY RPCDBG_TRANS
64#endif
65
66/*
67 * internal functions
68 */
69
70/*
71 * handle replies in tasklet context, using a single, global list
72 * rdma tasklet function -- just turn around and call the func
73 * for all replies on the list
74 */
75
76static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
77static LIST_HEAD(rpcrdma_tasklets_g);
78
79static void
80rpcrdma_run_tasklet(unsigned long data)
81{
82 struct rpcrdma_rep *rep;
83 void (*func)(struct rpcrdma_rep *);
84 unsigned long flags;
85
86 data = data;
87 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
88 while (!list_empty(&rpcrdma_tasklets_g)) {
89 rep = list_entry(rpcrdma_tasklets_g.next,
90 struct rpcrdma_rep, rr_list);
91 list_del(&rep->rr_list);
92 func = rep->rr_func;
93 rep->rr_func = NULL;
94 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
95
96 if (func)
97 func(rep);
98 else
99 rpcrdma_recv_buffer_put(rep);
100
101 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
102 }
103 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
104}
105
106static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
107
Chuck Lever7ff11de2014-11-08 20:15:01 -0500108static const char * const async_event[] = {
109 "CQ error",
110 "QP fatal error",
111 "QP request error",
112 "QP access error",
113 "communication established",
114 "send queue drained",
115 "path migration successful",
116 "path mig error",
117 "device fatal error",
118 "port active",
119 "port error",
120 "LID change",
121 "P_key change",
122 "SM change",
123 "SRQ error",
124 "SRQ limit reached",
125 "last WQE reached",
126 "client reregister",
127 "GID change",
128};
129
130#define ASYNC_MSG(status) \
131 ((status) < ARRAY_SIZE(async_event) ? \
132 async_event[(status)] : "unknown async error")
133
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400134static void
Chuck Leverf1a03b72014-11-08 20:14:37 -0500135rpcrdma_schedule_tasklet(struct list_head *sched_list)
136{
137 unsigned long flags;
138
139 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
140 list_splice_tail(sched_list, &rpcrdma_tasklets_g);
141 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
142 tasklet_schedule(&rpcrdma_tasklet_g);
143}
144
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400145static void
146rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
147{
148 struct rpcrdma_ep *ep = context;
149
Chuck Lever7ff11de2014-11-08 20:15:01 -0500150 pr_err("RPC: %s: %s on device %s ep %p\n",
151 __func__, ASYNC_MSG(event->event),
152 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400153 if (ep->rep_connected == 1) {
154 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500155 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400156 wake_up_all(&ep->rep_connect_wait);
157 }
158}
159
160static void
161rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
162{
163 struct rpcrdma_ep *ep = context;
164
Chuck Lever7ff11de2014-11-08 20:15:01 -0500165 pr_err("RPC: %s: %s on device %s ep %p\n",
166 __func__, ASYNC_MSG(event->event),
167 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400168 if (ep->rep_connected == 1) {
169 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500170 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400171 wake_up_all(&ep->rep_connect_wait);
172 }
173}
174
static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

200#define COMPLETION_MSG(status) \
201 ((status) < ARRAY_SIZE(wc_status) ? \
202 wc_status[(status)] : "unexpected completion error")
203
Chuck Leverfc664482014-05-28 10:33:25 -0400204static void
205rpcrdma_sendcq_process_wc(struct ib_wc *wc)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400206{
Chuck Lever85024272015-01-21 11:02:04 -0500207 if (likely(wc->status == IB_WC_SUCCESS))
Chuck Leverfc664482014-05-28 10:33:25 -0400208 return;
Chuck Lever85024272015-01-21 11:02:04 -0500209
210 /* WARNING: Only wr_id and status are reliable at this point */
211 if (wc->wr_id == 0ULL) {
212 if (wc->status != IB_WC_WR_FLUSH_ERR)
213 pr_err("RPC: %s: SEND: %s\n",
214 __func__, COMPLETION_MSG(wc->status));
215 } else {
216 struct rpcrdma_mw *r;
217
218 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
219 r->r.frmr.fr_state = FRMR_IS_STALE;
220 pr_err("RPC: %s: frmr %p (stale): %s\n",
221 __func__, r, COMPLETION_MSG(wc->status));
222 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400223}
224
Chuck Leverfc664482014-05-28 10:33:25 -0400225static int
Chuck Lever1c00dd02014-05-28 10:33:42 -0400226rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400227{
Chuck Lever1c00dd02014-05-28 10:33:42 -0400228 struct ib_wc *wcs;
Chuck Lever8301a2c2014-05-28 10:33:51 -0400229 int budget, count, rc;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400230
Chuck Lever8301a2c2014-05-28 10:33:51 -0400231 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
Chuck Lever1c00dd02014-05-28 10:33:42 -0400232 do {
233 wcs = ep->rep_send_wcs;
234
235 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
236 if (rc <= 0)
237 return rc;
238
239 count = rc;
240 while (count-- > 0)
241 rpcrdma_sendcq_process_wc(wcs++);
Chuck Lever8301a2c2014-05-28 10:33:51 -0400242 } while (rc == RPCRDMA_POLLSIZE && --budget);
Chuck Lever1c00dd02014-05-28 10:33:42 -0400243 return 0;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400244}
245
246/*
Chuck Leverfc664482014-05-28 10:33:25 -0400247 * Handle send, fast_reg_mr, and local_inv completions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400248 *
Chuck Leverfc664482014-05-28 10:33:25 -0400249 * Send events are typically suppressed and thus do not result
250 * in an upcall. Occasionally one is signaled, however. This
251 * prevents the provider's completion queue from wrapping and
252 * losing a completion.
253 */
254static void
255rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
256{
Chuck Lever1c00dd02014-05-28 10:33:42 -0400257 struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
Chuck Leverfc664482014-05-28 10:33:25 -0400258 int rc;
259
Chuck Lever1c00dd02014-05-28 10:33:42 -0400260 rc = rpcrdma_sendcq_poll(cq, ep);
Chuck Leverfc664482014-05-28 10:33:25 -0400261 if (rc) {
262 dprintk("RPC: %s: ib_poll_cq failed: %i\n",
263 __func__, rc);
264 return;
265 }
266
Chuck Lever7f23f6f2014-05-28 10:33:34 -0400267 rc = ib_req_notify_cq(cq,
268 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
269 if (rc == 0)
270 return;
271 if (rc < 0) {
Chuck Leverfc664482014-05-28 10:33:25 -0400272 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
273 __func__, rc);
274 return;
275 }
276
Chuck Lever1c00dd02014-05-28 10:33:42 -0400277 rpcrdma_sendcq_poll(cq, ep);
Chuck Leverfc664482014-05-28 10:33:25 -0400278}
279
280static void
Chuck Leverbb961932014-07-29 17:25:46 -0400281rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
Chuck Leverfc664482014-05-28 10:33:25 -0400282{
283 struct rpcrdma_rep *rep =
284 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
285
Chuck Lever85024272015-01-21 11:02:04 -0500286 /* WARNING: Only wr_id and status are reliable at this point */
287 if (wc->status != IB_WC_SUCCESS)
288 goto out_fail;
Chuck Leverfc664482014-05-28 10:33:25 -0400289
Chuck Lever85024272015-01-21 11:02:04 -0500290 /* status == SUCCESS means all fields in wc are trustworthy */
Chuck Leverfc664482014-05-28 10:33:25 -0400291 if (wc->opcode != IB_WC_RECV)
292 return;
293
Chuck Lever85024272015-01-21 11:02:04 -0500294 dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
295 __func__, rep, wc->byte_len);
296
Chuck Leverfc664482014-05-28 10:33:25 -0400297 rep->rr_len = wc->byte_len;
298 ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
Chuck Lever6b1184c2015-01-21 11:04:25 -0500299 rdmab_addr(rep->rr_rdmabuf),
300 rep->rr_len, DMA_FROM_DEVICE);
301 prefetch(rdmab_to_msg(rep->rr_rdmabuf));
Chuck Leverfc664482014-05-28 10:33:25 -0400302
303out_schedule:
Chuck Leverbb961932014-07-29 17:25:46 -0400304 list_add_tail(&rep->rr_list, sched_list);
Chuck Lever85024272015-01-21 11:02:04 -0500305 return;
306out_fail:
307 if (wc->status != IB_WC_WR_FLUSH_ERR)
308 pr_err("RPC: %s: rep %p: %s\n",
309 __func__, rep, COMPLETION_MSG(wc->status));
310 rep->rr_len = ~0U;
311 goto out_schedule;
Chuck Leverfc664482014-05-28 10:33:25 -0400312}
313
314static int
Chuck Lever1c00dd02014-05-28 10:33:42 -0400315rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
Chuck Leverfc664482014-05-28 10:33:25 -0400316{
Chuck Leverbb961932014-07-29 17:25:46 -0400317 struct list_head sched_list;
Chuck Lever1c00dd02014-05-28 10:33:42 -0400318 struct ib_wc *wcs;
Chuck Lever8301a2c2014-05-28 10:33:51 -0400319 int budget, count, rc;
Chuck Leverfc664482014-05-28 10:33:25 -0400320
Chuck Leverbb961932014-07-29 17:25:46 -0400321 INIT_LIST_HEAD(&sched_list);
Chuck Lever8301a2c2014-05-28 10:33:51 -0400322 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
Chuck Lever1c00dd02014-05-28 10:33:42 -0400323 do {
324 wcs = ep->rep_recv_wcs;
325
326 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
327 if (rc <= 0)
Chuck Leverbb961932014-07-29 17:25:46 -0400328 goto out_schedule;
Chuck Lever1c00dd02014-05-28 10:33:42 -0400329
330 count = rc;
331 while (count-- > 0)
Chuck Leverbb961932014-07-29 17:25:46 -0400332 rpcrdma_recvcq_process_wc(wcs++, &sched_list);
Chuck Lever8301a2c2014-05-28 10:33:51 -0400333 } while (rc == RPCRDMA_POLLSIZE && --budget);
Chuck Leverbb961932014-07-29 17:25:46 -0400334 rc = 0;
335
336out_schedule:
Chuck Leverf1a03b72014-11-08 20:14:37 -0500337 rpcrdma_schedule_tasklet(&sched_list);
Chuck Leverbb961932014-07-29 17:25:46 -0400338 return rc;
Chuck Leverfc664482014-05-28 10:33:25 -0400339}
340
341/*
342 * Handle receive completions.
343 *
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400344 * It is reentrant but processes single events in order to maintain
345 * ordering of receives to keep server credits.
346 *
347 * It is the responsibility of the scheduled tasklet to return
348 * recv buffers to the pool. NOTE: this affects synchronization of
349 * connection shutdown. That is, the structures required for
350 * the completion of the reply handler must remain intact until
351 * all memory has been reclaimed.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400352 */
353static void
Chuck Leverfc664482014-05-28 10:33:25 -0400354rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400355{
Chuck Lever1c00dd02014-05-28 10:33:42 -0400356 struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400357 int rc;
358
Chuck Lever1c00dd02014-05-28 10:33:42 -0400359 rc = rpcrdma_recvcq_poll(cq, ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400360 if (rc) {
Chuck Leverfc664482014-05-28 10:33:25 -0400361 dprintk("RPC: %s: ib_poll_cq failed: %i\n",
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400362 __func__, rc);
363 return;
364 }
365
Chuck Lever7f23f6f2014-05-28 10:33:34 -0400366 rc = ib_req_notify_cq(cq,
367 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
368 if (rc == 0)
369 return;
370 if (rc < 0) {
Chuck Leverfc664482014-05-28 10:33:25 -0400371 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
372 __func__, rc);
373 return;
374 }
375
Chuck Lever1c00dd02014-05-28 10:33:42 -0400376 rpcrdma_recvcq_poll(cq, ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400377}
378
Chuck Levera7bc2112014-07-29 17:23:52 -0400379static void
380rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
381{
Chuck Lever5c166bef2014-11-08 20:14:45 -0500382 struct ib_wc wc;
383 LIST_HEAD(sched_list);
384
385 while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
386 rpcrdma_recvcq_process_wc(&wc, &sched_list);
387 if (!list_empty(&sched_list))
388 rpcrdma_schedule_tasklet(&sched_list);
389 while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
390 rpcrdma_sendcq_process_wc(&wc);
Chuck Levera7bc2112014-07-29 17:23:52 -0400391}
392
Jeff Laytonf895b252014-11-17 16:58:04 -0500393#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400394static const char * const conn[] = {
395 "address resolved",
396 "address error",
397 "route resolved",
398 "route error",
399 "connect request",
400 "connect response",
401 "connect error",
402 "unreachable",
403 "rejected",
404 "established",
405 "disconnected",
Chuck Lever8079fb72014-07-29 17:26:12 -0400406 "device removal",
407 "multicast join",
408 "multicast error",
409 "address change",
410 "timewait exit",
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400411};
Chuck Lever8079fb72014-07-29 17:26:12 -0400412
413#define CONNECTION_MSG(status) \
414 ((status) < ARRAY_SIZE(conn) ? \
415 conn[(status)] : "unrecognized connection error")
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400416#endif
417
418static int
419rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
420{
421 struct rpcrdma_xprt *xprt = id->context;
422 struct rpcrdma_ia *ia = &xprt->rx_ia;
423 struct rpcrdma_ep *ep = &xprt->rx_ep;
Jeff Laytonf895b252014-11-17 16:58:04 -0500424#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
Chuck Lever0dd39ca2015-03-30 14:33:43 -0400425 struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
Ingo Molnarff0db042008-11-25 16:58:42 -0800426#endif
Chuck Leverce1ab9a2015-01-21 11:03:35 -0500427 struct ib_qp_attr *attr = &ia->ri_qp_attr;
428 struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400429 int connstate = 0;
430
431 switch (event->event) {
432 case RDMA_CM_EVENT_ADDR_RESOLVED:
433 case RDMA_CM_EVENT_ROUTE_RESOLVED:
Tom Talpey5675add2008-10-09 15:01:41 -0400434 ia->ri_async_rc = 0;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400435 complete(&ia->ri_done);
436 break;
437 case RDMA_CM_EVENT_ADDR_ERROR:
438 ia->ri_async_rc = -EHOSTUNREACH;
439 dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
440 __func__, ep);
441 complete(&ia->ri_done);
442 break;
443 case RDMA_CM_EVENT_ROUTE_ERROR:
444 ia->ri_async_rc = -ENETUNREACH;
445 dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
446 __func__, ep);
447 complete(&ia->ri_done);
448 break;
449 case RDMA_CM_EVENT_ESTABLISHED:
450 connstate = 1;
Chuck Leverce1ab9a2015-01-21 11:03:35 -0500451 ib_query_qp(ia->ri_id->qp, attr,
452 IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
453 iattr);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400454 dprintk("RPC: %s: %d responder resources"
455 " (%d initiator)\n",
Chuck Leverce1ab9a2015-01-21 11:03:35 -0500456 __func__, attr->max_dest_rd_atomic,
457 attr->max_rd_atomic);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400458 goto connected;
459 case RDMA_CM_EVENT_CONNECT_ERROR:
460 connstate = -ENOTCONN;
461 goto connected;
462 case RDMA_CM_EVENT_UNREACHABLE:
463 connstate = -ENETDOWN;
464 goto connected;
465 case RDMA_CM_EVENT_REJECTED:
466 connstate = -ECONNREFUSED;
467 goto connected;
468 case RDMA_CM_EVENT_DISCONNECTED:
469 connstate = -ECONNABORTED;
470 goto connected;
471 case RDMA_CM_EVENT_DEVICE_REMOVAL:
472 connstate = -ENODEV;
473connected:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400474 dprintk("RPC: %s: %sconnected\n",
475 __func__, connstate > 0 ? "" : "dis");
476 ep->rep_connected = connstate;
Chuck Leverafadc462015-01-21 11:03:11 -0500477 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400478 wake_up_all(&ep->rep_connect_wait);
Chuck Lever8079fb72014-07-29 17:26:12 -0400479 /*FALLTHROUGH*/
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400480 default:
Chuck Lever0dd39ca2015-03-30 14:33:43 -0400481 dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
482 __func__, sap, rpc_get_port(sap), ep,
Chuck Lever8079fb72014-07-29 17:26:12 -0400483 CONNECTION_MSG(event->event));
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400484 break;
485 }
486
Jeff Laytonf895b252014-11-17 16:58:04 -0500487#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
Tom Talpeyb3cd8d42008-10-09 15:02:02 -0400488 if (connstate == 1) {
Chuck Leverce1ab9a2015-01-21 11:03:35 -0500489 int ird = attr->max_dest_rd_atomic;
Tom Talpeyb3cd8d42008-10-09 15:02:02 -0400490 int tird = ep->rep_remote_cma.responder_resources;
Chuck Lever0dd39ca2015-03-30 14:33:43 -0400491
Chuck Levera0ce85f2015-03-30 14:34:21 -0400492 pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
Chuck Lever0dd39ca2015-03-30 14:33:43 -0400493 sap, rpc_get_port(sap),
Tom Talpeyb3cd8d42008-10-09 15:02:02 -0400494 ia->ri_id->device->name,
Chuck Levera0ce85f2015-03-30 14:34:21 -0400495 ia->ri_ops->ro_displayname,
Tom Talpeyb3cd8d42008-10-09 15:02:02 -0400496 xprt->rx_buf.rb_max_requests,
497 ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
498 } else if (connstate < 0) {
Chuck Lever0dd39ca2015-03-30 14:33:43 -0400499 pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
500 sap, rpc_get_port(sap), connstate);
Tom Talpeyb3cd8d42008-10-09 15:02:02 -0400501 }
502#endif
503
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400504 return 0;
505}
506
507static struct rdma_cm_id *
508rpcrdma_create_id(struct rpcrdma_xprt *xprt,
509 struct rpcrdma_ia *ia, struct sockaddr *addr)
510{
511 struct rdma_cm_id *id;
512 int rc;
513
Tom Talpey1a954052008-10-09 15:01:31 -0400514 init_completion(&ia->ri_done);
515
Sean Heftyb26f9b92010-04-01 17:08:41 +0000516 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400517 if (IS_ERR(id)) {
518 rc = PTR_ERR(id);
519 dprintk("RPC: %s: rdma_create_id() failed %i\n",
520 __func__, rc);
521 return id;
522 }
523
Tom Talpey5675add2008-10-09 15:01:41 -0400524 ia->ri_async_rc = -ETIMEDOUT;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400525 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
526 if (rc) {
527 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
528 __func__, rc);
529 goto out;
530 }
Tom Talpey5675add2008-10-09 15:01:41 -0400531 wait_for_completion_interruptible_timeout(&ia->ri_done,
532 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400533 rc = ia->ri_async_rc;
534 if (rc)
535 goto out;
536
Tom Talpey5675add2008-10-09 15:01:41 -0400537 ia->ri_async_rc = -ETIMEDOUT;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400538 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
539 if (rc) {
540 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
541 __func__, rc);
542 goto out;
543 }
Tom Talpey5675add2008-10-09 15:01:41 -0400544 wait_for_completion_interruptible_timeout(&ia->ri_done,
545 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400546 rc = ia->ri_async_rc;
547 if (rc)
548 goto out;
549
550 return id;
551
552out:
553 rdma_destroy_id(id);
554 return ERR_PTR(rc);
555}
556
557/*
558 * Drain any cq, prior to teardown.
559 */
560static void
561rpcrdma_clean_cq(struct ib_cq *cq)
562{
563 struct ib_wc wc;
564 int count = 0;
565
566 while (1 == ib_poll_cq(cq, 1, &wc))
567 ++count;
568
569 if (count)
570 dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
571 __func__, count, wc.opcode);
572}
573
574/*
575 * Exported functions.
576 */
577
578/*
579 * Open and initialize an Interface Adapter.
580 * o initializes fields of struct rpcrdma_ia, including
581 * interface and provider attributes and protection zone.
582 */
583int
584rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
585{
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400586 int rc, mem_priv;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400587 struct rpcrdma_ia *ia = &xprt->rx_ia;
Chuck Lever7bc79722015-01-21 11:03:27 -0500588 struct ib_device_attr *devattr = &ia->ri_devattr;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400589
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400590 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
591 if (IS_ERR(ia->ri_id)) {
592 rc = PTR_ERR(ia->ri_id);
593 goto out1;
594 }
595
596 ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
597 if (IS_ERR(ia->ri_pd)) {
598 rc = PTR_ERR(ia->ri_pd);
599 dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
600 __func__, rc);
601 goto out2;
602 }
603
Chuck Lever7bc79722015-01-21 11:03:27 -0500604 rc = ib_query_device(ia->ri_id->device, devattr);
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400605 if (rc) {
606 dprintk("RPC: %s: ib_query_device failed %d\n",
607 __func__, rc);
Chuck Lever5ae711a2015-01-21 11:03:19 -0500608 goto out3;
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400609 }
610
Chuck Lever7bc79722015-01-21 11:03:27 -0500611 if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400612 ia->ri_have_dma_lkey = 1;
613 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
614 }
615
Chuck Leverf10eafd2014-05-28 10:32:51 -0400616 if (memreg == RPCRDMA_FRMR) {
Tom Talpey3197d3092008-10-09 15:00:20 -0400617 /* Requires both frmr reg and local dma lkey */
Chuck Lever41f97022015-03-30 14:34:12 -0400618 if (((devattr->device_cap_flags &
Tom Talpey3197d3092008-10-09 15:00:20 -0400619 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
Chuck Lever41f97022015-03-30 14:34:12 -0400620 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
621 (devattr->max_fast_reg_page_list_len == 0)) {
Tom Talpey3197d3092008-10-09 15:00:20 -0400622 dprintk("RPC: %s: FRMR registration "
Chuck Leverf10eafd2014-05-28 10:32:51 -0400623 "not supported by HCA\n", __func__);
624 memreg = RPCRDMA_MTHCAFMR;
Steve Wise0fc6c4e2014-05-28 10:32:00 -0400625 } else {
626 /* Mind the ia limit on FRMR page list depth */
627 ia->ri_max_frmr_depth = min_t(unsigned int,
628 RPCRDMA_MAX_DATA_SEGS,
Chuck Lever7bc79722015-01-21 11:03:27 -0500629 devattr->max_fast_reg_page_list_len);
Tom Talpey3197d3092008-10-09 15:00:20 -0400630 }
Chuck Leverf10eafd2014-05-28 10:32:51 -0400631 }
632 if (memreg == RPCRDMA_MTHCAFMR) {
633 if (!ia->ri_id->device->alloc_fmr) {
634 dprintk("RPC: %s: MTHCAFMR registration "
635 "not supported by HCA\n", __func__);
Chuck Leverf10eafd2014-05-28 10:32:51 -0400636 memreg = RPCRDMA_ALLPHYSICAL;
Chuck Leverf10eafd2014-05-28 10:32:51 -0400637 }
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400638 }
639
640 /*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400641 * Optionally obtain an underlying physical identity mapping in
642 * order to do a memory window-based bind. This base registration
643 * is protected from remote access - that is enabled only by binding
644 * for the specific bytes targeted during each RPC operation, and
645 * revoked after the corresponding completion similar to a storage
646 * adapter.
647 */
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400648 switch (memreg) {
Tom Talpey3197d3092008-10-09 15:00:20 -0400649 case RPCRDMA_FRMR:
Chuck Levera0ce85f2015-03-30 14:34:21 -0400650 ia->ri_ops = &rpcrdma_frwr_memreg_ops;
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400651 break;
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400652 case RPCRDMA_ALLPHYSICAL:
Chuck Levera0ce85f2015-03-30 14:34:21 -0400653 ia->ri_ops = &rpcrdma_physical_memreg_ops;
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400654 mem_priv = IB_ACCESS_LOCAL_WRITE |
655 IB_ACCESS_REMOTE_WRITE |
656 IB_ACCESS_REMOTE_READ;
657 goto register_setup;
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400658 case RPCRDMA_MTHCAFMR:
Chuck Levera0ce85f2015-03-30 14:34:21 -0400659 ia->ri_ops = &rpcrdma_fmr_memreg_ops;
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400660 if (ia->ri_have_dma_lkey)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400661 break;
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400662 mem_priv = IB_ACCESS_LOCAL_WRITE;
663 register_setup:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400664 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
665 if (IS_ERR(ia->ri_bind_mem)) {
666 printk(KERN_ALERT "%s: ib_get_dma_mr for "
Chuck Lever0ac531c2014-05-28 10:32:43 -0400667 "phys register failed with %lX\n",
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400668 __func__, PTR_ERR(ia->ri_bind_mem));
Chuck Lever0ac531c2014-05-28 10:32:43 -0400669 rc = -ENOMEM;
Chuck Lever5ae711a2015-01-21 11:03:19 -0500670 goto out3;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400671 }
Tom Talpeybd7ed1d2008-10-09 15:00:09 -0400672 break;
673 default:
Chuck Levercdd9ade2014-05-28 10:33:00 -0400674 printk(KERN_ERR "RPC: Unsupported memory "
675 "registration mode: %d\n", memreg);
676 rc = -ENOMEM;
Chuck Lever5ae711a2015-01-21 11:03:19 -0500677 goto out3;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400678 }
Chuck Levera0ce85f2015-03-30 14:34:21 -0400679 dprintk("RPC: %s: memory registration strategy is '%s'\n",
680 __func__, ia->ri_ops->ro_displayname);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400681
682 /* Else will do memory reg/dereg for each chunk */
683 ia->ri_memreg_strategy = memreg;
684
Chuck Lever73806c82014-07-29 17:23:25 -0400685 rwlock_init(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400686 return 0;
Chuck Lever5ae711a2015-01-21 11:03:19 -0500687
688out3:
689 ib_dealloc_pd(ia->ri_pd);
690 ia->ri_pd = NULL;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400691out2:
692 rdma_destroy_id(ia->ri_id);
Tom Talpeyfee08ca2008-10-09 15:01:00 -0400693 ia->ri_id = NULL;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400694out1:
695 return rc;
696}
697
698/*
699 * Clean up/close an IA.
700 * o if event handles and PD have been initialized, free them.
701 * o close the IA
702 */
703void
704rpcrdma_ia_close(struct rpcrdma_ia *ia)
705{
706 int rc;
707
708 dprintk("RPC: %s: entering\n", __func__);
709 if (ia->ri_bind_mem != NULL) {
710 rc = ib_dereg_mr(ia->ri_bind_mem);
711 dprintk("RPC: %s: ib_dereg_mr returned %i\n",
712 __func__, rc);
713 }
Tom Talpeyfee08ca2008-10-09 15:01:00 -0400714 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
715 if (ia->ri_id->qp)
716 rdma_destroy_qp(ia->ri_id);
717 rdma_destroy_id(ia->ri_id);
718 ia->ri_id = NULL;
719 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400720 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
721 rc = ib_dealloc_pd(ia->ri_pd);
722 dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
723 __func__, rc);
724 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400725}
726
727/*
728 * Create unconnected endpoint.
729 */
730int
731rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
732 struct rpcrdma_create_data_internal *cdata)
733{
Chuck Lever7bc79722015-01-21 11:03:27 -0500734 struct ib_device_attr *devattr = &ia->ri_devattr;
Chuck Leverfc664482014-05-28 10:33:25 -0400735 struct ib_cq *sendcq, *recvcq;
Chuck Lever5d40a8a2007-10-26 13:30:54 -0400736 int rc, err;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400737
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400738 /* check provider's send/recv wr limits */
Chuck Lever7bc79722015-01-21 11:03:27 -0500739 if (cdata->max_requests > devattr->max_qp_wr)
740 cdata->max_requests = devattr->max_qp_wr;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400741
742 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
743 ep->rep_attr.qp_context = ep;
744 /* send_cq and recv_cq initialized below */
745 ep->rep_attr.srq = NULL;
746 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
747 switch (ia->ri_memreg_strategy) {
Steve Wise0fc6c4e2014-05-28 10:32:00 -0400748 case RPCRDMA_FRMR: {
749 int depth = 7;
750
Tom Tucker15cdc6442010-08-11 12:47:24 -0400751 /* Add room for frmr register and invalidate WRs.
752 * 1. FRMR reg WR for head
753 * 2. FRMR invalidate WR for head
Steve Wise0fc6c4e2014-05-28 10:32:00 -0400754 * 3. N FRMR reg WRs for pagelist
755 * 4. N FRMR invalidate WRs for pagelist
Tom Tucker15cdc6442010-08-11 12:47:24 -0400756 * 5. FRMR reg WR for tail
757 * 6. FRMR invalidate WR for tail
758 * 7. The RDMA_SEND WR
759 */
Steve Wise0fc6c4e2014-05-28 10:32:00 -0400760
761 /* Calculate N if the device max FRMR depth is smaller than
762 * RPCRDMA_MAX_DATA_SEGS.
763 */
764 if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
765 int delta = RPCRDMA_MAX_DATA_SEGS -
766 ia->ri_max_frmr_depth;
767
768 do {
769 depth += 2; /* FRMR reg + invalidate */
770 delta -= ia->ri_max_frmr_depth;
771 } while (delta > 0);
772
773 }
774 ep->rep_attr.cap.max_send_wr *= depth;
Chuck Lever7bc79722015-01-21 11:03:27 -0500775 if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
776 cdata->max_requests = devattr->max_qp_wr / depth;
Tom Tucker15cdc6442010-08-11 12:47:24 -0400777 if (!cdata->max_requests)
778 return -EINVAL;
Steve Wise0fc6c4e2014-05-28 10:32:00 -0400779 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
780 depth;
Tom Tucker15cdc6442010-08-11 12:47:24 -0400781 }
Tom Talpey3197d3092008-10-09 15:00:20 -0400782 break;
Steve Wise0fc6c4e2014-05-28 10:32:00 -0400783 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400784 default:
785 break;
786 }
787 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
788 ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
789 ep->rep_attr.cap.max_recv_sge = 1;
790 ep->rep_attr.cap.max_inline_data = 0;
791 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
792 ep->rep_attr.qp_type = IB_QPT_RC;
793 ep->rep_attr.port_num = ~0;
794
Chuck Leverc05fbb52015-01-21 11:04:33 -0500795 if (cdata->padding) {
796 ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
797 GFP_KERNEL);
798 if (IS_ERR(ep->rep_padbuf))
799 return PTR_ERR(ep->rep_padbuf);
800 } else
801 ep->rep_padbuf = NULL;
802
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400803 dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
804 "iovs: send %d recv %d\n",
805 __func__,
806 ep->rep_attr.cap.max_send_wr,
807 ep->rep_attr.cap.max_recv_wr,
808 ep->rep_attr.cap.max_send_sge,
809 ep->rep_attr.cap.max_recv_sge);
810
811 /* set trigger for requesting send completion */
Chuck Leverfc664482014-05-28 10:33:25 -0400812 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
Chuck Levere7104a22014-11-08 20:14:20 -0500813 if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
814 ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
815 else if (ep->rep_cqinit <= 2)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400816 ep->rep_cqinit = 0;
817 INIT_CQCOUNT(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400818 init_waitqueue_head(&ep->rep_connect_wait);
Chuck Lever254f91e2014-05-28 10:32:17 -0400819 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400820
Chuck Leverfc664482014-05-28 10:33:25 -0400821 sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
Chuck Lever1c00dd02014-05-28 10:33:42 -0400822 rpcrdma_cq_async_error_upcall, ep,
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400823 ep->rep_attr.cap.max_send_wr + 1, 0);
Chuck Leverfc664482014-05-28 10:33:25 -0400824 if (IS_ERR(sendcq)) {
825 rc = PTR_ERR(sendcq);
826 dprintk("RPC: %s: failed to create send CQ: %i\n",
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400827 __func__, rc);
828 goto out1;
829 }
830
Chuck Leverfc664482014-05-28 10:33:25 -0400831 rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400832 if (rc) {
833 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
834 __func__, rc);
835 goto out2;
836 }
837
Chuck Leverfc664482014-05-28 10:33:25 -0400838 recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
Chuck Lever1c00dd02014-05-28 10:33:42 -0400839 rpcrdma_cq_async_error_upcall, ep,
Chuck Leverfc664482014-05-28 10:33:25 -0400840 ep->rep_attr.cap.max_recv_wr + 1, 0);
841 if (IS_ERR(recvcq)) {
842 rc = PTR_ERR(recvcq);
843 dprintk("RPC: %s: failed to create recv CQ: %i\n",
844 __func__, rc);
845 goto out2;
846 }
847
848 rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
849 if (rc) {
850 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
851 __func__, rc);
852 ib_destroy_cq(recvcq);
853 goto out2;
854 }
855
856 ep->rep_attr.send_cq = sendcq;
857 ep->rep_attr.recv_cq = recvcq;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400858
859 /* Initialize cma parameters */
860
861 /* RPC/RDMA does not use private data */
862 ep->rep_remote_cma.private_data = NULL;
863 ep->rep_remote_cma.private_data_len = 0;
864
865 /* Client offers RDMA Read but does not initiate */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400866 ep->rep_remote_cma.initiator_depth = 0;
Chuck Lever7bc79722015-01-21 11:03:27 -0500867 if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
Tom Tuckerb334eaa2008-10-09 15:00:30 -0400868 ep->rep_remote_cma.responder_resources = 32;
869 else
Chuck Lever7bc79722015-01-21 11:03:27 -0500870 ep->rep_remote_cma.responder_resources =
871 devattr->max_qp_rd_atom;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400872
873 ep->rep_remote_cma.retry_count = 7;
874 ep->rep_remote_cma.flow_control = 0;
875 ep->rep_remote_cma.rnr_retry_count = 0;
876
877 return 0;
878
879out2:
Chuck Leverfc664482014-05-28 10:33:25 -0400880 err = ib_destroy_cq(sendcq);
Chuck Lever5d40a8a2007-10-26 13:30:54 -0400881 if (err)
882 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
883 __func__, err);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400884out1:
Chuck Leverc05fbb52015-01-21 11:04:33 -0500885 rpcrdma_free_regbuf(ia, ep->rep_padbuf);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400886 return rc;
887}
888
889/*
890 * rpcrdma_ep_destroy
891 *
892 * Disconnect and destroy endpoint. After this, the only
893 * valid operations on the ep are to free it (if dynamically
894 * allocated) or re-create it.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400895 */
Chuck Lever7f1d5412014-05-28 10:33:16 -0400896void
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400897rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
898{
899 int rc;
900
901 dprintk("RPC: %s: entering, connected is %d\n",
902 __func__, ep->rep_connected);
903
Chuck Lever254f91e2014-05-28 10:32:17 -0400904 cancel_delayed_work_sync(&ep->rep_connect_worker);
905
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400906 if (ia->ri_id->qp) {
Chuck Lever282191c2014-07-29 17:25:55 -0400907 rpcrdma_ep_disconnect(ep, ia);
Tom Talpeyfee08ca2008-10-09 15:01:00 -0400908 rdma_destroy_qp(ia->ri_id);
909 ia->ri_id->qp = NULL;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400910 }
911
Chuck Leverc05fbb52015-01-21 11:04:33 -0500912 rpcrdma_free_regbuf(ia, ep->rep_padbuf);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400913
Chuck Leverfc664482014-05-28 10:33:25 -0400914 rpcrdma_clean_cq(ep->rep_attr.recv_cq);
915 rc = ib_destroy_cq(ep->rep_attr.recv_cq);
916 if (rc)
917 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
918 __func__, rc);
919
920 rpcrdma_clean_cq(ep->rep_attr.send_cq);
921 rc = ib_destroy_cq(ep->rep_attr.send_cq);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400922 if (rc)
923 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
924 __func__, rc);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400925}
926
927/*
928 * Connect unconnected endpoint.
929 */
930int
931rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
932{
Chuck Lever73806c82014-07-29 17:23:25 -0400933 struct rdma_cm_id *id, *old;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400934 int rc = 0;
935 int retry_count = 0;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400936
Tom Talpeyc0555512008-10-10 11:32:45 -0400937 if (ep->rep_connected != 0) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400938 struct rpcrdma_xprt *xprt;
939retry:
Chuck Leverec62f402014-05-28 10:34:07 -0400940 dprintk("RPC: %s: reconnecting...\n", __func__);
Chuck Lever282191c2014-07-29 17:25:55 -0400941
942 rpcrdma_ep_disconnect(ep, ia);
Chuck Levera7bc2112014-07-29 17:23:52 -0400943 rpcrdma_flush_cqs(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400944
945 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
Chuck Lever31a701a2015-03-30 14:35:07 -0400946 ia->ri_ops->ro_reset(xprt);
947
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400948 id = rpcrdma_create_id(xprt, ia,
949 (struct sockaddr *)&xprt->rx_data.addr);
950 if (IS_ERR(id)) {
Chuck Leverec62f402014-05-28 10:34:07 -0400951 rc = -EHOSTUNREACH;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400952 goto out;
953 }
954 /* TEMP TEMP TEMP - fail if new device:
955 * Deregister/remarshal *all* requests!
956 * Close and recreate adapter, pd, etc!
957 * Re-determine all attributes still sane!
958 * More stuff I haven't thought of!
959 * Rrrgh!
960 */
961 if (ia->ri_id->device != id->device) {
962 printk("RPC: %s: can't reconnect on "
963 "different device!\n", __func__);
964 rdma_destroy_id(id);
Chuck Leverec62f402014-05-28 10:34:07 -0400965 rc = -ENETUNREACH;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400966 goto out;
967 }
968 /* END TEMP */
Chuck Leverec62f402014-05-28 10:34:07 -0400969 rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
970 if (rc) {
971 dprintk("RPC: %s: rdma_create_qp failed %i\n",
972 __func__, rc);
973 rdma_destroy_id(id);
974 rc = -ENETUNREACH;
975 goto out;
976 }
Chuck Lever73806c82014-07-29 17:23:25 -0400977
978 write_lock(&ia->ri_qplock);
979 old = ia->ri_id;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400980 ia->ri_id = id;
Chuck Lever73806c82014-07-29 17:23:25 -0400981 write_unlock(&ia->ri_qplock);
982
983 rdma_destroy_qp(old);
984 rdma_destroy_id(old);
Chuck Leverec62f402014-05-28 10:34:07 -0400985 } else {
986 dprintk("RPC: %s: connecting...\n", __func__);
987 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
988 if (rc) {
989 dprintk("RPC: %s: rdma_create_qp failed %i\n",
990 __func__, rc);
991 /* do not update ep->rep_connected */
992 return -ENETUNREACH;
993 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400994 }
995
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400996 ep->rep_connected = 0;
997
998 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
999 if (rc) {
1000 dprintk("RPC: %s: rdma_connect() failed with %i\n",
1001 __func__, rc);
1002 goto out;
1003 }
1004
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001005 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
1006
1007 /*
1008 * Check state. A non-peer reject indicates no listener
1009 * (ECONNREFUSED), which may be a transient state. All
1010 * others indicate a transport condition which has already
1011 * undergone a best-effort.
1012 */
Joe Perchesf64f9e72009-11-29 16:55:45 -08001013 if (ep->rep_connected == -ECONNREFUSED &&
1014 ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001015 dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
1016 goto retry;
1017 }
1018 if (ep->rep_connected <= 0) {
1019 /* Sometimes, the only way to reliably connect to remote
1020 * CMs is to use same nonzero values for ORD and IRD. */
Tom Tuckerb334eaa2008-10-09 15:00:30 -04001021 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
1022 (ep->rep_remote_cma.responder_resources == 0 ||
1023 ep->rep_remote_cma.initiator_depth !=
1024 ep->rep_remote_cma.responder_resources)) {
1025 if (ep->rep_remote_cma.responder_resources == 0)
1026 ep->rep_remote_cma.responder_resources = 1;
1027 ep->rep_remote_cma.initiator_depth =
1028 ep->rep_remote_cma.responder_resources;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001029 goto retry;
Tom Tuckerb334eaa2008-10-09 15:00:30 -04001030 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001031 rc = ep->rep_connected;
1032 } else {
1033 dprintk("RPC: %s: connected\n", __func__);
1034 }
1035
1036out:
1037 if (rc)
1038 ep->rep_connected = rc;
1039 return rc;
1040}
1041
1042/*
1043 * rpcrdma_ep_disconnect
1044 *
1045 * This is separate from destroy to facilitate the ability
1046 * to reconnect without recreating the endpoint.
1047 *
1048 * This call is not reentrant, and must not be made in parallel
1049 * on the same endpoint.
1050 */
Chuck Lever282191c2014-07-29 17:25:55 -04001051void
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001052rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
1053{
1054 int rc;
1055
Chuck Levera7bc2112014-07-29 17:23:52 -04001056 rpcrdma_flush_cqs(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001057 rc = rdma_disconnect(ia->ri_id);
1058 if (!rc) {
1059 /* returns without wait if not connected */
1060 wait_event_interruptible(ep->rep_connect_wait,
1061 ep->rep_connected != 1);
1062 dprintk("RPC: %s: after wait, %sconnected\n", __func__,
1063 (ep->rep_connected == 1) ? "still " : "dis");
1064 } else {
1065 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
1066 ep->rep_connected = rc;
1067 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001068}
1069
Chuck Lever13924022015-01-21 11:03:52 -05001070static struct rpcrdma_req *
1071rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
1072{
Chuck Lever13924022015-01-21 11:03:52 -05001073 struct rpcrdma_req *req;
Chuck Lever13924022015-01-21 11:03:52 -05001074
Chuck Lever85275c82015-01-21 11:04:16 -05001075 req = kzalloc(sizeof(*req), GFP_KERNEL);
Chuck Lever13924022015-01-21 11:03:52 -05001076 if (req == NULL)
Chuck Lever85275c82015-01-21 11:04:16 -05001077 return ERR_PTR(-ENOMEM);
Chuck Lever13924022015-01-21 11:03:52 -05001078
Chuck Lever13924022015-01-21 11:03:52 -05001079 req->rl_buffer = &r_xprt->rx_buf;
1080 return req;
Chuck Lever13924022015-01-21 11:03:52 -05001081}
1082
1083static struct rpcrdma_rep *
1084rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
1085{
1086 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
Chuck Lever13924022015-01-21 11:03:52 -05001087 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1088 struct rpcrdma_rep *rep;
1089 int rc;
1090
1091 rc = -ENOMEM;
Chuck Lever6b1184c2015-01-21 11:04:25 -05001092 rep = kzalloc(sizeof(*rep), GFP_KERNEL);
Chuck Lever13924022015-01-21 11:03:52 -05001093 if (rep == NULL)
1094 goto out;
Chuck Lever13924022015-01-21 11:03:52 -05001095
Chuck Lever6b1184c2015-01-21 11:04:25 -05001096 rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
1097 GFP_KERNEL);
1098 if (IS_ERR(rep->rr_rdmabuf)) {
1099 rc = PTR_ERR(rep->rr_rdmabuf);
Chuck Lever13924022015-01-21 11:03:52 -05001100 goto out_free;
Chuck Lever6b1184c2015-01-21 11:04:25 -05001101 }
Chuck Lever13924022015-01-21 11:03:52 -05001102
1103 rep->rr_buffer = &r_xprt->rx_buf;
1104 return rep;
1105
1106out_free:
1107 kfree(rep);
1108out:
1109 return ERR_PTR(rc);
1110}
1111
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001112int
Chuck Leverac920d02015-01-21 11:03:44 -05001113rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001114{
Chuck Leverac920d02015-01-21 11:03:44 -05001115 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1116 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1117 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001118 char *p;
Chuck Lever13924022015-01-21 11:03:52 -05001119 size_t len;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001120 int i, rc;
1121
1122 buf->rb_max_requests = cdata->max_requests;
1123 spin_lock_init(&buf->rb_lock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001124
1125 /* Need to allocate:
1126 * 1. arrays for send and recv pointers
1127 * 2. arrays of struct rpcrdma_req to fill in pointers
1128 * 3. array of struct rpcrdma_rep for replies
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001129 * Send/recv buffers in req/rep need to be registered
1130 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001131 len = buf->rb_max_requests *
1132 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001133
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001134 p = kzalloc(len, GFP_KERNEL);
1135 if (p == NULL) {
1136 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1137 __func__, len);
1138 rc = -ENOMEM;
1139 goto out;
1140 }
1141 buf->rb_pool = p; /* for freeing it later */
1142
1143 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1144 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1145 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1146 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1147
Chuck Lever91e70e72015-03-30 14:34:58 -04001148 rc = ia->ri_ops->ro_init(r_xprt);
1149 if (rc)
1150 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001151
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001152 for (i = 0; i < buf->rb_max_requests; i++) {
1153 struct rpcrdma_req *req;
1154 struct rpcrdma_rep *rep;
1155
Chuck Lever13924022015-01-21 11:03:52 -05001156 req = rpcrdma_create_req(r_xprt);
1157 if (IS_ERR(req)) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001158 dprintk("RPC: %s: request buffer %d alloc"
1159 " failed\n", __func__, i);
Chuck Lever13924022015-01-21 11:03:52 -05001160 rc = PTR_ERR(req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001161 goto out;
1162 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001163 buf->rb_send_bufs[i] = req;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001164
Chuck Lever13924022015-01-21 11:03:52 -05001165 rep = rpcrdma_create_rep(r_xprt);
1166 if (IS_ERR(rep)) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001167 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1168 __func__, i);
Chuck Lever13924022015-01-21 11:03:52 -05001169 rc = PTR_ERR(rep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001170 goto out;
1171 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001172 buf->rb_recv_bufs[i] = rep;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001173 }
Chuck Lever13924022015-01-21 11:03:52 -05001174
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001175 return 0;
1176out:
1177 rpcrdma_buffer_destroy(buf);
1178 return rc;
1179}
1180
Chuck Lever2e845222014-07-29 17:25:38 -04001181static void
Chuck Lever13924022015-01-21 11:03:52 -05001182rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
1183{
1184 if (!rep)
1185 return;
1186
Chuck Lever6b1184c2015-01-21 11:04:25 -05001187 rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
Chuck Lever13924022015-01-21 11:03:52 -05001188 kfree(rep);
1189}
1190
1191static void
1192rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
1193{
1194 if (!req)
1195 return;
1196
Chuck Lever0ca77dc2015-01-21 11:04:08 -05001197 rpcrdma_free_regbuf(ia, req->rl_sendbuf);
Chuck Lever85275c82015-01-21 11:04:16 -05001198 rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
Chuck Lever13924022015-01-21 11:03:52 -05001199 kfree(req);
1200}
1201
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001202void
1203rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1204{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001205 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001206 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001207
1208 /* clean up in reverse order from create
1209 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001210 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001211 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001212 */
1213 dprintk("RPC: %s: entering\n", __func__);
1214
1215 for (i = 0; i < buf->rb_max_requests; i++) {
Chuck Lever13924022015-01-21 11:03:52 -05001216 if (buf->rb_recv_bufs)
1217 rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
1218 if (buf->rb_send_bufs)
1219 rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001220 }
1221
Chuck Lever4561f342015-03-30 14:35:17 -04001222 ia->ri_ops->ro_destroy(buf);
Allen Andrews4034ba02014-05-28 10:32:09 -04001223
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001224 kfree(buf->rb_pool);
1225}
1226
Chuck Leverc2922c02014-07-29 17:24:36 -04001227/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
1228 * some req segments uninitialized.
1229 */
1230static void
1231rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1232{
1233 if (*mw) {
1234 list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
1235 *mw = NULL;
1236 }
1237}
1238
1239/* Cycle mw's back in reverse order, and "spin" them.
1240 * This delays and scrambles reuse as much as possible.
1241 */
1242static void
1243rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1244{
1245 struct rpcrdma_mr_seg *seg = req->rl_segments;
1246 struct rpcrdma_mr_seg *seg1 = seg;
1247 int i;
1248
1249 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
Chuck Lever3eb35812015-01-21 11:02:54 -05001250 rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
1251 rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
Chuck Leverc2922c02014-07-29 17:24:36 -04001252}
1253
1254static void
1255rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1256{
1257 buf->rb_send_bufs[--buf->rb_send_index] = req;
1258 req->rl_niovs = 0;
1259 if (req->rl_reply) {
1260 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
1261 req->rl_reply->rr_func = NULL;
1262 req->rl_reply = NULL;
1263 }
1264}
1265
Chuck Lever6814bae2015-03-30 14:34:48 -04001266/* rpcrdma_unmap_one() was already done during deregistration.
Chuck Leverddb6beb2014-07-29 17:24:54 -04001267 * Redo only the ib_post_send().
1268 */
1269static void
1270rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1271{
1272 struct rpcrdma_xprt *r_xprt =
1273 container_of(ia, struct rpcrdma_xprt, rx_ia);
1274 struct ib_send_wr invalidate_wr, *bad_wr;
1275 int rc;
1276
1277 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1278
1279 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001280 r->r.frmr.fr_state = FRMR_IS_INVALID;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001281
1282 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1283 invalidate_wr.wr_id = (unsigned long)(void *)r;
1284 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001285 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1286 DECR_CQCOUNT(&r_xprt->rx_ep);
1287
1288 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1289 __func__, r, r->r.frmr.fr_mr->rkey);
1290
1291 read_lock(&ia->ri_qplock);
1292 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1293 read_unlock(&ia->ri_qplock);
1294 if (rc) {
1295 /* Force rpcrdma_buffer_get() to retry */
1296 r->r.frmr.fr_state = FRMR_IS_STALE;
1297 dprintk("RPC: %s: ib_post_send failed, %i\n",
1298 __func__, rc);
1299 }
1300}
1301
1302static void
1303rpcrdma_retry_flushed_linv(struct list_head *stale,
1304 struct rpcrdma_buffer *buf)
1305{
1306 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1307 struct list_head *pos;
1308 struct rpcrdma_mw *r;
1309 unsigned long flags;
1310
1311 list_for_each(pos, stale) {
1312 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1313 rpcrdma_retry_local_inv(r, ia);
1314 }
1315
1316 spin_lock_irqsave(&buf->rb_lock, flags);
1317 list_splice_tail(stale, &buf->rb_mws);
1318 spin_unlock_irqrestore(&buf->rb_lock, flags);
1319}
1320
Chuck Leverc2922c02014-07-29 17:24:36 -04001321static struct rpcrdma_req *
Chuck Leverddb6beb2014-07-29 17:24:54 -04001322rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1323 struct list_head *stale)
1324{
1325 struct rpcrdma_mw *r;
1326 int i;
1327
1328 i = RPCRDMA_MAX_SEGS - 1;
1329 while (!list_empty(&buf->rb_mws)) {
1330 r = list_entry(buf->rb_mws.next,
1331 struct rpcrdma_mw, mw_list);
1332 list_del(&r->mw_list);
1333 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1334 list_add(&r->mw_list, stale);
1335 continue;
1336 }
Chuck Lever3eb35812015-01-21 11:02:54 -05001337 req->rl_segments[i].rl_mw = r;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001338 if (unlikely(i-- == 0))
1339 return req; /* Success */
1340 }
1341
1342 /* Not enough entries on rb_mws for this req */
1343 rpcrdma_buffer_put_sendbuf(req, buf);
1344 rpcrdma_buffer_put_mrs(req, buf);
1345 return NULL;
1346}
1347
1348static struct rpcrdma_req *
1349rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001350{
1351 struct rpcrdma_mw *r;
1352 int i;
1353
1354 i = RPCRDMA_MAX_SEGS - 1;
1355 while (!list_empty(&buf->rb_mws)) {
1356 r = list_entry(buf->rb_mws.next,
1357 struct rpcrdma_mw, mw_list);
1358 list_del(&r->mw_list);
Chuck Lever3eb35812015-01-21 11:02:54 -05001359 req->rl_segments[i].rl_mw = r;
Chuck Leverc2922c02014-07-29 17:24:36 -04001360 if (unlikely(i-- == 0))
1361 return req; /* Success */
1362 }
1363
1364 /* Not enough entries on rb_mws for this req */
1365 rpcrdma_buffer_put_sendbuf(req, buf);
1366 rpcrdma_buffer_put_mrs(req, buf);
1367 return NULL;
1368}
1369
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001370/*
1371 * Get a set of request/reply buffers.
1372 *
1373 * Reply buffer (if needed) is attached to send buffer upon return.
1374 * Rule:
1375 * rb_send_index and rb_recv_index MUST always be pointing to the
1376 * *next* available buffer (non-NULL). They are incremented after
1377 * removing buffers, and decremented *before* returning them.
1378 */
1379struct rpcrdma_req *
1380rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1381{
Chuck Leverc2922c02014-07-29 17:24:36 -04001382 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001383 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001384 struct rpcrdma_req *req;
1385 unsigned long flags;
1386
1387 spin_lock_irqsave(&buffers->rb_lock, flags);
1388 if (buffers->rb_send_index == buffers->rb_max_requests) {
1389 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1390 dprintk("RPC: %s: out of request buffers\n", __func__);
1391 return ((struct rpcrdma_req *)NULL);
1392 }
1393
1394 req = buffers->rb_send_bufs[buffers->rb_send_index];
1395 if (buffers->rb_send_index < buffers->rb_recv_index) {
1396 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1397 __func__,
1398 buffers->rb_recv_index - buffers->rb_send_index);
1399 req->rl_reply = NULL;
1400 } else {
1401 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1402 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1403 }
1404 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001405
1406 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001407 switch (ia->ri_memreg_strategy) {
1408 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001409 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1410 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001411 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001412 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001413 break;
1414 default:
1415 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001416 }
1417 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001418 if (!list_empty(&stale))
1419 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001420 return req;
1421}
1422
/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
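
/*
 * A minimal usage sketch for the two helpers above, assuming a
 * hypothetical caller (the function name below is illustrative only;
 * the transport's allocate/free callbacks perform this pairing in
 * practice). It shows the expected get/put lifetime of a req.
 */
static inline int rpcrdma_buffer_cycle_example(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;

	req = rpcrdma_buffer_get(buffers);
	if (req == NULL)
		return -ENOMEM;	/* pool exhausted, or no usable MWs */

	/* ... marshal the RPC and post it with rpcrdma_ep_post() ... */

	/* Return the send buffer (and any still-attached reply buffer) */
	rpcrdma_buffer_put(req);
	return 0;
}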

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to a
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

static int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
			  struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

static int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
			    struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			    iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (mr == NULL)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

/**
 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
 * @ia: controlling rpcrdma_ia
 * @size: size of buffer to be allocated, in bytes
 * @flags: GFP flags
 *
 * Returns pointer to private header of an area of internally
 * registered memory, or an ERR_PTR. The registered buffer follows
 * the end of the private header.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. regbufs are not
 * used for RDMA READ/WRITE operations, thus are registered only for
 * LOCAL access.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
{
	struct rpcrdma_regbuf *rb;
	int rc;

	rc = -ENOMEM;
	rb = kmalloc(sizeof(*rb) + size, flags);
	if (rb == NULL)
		goto out;

	rb->rg_size = size;
	rb->rg_owner = NULL;
	rc = rpcrdma_register_internal(ia, rb->rg_base, size,
				       &rb->rg_mr, &rb->rg_iov);
	if (rc)
		goto out_free;

	return rb;

out_free:
	kfree(rb);
out:
	return ERR_PTR(rc);
}

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	if (rb) {
		rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
		kfree(rb);
	}
}
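
/*
 * A minimal usage sketch for the regbuf helpers above, assuming a
 * hypothetical caller: the function name and the 1KB size are
 * illustrative assumptions, not existing callers in this file.
 */
static inline int rpcrdma_regbuf_example(struct rpcrdma_ia *ia)
{
	struct rpcrdma_regbuf *rb;

	rb = rpcrdma_alloc_regbuf(ia, 1024, GFP_KERNEL);
	if (IS_ERR(rb))
		return PTR_ERR(rb);

	/* rg_base is the registered region following the private header;
	 * rb->rg_iov carries the addr/length/lkey used for SEND or RECV.
	 */
	memset(rb->rg_base, 0, 1024);

	rpcrdma_free_regbuf(ia, rb);
	return 0;
}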

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, bool writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				  seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				    seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
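
/*
 * A minimal sketch of how the two wrappers above are paired, assuming
 * a hypothetical caller with an already-populated mr_seg. Mapping with
 * writing = true (DMA_FROM_DEVICE) suits a write chunk, where the
 * server places reply data into the segment; the segment is unmapped
 * once that data has been consumed.
 */
static inline void rpcrdma_map_one_example(struct rpcrdma_ia *ia,
					   struct rpcrdma_mr_seg *seg)
{
	rpcrdma_map_one(ia, seg, true);

	/* ... register the segment and advertise it to the server ... */

	rpcrdma_unmap_one(ia, seg);
}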

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else { /* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}
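
/*
 * A minimal sketch of posting a marshaled request, assuming a
 * hypothetical caller that already holds the rpcrdma_xprt (the real
 * send path drives rpcrdma_ep_post() from the transport's
 * send_request callback). Per the countdown above, only an occasional
 * send is signaled; most sends complete without a completion event.
 */
static inline int rpcrdma_post_example(struct rpcrdma_xprt *r_xprt,
				       struct rpcrdma_req *req)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;

	return rpcrdma_ep_post(ia, ep, req);
}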

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rdmab_length(rep->rr_rdmabuf),
				   DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}

/* How many chunk list items fit within our inline buffers?
 */
unsigned int
rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	int bytes, segments;

	bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
	bytes -= RPCRDMA_HDRLEN_MIN;
	if (bytes < sizeof(struct rpcrdma_segment) * 2) {
		pr_warn("RPC: %s: inline threshold too small\n",
			__func__);
		return 0;
	}

	segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
	dprintk("RPC: %s: max chunk list size = %d segments\n",
		__func__, segments);
	return segments;
}
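
/*
 * A worked example of the computation above, under illustrative
 * assumptions: 1024-byte inline thresholds in both directions, a
 * 28-byte minimal RPC-over-RDMA header, and 16-byte chunk segments
 * (these sizes are assumptions for the example, not values defined
 * in this file):
 *
 *	bytes    = 1024 - 28 = 996
 *	996 / 16 = 62 segments would fit exactly
 *	fls(62)  = 6, so segments = 1 << (6 - 1) = 32
 *
 * i.e. the segment count is rounded down to a power of two.
 */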