/*
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#ifdef RPC_DEBUG
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);

/*
 * internal functions
 */

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: frmr %p status %X opcode %d\n",
		__func__, frmr, wc->status, wc->opcode);

	if (wc->wr_id == 0ULL)
		return;
	if (wc->status != IB_WC_SUCCESS)
		frmr->r.frmr.fr_state = FRMR_IS_STALE;
}

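/* Explanatory note: the send CQ is polled in batches of up to
 * RPCRDMA_POLLSIZE completions. The budget (RPCRDMA_WC_BUDGET /
 * RPCRDMA_POLLSIZE batches per upcall) bounds how long a busy CQ can
 * keep this handler running in a single pass.
 */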
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (wc->status != IB_WC_SUCCESS) {
		rep->rr_len = ~0U;
		goto out_schedule;
	}
	if (wc->opcode != IB_WC_RECV)
		return;

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

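	/* A reply of at least 16 bytes carries the fixed RPC-over-RDMA
	 * header; rm_credit is the server's advertised credit limit.
	 * Clamp it so a zero grant cannot stall the transport and an
	 * oversized grant cannot exceed the receive buffer pool.
	 */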
	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

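/* Drain both completion queues. This is called on the connect and
 * disconnect paths (see rpcrdma_ep_connect and rpcrdma_ep_disconnect)
 * so that completions already queued, including flushed receives, are
 * processed and handed to the reply tasklet rather than lost.
 */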
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#ifdef RPC_DEBUG
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)						\
	((status) < ARRAY_SIZE(conn) ?					\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
			__func__, &addr->sin_addr.s_addr,
			ntohs(addr->sin_port), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
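		/* Illustration only (hypothetical numbers): if
		 * RPCRDMA_MAX_DATA_SEGS were 64 and the device's maximum
		 * FRMR depth were 16, delta would start at 48, the loop
		 * would run three times, and depth would end at 7 + 6 = 13
		 * WRs per RPC.
		 */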
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
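	/* Explanatory note: rep_cqinit is how many send WRs may be posted
	 * unsignalled before one is posted signalled (the countdown is
	 * driven by INIT_CQCOUNT/DECR_CQCOUNT). This suppresses most send
	 * completions while keeping the send CQ from silently wrapping.
	 */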
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages = RPCRDMA_MAX_DATA_SEGS,
		.max_maps = 1,
		.page_shift = PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
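	/* Illustration only: with, say, 32 credits this pre-allocates
	 * 33 * RPCRDMA_MAX_SEGS MWs, enough to map every segment of every
	 * slot with one spare slot's worth left over.
	 */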
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_free:
	kfree(r);
	return rc;
}

static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);
1098
1099 while (i--) {
1100 r = kzalloc(sizeof(*r), GFP_KERNEL);
1101 if (r == NULL)
1102 return -ENOMEM;
1103 f = &r->r.frmr;
1104
1105 f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1106 ia->ri_max_frmr_depth);
1107 if (IS_ERR(f->fr_mr)) {
1108 rc = PTR_ERR(f->fr_mr);
1109 dprintk("RPC: %s: ib_alloc_fast_reg_mr "
1110 "failed %i\n", __func__, rc);
1111 goto out_free;
1112 }
1113
1114 f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
1115 ia->ri_max_frmr_depth);
1116 if (IS_ERR(f->fr_pgl)) {
1117 rc = PTR_ERR(f->fr_pgl);
1118 dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
1119 "failed %i\n", __func__, rc);
1120
1121 ib_dereg_mr(f->fr_mr);
1122 goto out_free;
1123 }
1124
1125 list_add(&r->mw_list, &buf->rb_mws);
1126 list_add(&r->mw_all, &buf->rb_all);
1127 }
1128
1129 return 0;
1130
1131out_free:
1132 kfree(r);
1133 return rc;
1134}
1135
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001136int
1137rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1138 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1139{
1140 char *p;
Chuck Lever65866f82014-05-28 10:33:59 -04001141 size_t len, rlen, wlen;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001142 int i, rc;
1143
1144 buf->rb_max_requests = cdata->max_requests;
1145 spin_lock_init(&buf->rb_lock);
1146 atomic_set(&buf->rb_credits, 1);
1147
1148 /* Need to allocate:
1149 * 1. arrays for send and recv pointers
1150 * 2. arrays of struct rpcrdma_req to fill in pointers
1151 * 3. array of struct rpcrdma_rep for replies
1152 * 4. padding, if any
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001153 * Send/recv buffers in req/rep need to be registered
1154 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001155 len = buf->rb_max_requests *
1156 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1157 len += cdata->padding;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001158
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001159 p = kzalloc(len, GFP_KERNEL);
1160 if (p == NULL) {
1161 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1162 __func__, len);
1163 rc = -ENOMEM;
1164 goto out;
1165 }
1166 buf->rb_pool = p; /* for freeing it later */
1167
1168 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1169 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1170 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1171 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1172
1173 /*
1174 * Register the zeroed pad buffer, if any.
1175 */
1176 if (cdata->padding) {
1177 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1178 &ep->rep_pad_mr, &ep->rep_pad);
1179 if (rc)
1180 goto out;
1181 }
1182 p += cdata->padding;
1183
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001184 INIT_LIST_HEAD(&buf->rb_mws);
Chuck Lever3111d722014-07-29 17:24:28 -04001185 INIT_LIST_HEAD(&buf->rb_all);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001186 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001187 case RPCRDMA_FRMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001188 rc = rpcrdma_init_frmrs(ia, buf);
1189 if (rc)
1190 goto out;
Tom Talpey3197d3092008-10-09 15:00:20 -04001191 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001192 case RPCRDMA_MTHCAFMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001193 rc = rpcrdma_init_fmrs(ia, buf);
1194 if (rc)
1195 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001196 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001197 default:
1198 break;
1199 }
1200
1201 /*
1202 * Allocate/init the request/reply buffers. Doing this
1203 * using kmalloc for now -- one for each buf.
1204 */
Chuck Lever65866f82014-05-28 10:33:59 -04001205 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1206 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1207 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1208 __func__, wlen, rlen);
1209
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001210 for (i = 0; i < buf->rb_max_requests; i++) {
1211 struct rpcrdma_req *req;
1212 struct rpcrdma_rep *rep;
1213
Chuck Lever65866f82014-05-28 10:33:59 -04001214 req = kmalloc(wlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001215 if (req == NULL) {
1216 dprintk("RPC: %s: request buffer %d alloc"
1217 " failed\n", __func__, i);
1218 rc = -ENOMEM;
1219 goto out;
1220 }
1221 memset(req, 0, sizeof(struct rpcrdma_req));
1222 buf->rb_send_bufs[i] = req;
1223 buf->rb_send_bufs[i]->rl_buffer = buf;
1224
1225 rc = rpcrdma_register_internal(ia, req->rl_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001226 wlen - offsetof(struct rpcrdma_req, rl_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001227 &buf->rb_send_bufs[i]->rl_handle,
1228 &buf->rb_send_bufs[i]->rl_iov);
1229 if (rc)
1230 goto out;
1231
Chuck Lever65866f82014-05-28 10:33:59 -04001232 buf->rb_send_bufs[i]->rl_size = wlen -
1233 sizeof(struct rpcrdma_req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001234
Chuck Lever65866f82014-05-28 10:33:59 -04001235 rep = kmalloc(rlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001236 if (rep == NULL) {
1237 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1238 __func__, i);
1239 rc = -ENOMEM;
1240 goto out;
1241 }
1242 memset(rep, 0, sizeof(struct rpcrdma_rep));
1243 buf->rb_recv_bufs[i] = rep;
1244 buf->rb_recv_bufs[i]->rr_buffer = buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001245
1246 rc = rpcrdma_register_internal(ia, rep->rr_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001247 rlen - offsetof(struct rpcrdma_rep, rr_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001248 &buf->rb_recv_bufs[i]->rr_handle,
1249 &buf->rb_recv_bufs[i]->rr_iov);
1250 if (rc)
1251 goto out;
1252
1253 }
1254 dprintk("RPC: %s: max_requests %d\n",
1255 __func__, buf->rb_max_requests);
1256 /* done */
1257 return 0;
1258out:
1259 rpcrdma_buffer_destroy(buf);
1260 return rc;
1261}
1262
Chuck Lever2e845222014-07-29 17:25:38 -04001263static void
1264rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
1265{
1266 struct rpcrdma_mw *r;
1267 int rc;
1268
1269 while (!list_empty(&buf->rb_all)) {
1270 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1271 list_del(&r->mw_all);
1272 list_del(&r->mw_list);
1273
1274 rc = ib_dealloc_fmr(r->r.fmr);
1275 if (rc)
1276 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
1277 __func__, rc);
1278
1279 kfree(r);
1280 }
1281}
1282
1283static void
1284rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
1285{
1286 struct rpcrdma_mw *r;
1287 int rc;
1288
1289 while (!list_empty(&buf->rb_all)) {
1290 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1291 list_del(&r->mw_all);
1292 list_del(&r->mw_list);
1293
1294 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1295 if (rc)
1296 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1297 __func__, rc);
1298 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1299
1300 kfree(r);
1301 }
1302}
1303
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001304void
1305rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1306{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001307 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001308 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001309
1310 /* clean up in reverse order from create
1311 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001312 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001313 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001314 */
1315 dprintk("RPC: %s: entering\n", __func__);
1316
1317 for (i = 0; i < buf->rb_max_requests; i++) {
1318 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1319 rpcrdma_deregister_internal(ia,
1320 buf->rb_recv_bufs[i]->rr_handle,
1321 &buf->rb_recv_bufs[i]->rr_iov);
1322 kfree(buf->rb_recv_bufs[i]);
1323 }
1324 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001325 rpcrdma_deregister_internal(ia,
1326 buf->rb_send_bufs[i]->rl_handle,
1327 &buf->rb_send_bufs[i]->rl_iov);
1328 kfree(buf->rb_send_bufs[i]);
1329 }
1330 }
1331
Chuck Lever2e845222014-07-29 17:25:38 -04001332 switch (ia->ri_memreg_strategy) {
1333 case RPCRDMA_FRMR:
1334 rpcrdma_destroy_frmrs(buf);
1335 break;
1336 case RPCRDMA_MTHCAFMR:
1337 rpcrdma_destroy_fmrs(buf);
1338 break;
1339 default:
1340 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001341 }
1342
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001343 kfree(buf->rb_pool);
1344}
1345
Chuck Lever467c9672014-11-08 20:14:29 -05001346/* After a disconnect, unmap all FMRs.
1347 *
1348 * This is invoked only in the transport connect worker in order
1349 * to serialize with rpcrdma_register_fmr_external().
1350 */
1351static void
1352rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
1353{
1354 struct rpcrdma_xprt *r_xprt =
1355 container_of(ia, struct rpcrdma_xprt, rx_ia);
1356 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1357 struct list_head *pos;
1358 struct rpcrdma_mw *r;
1359 LIST_HEAD(l);
1360 int rc;
1361
1362 list_for_each(pos, &buf->rb_all) {
1363 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1364
1365 INIT_LIST_HEAD(&l);
1366 list_add(&r->r.fmr->list, &l);
1367 rc = ib_unmap_fmr(&l);
1368 if (rc)
1369 dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
1370 __func__, rc);
1371 }
1372}
1373
Chuck Lever9f9d8022014-07-29 17:24:45 -04001374/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
1375 * an unusable state. Find FRMRs in this state and dereg / reg
1376 * each. FRMRs that are VALID and attached to an rpcrdma_req are
1377 * also torn down.
1378 *
1379 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
1380 *
1381 * This is invoked only in the transport connect worker in order
1382 * to serialize with rpcrdma_register_frmr_external().
1383 */
1384static void
1385rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
1386{
1387 struct rpcrdma_xprt *r_xprt =
1388 container_of(ia, struct rpcrdma_xprt, rx_ia);
1389 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1390 struct list_head *pos;
1391 struct rpcrdma_mw *r;
1392 int rc;
1393
1394 list_for_each(pos, &buf->rb_all) {
1395 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1396
1397 if (r->r.frmr.fr_state == FRMR_IS_INVALID)
1398 continue;
1399
1400 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1401 if (rc)
1402 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1403 __func__, rc);
1404 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1405
1406 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1407 ia->ri_max_frmr_depth);
1408 if (IS_ERR(r->r.frmr.fr_mr)) {
1409 rc = PTR_ERR(r->r.frmr.fr_mr);
1410 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1411 " failed %i\n", __func__, rc);
1412 continue;
1413 }
1414 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1415 ia->ri_id->device,
1416 ia->ri_max_frmr_depth);
1417 if (IS_ERR(r->r.frmr.fr_pgl)) {
1418 rc = PTR_ERR(r->r.frmr.fr_pgl);
1419 dprintk("RPC: %s: "
1420 "ib_alloc_fast_reg_page_list "
1421 "failed %i\n", __func__, rc);
1422
1423 ib_dereg_mr(r->r.frmr.fr_mr);
1424 continue;
1425 }
1426 r->r.frmr.fr_state = FRMR_IS_INVALID;
1427 }
1428}
1429
Chuck Leverc2922c02014-07-29 17:24:36 -04001430/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
1431 * some req segments uninitialized.
1432 */
1433static void
1434rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1435{
1436 if (*mw) {
1437 list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
1438 *mw = NULL;
1439 }
1440}
1441
1442/* Cycle mw's back in reverse order, and "spin" them.
1443 * This delays and scrambles reuse as much as possible.
1444 */
1445static void
1446rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1447{
1448 struct rpcrdma_mr_seg *seg = req->rl_segments;
1449 struct rpcrdma_mr_seg *seg1 = seg;
1450 int i;
1451
1452 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
1453 rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
1454 rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
1455}
1456
1457static void
1458rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1459{
1460 buf->rb_send_bufs[--buf->rb_send_index] = req;
1461 req->rl_niovs = 0;
1462 if (req->rl_reply) {
1463 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
1464 req->rl_reply->rr_func = NULL;
1465 req->rl_reply = NULL;
1466 }
1467}
1468
Chuck Leverddb6beb2014-07-29 17:24:54 -04001469/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
1470 * Redo only the ib_post_send().
1471 */
1472static void
1473rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1474{
1475 struct rpcrdma_xprt *r_xprt =
1476 container_of(ia, struct rpcrdma_xprt, rx_ia);
1477 struct ib_send_wr invalidate_wr, *bad_wr;
1478 int rc;
1479
1480 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1481
1482 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001483 r->r.frmr.fr_state = FRMR_IS_INVALID;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001484
1485 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1486 invalidate_wr.wr_id = (unsigned long)(void *)r;
1487 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001488 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1489 DECR_CQCOUNT(&r_xprt->rx_ep);
1490
1491 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1492 __func__, r, r->r.frmr.fr_mr->rkey);
1493
1494 read_lock(&ia->ri_qplock);
1495 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1496 read_unlock(&ia->ri_qplock);
1497 if (rc) {
1498 /* Force rpcrdma_buffer_get() to retry */
1499 r->r.frmr.fr_state = FRMR_IS_STALE;
1500 dprintk("RPC: %s: ib_post_send failed, %i\n",
1501 __func__, rc);
1502 }
1503}
1504
1505static void
1506rpcrdma_retry_flushed_linv(struct list_head *stale,
1507 struct rpcrdma_buffer *buf)
1508{
1509 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1510 struct list_head *pos;
1511 struct rpcrdma_mw *r;
1512 unsigned long flags;
1513
1514 list_for_each(pos, stale) {
1515 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1516 rpcrdma_retry_local_inv(r, ia);
1517 }
1518
1519 spin_lock_irqsave(&buf->rb_lock, flags);
1520 list_splice_tail(stale, &buf->rb_mws);
1521 spin_unlock_irqrestore(&buf->rb_lock, flags);
1522}
1523
Chuck Leverc2922c02014-07-29 17:24:36 -04001524static struct rpcrdma_req *
Chuck Leverddb6beb2014-07-29 17:24:54 -04001525rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1526 struct list_head *stale)
1527{
1528 struct rpcrdma_mw *r;
1529 int i;
1530
1531 i = RPCRDMA_MAX_SEGS - 1;
1532 while (!list_empty(&buf->rb_mws)) {
1533 r = list_entry(buf->rb_mws.next,
1534 struct rpcrdma_mw, mw_list);
1535 list_del(&r->mw_list);
1536 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1537 list_add(&r->mw_list, stale);
1538 continue;
1539 }
1540 req->rl_segments[i].mr_chunk.rl_mw = r;
1541 if (unlikely(i-- == 0))
1542 return req; /* Success */
1543 }
1544
1545 /* Not enough entries on rb_mws for this req */
1546 rpcrdma_buffer_put_sendbuf(req, buf);
1547 rpcrdma_buffer_put_mrs(req, buf);
1548 return NULL;
1549}
1550
1551static struct rpcrdma_req *
1552rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001553{
1554 struct rpcrdma_mw *r;
1555 int i;
1556
1557 i = RPCRDMA_MAX_SEGS - 1;
1558 while (!list_empty(&buf->rb_mws)) {
1559 r = list_entry(buf->rb_mws.next,
1560 struct rpcrdma_mw, mw_list);
1561 list_del(&r->mw_list);
1562 req->rl_segments[i].mr_chunk.rl_mw = r;
1563 if (unlikely(i-- == 0))
1564 return req; /* Success */
1565 }
1566
1567 /* Not enough entries on rb_mws for this req */
1568 rpcrdma_buffer_put_sendbuf(req, buf);
1569 rpcrdma_buffer_put_mrs(req, buf);
1570 return NULL;
1571}
1572
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001573/*
1574 * Get a set of request/reply buffers.
1575 *
1576 * Reply buffer (if needed) is attached to send buffer upon return.
1577 * Rule:
1578 * rb_send_index and rb_recv_index MUST always be pointing to the
1579 * *next* available buffer (non-NULL). They are incremented after
1580 * removing buffers, and decremented *before* returning them.
1581 */
1582struct rpcrdma_req *
1583rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1584{
Chuck Leverc2922c02014-07-29 17:24:36 -04001585 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001586 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001587 struct rpcrdma_req *req;
1588 unsigned long flags;
1589
1590 spin_lock_irqsave(&buffers->rb_lock, flags);
1591 if (buffers->rb_send_index == buffers->rb_max_requests) {
1592 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1593 dprintk("RPC: %s: out of request buffers\n", __func__);
1594		return NULL;
1595 }
1596
1597 req = buffers->rb_send_bufs[buffers->rb_send_index];
1598 if (buffers->rb_send_index < buffers->rb_recv_index) {
1599 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1600 __func__,
1601 buffers->rb_recv_index - buffers->rb_send_index);
1602 req->rl_reply = NULL;
1603 } else {
1604 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1605 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1606 }
1607 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001608
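	/* Attach MWs according to the memory registration strategy in use.
	 * Any stale FRMRs discovered are parked on the local list and
	 * recovered after rb_lock is released.
	 */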
1609 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001610 switch (ia->ri_memreg_strategy) {
1611 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001612 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1613 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001614 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001615 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001616 break;
1617 default:
1618 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001619 }
1620 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001621 if (!list_empty(&stale))
1622 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001623 return req;
1624}
1625
1626/*
1627 * Put request/reply buffers back into pool.
1628 * Pre-decrement counter/array index.
1629 */
1630void
1631rpcrdma_buffer_put(struct rpcrdma_req *req)
1632{
1633 struct rpcrdma_buffer *buffers = req->rl_buffer;
1634 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001635 unsigned long flags;
1636
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001637 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001638 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001639 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001640 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001641 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001642 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001643 break;
1644 default:
1645 break;
1646 }
1647 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1648}
1649
1650/*
1651 * Recover reply buffers from pool.
1652 * This happens when recovering from error conditions.
1653 * Post-increment counter/array index.
1654 */
1655void
1656rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1657{
1658 struct rpcrdma_buffer *buffers = req->rl_buffer;
1659 unsigned long flags;
1660
1661 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1662 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1663 spin_lock_irqsave(&buffers->rb_lock, flags);
1664 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1665 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1666 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1667 }
1668 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1669}
1670
1671/*
1672 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001673 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001674 */
1675void
1676rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1677{
1678 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1679 unsigned long flags;
1680
1681 rep->rr_func = NULL;
1682 spin_lock_irqsave(&buffers->rb_lock, flags);
1683 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1684 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1685}
1686
1687/*
1688 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1689 */
1690
1691int
1692rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1693 struct ib_mr **mrp, struct ib_sge *iov)
1694{
1695 struct ib_phys_buf ipb;
1696 struct ib_mr *mr;
1697 int rc;
1698
1699 /*
1700 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1701 */
1702 iov->addr = ib_dma_map_single(ia->ri_id->device,
1703 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001704 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1705 return -ENOMEM;
1706
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001707 iov->length = len;
1708
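	/* Prefer the device's global DMA lkey, then a pre-registered
	 * all-memory MR; otherwise register a physical MR that covers
	 * just this buffer.
	 */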
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001709 if (ia->ri_have_dma_lkey) {
1710 *mrp = NULL;
1711 iov->lkey = ia->ri_dma_lkey;
1712 return 0;
1713 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001714 *mrp = NULL;
1715 iov->lkey = ia->ri_bind_mem->lkey;
1716 return 0;
1717 }
1718
1719 ipb.addr = iov->addr;
1720 ipb.size = iov->length;
1721 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1722 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1723
1724 dprintk("RPC: %s: phys convert: 0x%llx "
1725 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001726 __func__, (unsigned long long)ipb.addr,
1727 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001728
1729 if (IS_ERR(mr)) {
1730 *mrp = NULL;
1731 rc = PTR_ERR(mr);
1732 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1733 } else {
1734 *mrp = mr;
1735 iov->lkey = mr->lkey;
1736 rc = 0;
1737 }
1738
1739 return rc;
1740}
1741
1742int
1743rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1744 struct ib_mr *mr, struct ib_sge *iov)
1745{
1746 int rc;
1747
1748 ib_dma_unmap_single(ia->ri_id->device,
1749 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1750
1751	if (mr == NULL)
1752 return 0;
1753
1754 rc = ib_dereg_mr(mr);
1755 if (rc)
1756 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1757 return rc;
1758}
1759
1760/*
1761 * Wrappers for chunk registration, shared by read/write chunk code.
1762 */
1763
1764static void
1765rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1766{
1767 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1768 seg->mr_dmalen = seg->mr_len;
1769 if (seg->mr_page)
1770 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1771 seg->mr_page, offset_in_page(seg->mr_offset),
1772 seg->mr_dmalen, seg->mr_dir);
1773 else
1774 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1775 seg->mr_offset,
1776 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001777 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1778 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1779 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001780 (unsigned long long)seg->mr_dma,
1781 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001782 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001783}
1784
1785static void
1786rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1787{
1788 if (seg->mr_page)
1789 ib_dma_unmap_page(ia->ri_id->device,
1790 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1791 else
1792 ib_dma_unmap_single(ia->ri_id->device,
1793 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1794}
1795
Tom Talpey8d4ba032008-10-09 14:59:49 -04001796static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001797rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1798 int *nsegs, int writing, struct rpcrdma_ia *ia,
1799 struct rpcrdma_xprt *r_xprt)
1800{
1801 struct rpcrdma_mr_seg *seg1 = seg;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001802 struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
1803 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1804 struct ib_mr *mr = frmr->fr_mr;
Chuck Leverf590e872014-07-29 17:25:29 -04001805 struct ib_send_wr fastreg_wr, *bad_wr;
Tom Talpey3197d3092008-10-09 15:00:20 -04001806 u8 key;
1807 int len, pageoff;
1808 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001809 int seg_len;
1810 u64 pa;
1811 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001812
1813 pageoff = offset_in_page(seg1->mr_offset);
1814 seg1->mr_offset -= pageoff; /* start of page */
1815 seg1->mr_len += pageoff;
1816 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001817 if (*nsegs > ia->ri_max_frmr_depth)
1818 *nsegs = ia->ri_max_frmr_depth;
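	/* DMA map each segment and add its pages to the FRMR page list,
	 * coalescing segments until the first non-contiguous one (a "hole").
	 */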
Tom Tucker9b781452012-02-20 13:07:57 -06001819 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001820 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001821 pa = seg->mr_dma;
1822 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001823 frmr->fr_pgl->page_list[page_no++] = pa;
Tom Tucker9b781452012-02-20 13:07:57 -06001824 pa += PAGE_SIZE;
1825 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001826 len += seg->mr_len;
1827 ++seg;
1828 ++i;
1829 /* Check for holes */
1830 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1831 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1832 break;
1833 }
1834 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001835 __func__, mw, i);
Tom Talpey3197d3092008-10-09 15:00:20 -04001836
Chuck Lever05055722014-07-29 17:25:12 -04001837 frmr->fr_state = FRMR_IS_VALID;
1838
Chuck Leverf590e872014-07-29 17:25:29 -04001839 memset(&fastreg_wr, 0, sizeof(fastreg_wr));
1840 fastreg_wr.wr_id = (unsigned long)(void *)mw;
1841 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1842 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1843 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
1844 fastreg_wr.wr.fast_reg.page_list_len = page_no;
1845 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1846 fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1847 if (fastreg_wr.wr.fast_reg.length < len) {
Chuck Lever5fc83f42014-07-29 17:23:17 -04001848 rc = -EIO;
1849 goto out_err;
Chuck Leverc977dea2014-05-28 10:35:06 -04001850 }
1851
1852 /* Bump the key */
Chuck Lever0dbb4102014-07-29 17:24:09 -04001853 key = (u8)(mr->rkey & 0x000000FF);
1854 ib_update_fast_reg_key(mr, ++key);
Chuck Leverc977dea2014-05-28 10:35:06 -04001855
Chuck Leverf590e872014-07-29 17:25:29 -04001856 fastreg_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001857 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1858 IB_ACCESS_REMOTE_READ);
Chuck Leverf590e872014-07-29 17:25:29 -04001859 fastreg_wr.wr.fast_reg.rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001860 DECR_CQCOUNT(&r_xprt->rx_ep);
1861
Chuck Leverf590e872014-07-29 17:25:29 -04001862 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001863 if (rc) {
1864 dprintk("RPC: %s: failed ib_post_send for register,"
1865 " status %i\n", __func__, rc);
Chuck Leverc93e9862014-07-29 17:24:19 -04001866 ib_update_fast_reg_key(mr, --key);
Chuck Lever5fc83f42014-07-29 17:23:17 -04001867 goto out_err;
Tom Talpey3197d3092008-10-09 15:00:20 -04001868 } else {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001869 seg1->mr_rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001870 seg1->mr_base = seg1->mr_dma + pageoff;
1871 seg1->mr_nsegs = i;
1872 seg1->mr_len = len;
1873 }
1874 *nsegs = i;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001875 return 0;
1876out_err:
Chuck Lever05055722014-07-29 17:25:12 -04001877 frmr->fr_state = FRMR_IS_INVALID;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001878 while (i--)
1879 rpcrdma_unmap_one(ia, --seg);
Tom Talpey3197d3092008-10-09 15:00:20 -04001880 return rc;
1881}
1882
1883static int
1884rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1885 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1886{
1887 struct rpcrdma_mr_seg *seg1 = seg;
1888 struct ib_send_wr invalidate_wr, *bad_wr;
1889 int rc;
1890
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001891 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
1892
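	/* Post a LOCAL_INV WR to invalidate the rkey; the segments are DMA
	 * unmapped and the WR is posted while holding ri_qplock for read.
	 */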
Tom Talpey3197d3092008-10-09 15:00:20 -04001893 memset(&invalidate_wr, 0, sizeof invalidate_wr);
Tom Tucker5c635e02011-02-09 19:45:34 +00001894 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001895 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Tom Talpey3197d3092008-10-09 15:00:20 -04001896 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1897 DECR_CQCOUNT(&r_xprt->rx_ep);
1898
Chuck Lever73806c82014-07-29 17:23:25 -04001899 read_lock(&ia->ri_qplock);
1900 while (seg1->mr_nsegs--)
1901 rpcrdma_unmap_one(ia, seg++);
Tom Talpey3197d3092008-10-09 15:00:20 -04001902 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
Chuck Lever73806c82014-07-29 17:23:25 -04001903 read_unlock(&ia->ri_qplock);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001904 if (rc) {
1905 /* Force rpcrdma_buffer_get() to retry */
1906 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
Tom Talpey3197d3092008-10-09 15:00:20 -04001907 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1908 " status %i\n", __func__, rc);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001909 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001910 return rc;
1911}
1912
1913static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001914rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1915 int *nsegs, int writing, struct rpcrdma_ia *ia)
1916{
1917 struct rpcrdma_mr_seg *seg1 = seg;
1918 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1919 int len, pageoff, i, rc;
1920
1921 pageoff = offset_in_page(seg1->mr_offset);
1922 seg1->mr_offset -= pageoff; /* start of page */
1923 seg1->mr_len += pageoff;
1924 len = -pageoff;
1925 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1926 *nsegs = RPCRDMA_MAX_DATA_SEGS;
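	/* DMA map each segment and record its physical address, stopping at
	 * the first hole, then map the whole set with a single FMR below.
	 */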
1927 for (i = 0; i < *nsegs;) {
1928 rpcrdma_map_one(ia, seg, writing);
1929 physaddrs[i] = seg->mr_dma;
1930 len += seg->mr_len;
1931 ++seg;
1932 ++i;
1933 /* Check for holes */
1934 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1935 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1936 break;
1937 }
1938 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1939 physaddrs, i, seg1->mr_dma);
1940 if (rc) {
1941 dprintk("RPC: %s: failed ib_map_phys_fmr "
1942 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1943 len, (unsigned long long)seg1->mr_dma,
1944 pageoff, i, rc);
1945 while (i--)
1946 rpcrdma_unmap_one(ia, --seg);
1947 } else {
1948 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1949 seg1->mr_base = seg1->mr_dma + pageoff;
1950 seg1->mr_nsegs = i;
1951 seg1->mr_len = len;
1952 }
1953 *nsegs = i;
1954 return rc;
1955}
1956
1957static int
1958rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1959 struct rpcrdma_ia *ia)
1960{
1961 struct rpcrdma_mr_seg *seg1 = seg;
1962 LIST_HEAD(l);
1963 int rc;
1964
1965 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1966 rc = ib_unmap_fmr(&l);
Chuck Lever73806c82014-07-29 17:23:25 -04001967 read_lock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001968 while (seg1->mr_nsegs--)
1969 rpcrdma_unmap_one(ia, seg++);
Chuck Lever73806c82014-07-29 17:23:25 -04001970 read_unlock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001971 if (rc)
1972 dprintk("RPC: %s: failed ib_unmap_fmr,"
1973 " status %i\n", __func__, rc);
1974 return rc;
1975}
1976
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001977int
1978rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1979 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1980{
1981 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001982 int rc = 0;
1983
1984 switch (ia->ri_memreg_strategy) {
1985
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001986 case RPCRDMA_ALLPHYSICAL:
1987 rpcrdma_map_one(ia, seg, writing);
1988 seg->mr_rkey = ia->ri_bind_mem->rkey;
1989 seg->mr_base = seg->mr_dma;
1990 seg->mr_nsegs = 1;
1991 nsegs = 1;
1992 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001993
Tom Talpey3197d3092008-10-09 15:00:20 -04001994	/* Registration using fast registration memory regions (FRMR) */
1995 case RPCRDMA_FRMR:
1996 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1997 break;
1998
Tom Talpey8d4ba032008-10-09 14:59:49 -04001999	/* Registration using fast memory regions (FMR) */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002000 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002001 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002002 break;
2003
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002004 default:
Chuck Lever92b98362014-11-08 20:14:12 -05002005 return -EIO;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002006 }
2007 if (rc)
Chuck Lever92b98362014-11-08 20:14:12 -05002008 return rc;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002009
2010 return nsegs;
2011}
2012
2013int
2014rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04002015 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002016{
2017 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002018 int nsegs = seg->mr_nsegs, rc;
2019
2020 switch (ia->ri_memreg_strategy) {
2021
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002022 case RPCRDMA_ALLPHYSICAL:
Chuck Lever73806c82014-07-29 17:23:25 -04002023 read_lock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002024 rpcrdma_unmap_one(ia, seg);
Chuck Lever73806c82014-07-29 17:23:25 -04002025 read_unlock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002026 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002027
Tom Talpey3197d3092008-10-09 15:00:20 -04002028 case RPCRDMA_FRMR:
2029 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
2030 break;
2031
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002032 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002033 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002034 break;
2035
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002036 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002037 break;
2038 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002039 return nsegs;
2040}
2041
2042/*
2043 * Prepost any receive buffer, then post send.
2044 *
2045 * Receive buffer is donated to hardware, reclaimed upon recv completion.
2046 */
2047int
2048rpcrdma_ep_post(struct rpcrdma_ia *ia,
2049 struct rpcrdma_ep *ep,
2050 struct rpcrdma_req *req)
2051{
2052 struct ib_send_wr send_wr, *send_wr_fail;
2053 struct rpcrdma_rep *rep = req->rl_reply;
2054 int rc;
2055
2056 if (rep) {
2057 rc = rpcrdma_ep_post_recv(ia, ep, rep);
2058 if (rc)
2059 goto out;
2060 req->rl_reply = NULL;
2061 }
2062
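	/* Build a single SEND work request over the request's send iovecs
	 * and sync each mapped buffer for device access before posting.
	 */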
2063 send_wr.next = NULL;
2064 send_wr.wr_id = 0ULL; /* no send cookie */
2065 send_wr.sg_list = req->rl_send_iov;
2066 send_wr.num_sge = req->rl_niovs;
2067 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002068 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
2069 ib_dma_sync_single_for_device(ia->ri_id->device,
2070 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
2071 DMA_TO_DEVICE);
2072 ib_dma_sync_single_for_device(ia->ri_id->device,
2073 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
2074 DMA_TO_DEVICE);
2075 ib_dma_sync_single_for_device(ia->ri_id->device,
2076 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
2077 DMA_TO_DEVICE);
2078
2079 if (DECR_CQCOUNT(ep) > 0)
2080 send_wr.send_flags = 0;
2081 else { /* Provider must take a send completion every now and then */
2082 INIT_CQCOUNT(ep);
2083 send_wr.send_flags = IB_SEND_SIGNALED;
2084 }
2085
2086 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
2087 if (rc)
2088 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
2089 rc);
2090out:
2091 return rc;
2092}
2093
2094/*
2095 * (Re)post a receive buffer.
2096 */
2097int
2098rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
2099 struct rpcrdma_ep *ep,
2100 struct rpcrdma_rep *rep)
2101{
2102 struct ib_recv_wr recv_wr, *recv_wr_fail;
2103 int rc;
2104
2105 recv_wr.next = NULL;
2106 recv_wr.wr_id = (u64) (unsigned long) rep;
2107 recv_wr.sg_list = &rep->rr_iov;
2108 recv_wr.num_sge = 1;
2109
2110 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2111 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
2112
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002113 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2114
2115 if (rc)
2116 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
2117 rc);
2118 return rc;
2119}
Chuck Lever43e95982014-07-29 17:23:34 -04002120
2121/* Physical mapping means one Read/Write list entry per page.
2122 * All list entries must fit within an inline buffer.
2123 *
2124 * NB: The server must return a Write list for NFS READ,
2125 * which has the same constraint. Factor in the inline
2126 * rsize as well.
2127 */
2128static size_t
2129rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2130{
2131 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2132 unsigned int inline_size, pages;
2133
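	/* The payload ceiling is the number of rpcrdma_segment entries that
	 * fit in the smaller inline buffer (less the minimum header), with
	 * each entry describing one page.
	 */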
2134 inline_size = min_t(unsigned int,
2135 cdata->inline_wsize, cdata->inline_rsize);
2136 inline_size -= RPCRDMA_HDRLEN_MIN;
2137 pages = inline_size / sizeof(struct rpcrdma_segment);
2138 return pages << PAGE_SHIFT;
2139}
2140
2141static size_t
2142rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2143{
2144 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2145}
2146
2147size_t
2148rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2149{
2150 size_t result;
2151
2152 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2153 case RPCRDMA_ALLPHYSICAL:
2154 result = rpcrdma_physical_max_payload(r_xprt);
2155 break;
2156 default:
2157 result = rpcrdma_mr_max_payload(r_xprt);
2158 }
2159 return result;
2160}