\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
Alexey Dobriyana6b7a402011-06-06 10:43:46 +000050#include <linux/interrupt.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040051#include <linux/pci.h> /* for Tavor hack below */
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090052#include <linux/slab.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040053
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040054#include "xprt_rdma.h"
55
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040056/*
57 * Globals/Macros
58 */
59
60#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif
63
64/*
65 * internal functions
66 */
67
68/*
69 * handle replies in tasklet context, using a single, global list
70 * rdma tasklet function -- just turn around and call the func
71 * for all replies on the list
72 */
73
74static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
75static LIST_HEAD(rpcrdma_tasklets_g);
76
77static void
78rpcrdma_run_tasklet(unsigned long data)
79{
80 struct rpcrdma_rep *rep;
81 void (*func)(struct rpcrdma_rep *);
82 unsigned long flags;
83
84 data = data;
85 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
86 while (!list_empty(&rpcrdma_tasklets_g)) {
87 rep = list_entry(rpcrdma_tasklets_g.next,
88 struct rpcrdma_rep, rr_list);
89 list_del(&rep->rr_list);
90 func = rep->rr_func;
91 rep->rr_func = NULL;
92 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
93
94 if (func)
95 func(rep);
96 else
97 rpcrdma_recv_buffer_put(rep);
98
99 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
100 }
101 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
102}
103
104static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
105
106static inline void
107rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
108{
109 unsigned long flags;
110
111 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
112 list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
113 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
114 tasklet_schedule(&rpcrdma_tasklet_g);
115}
116
117static void
118rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
119{
120 struct rpcrdma_ep *ep = context;
121
122 dprintk("RPC: %s: QP error %X on device %s ep %p\n",
123 __func__, event->event, event->device->name, context);
124 if (ep->rep_connected == 1) {
125 ep->rep_connected = -EIO;
126 ep->rep_func(ep);
127 wake_up_all(&ep->rep_connect_wait);
128 }
129}
130
131static void
132rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
133{
134 struct rpcrdma_ep *ep = context;
135
136 dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
137 __func__, event->event, event->device->name, context);
138 if (ep->rep_connected == 1) {
139 ep->rep_connected = -EIO;
140 ep->rep_func(ep);
141 wake_up_all(&ep->rep_connect_wait);
142 }
143}
144
145static inline
146void rpcrdma_event_process(struct ib_wc *wc)
147{
Tom Tucker5c635e02011-02-09 19:45:34 +0000148 struct rpcrdma_mw *frmr;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400149 struct rpcrdma_rep *rep =
150 (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
151
152 dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
153 __func__, rep, wc->status, wc->opcode, wc->byte_len);
154
Chuck Leverb45ccfd2014-05-28 10:32:34 -0400155 if (!rep) /* send completion that we don't care about */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400156 return;
157
158 if (IB_WC_SUCCESS != wc->status) {
Tom Tucker5c635e02011-02-09 19:45:34 +0000159 dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
160 __func__, wc->opcode, wc->status);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400161 rep->rr_len = ~0U;
Tom Tucker5c635e02011-02-09 19:45:34 +0000162 if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
163 rpcrdma_schedule_tasklet(rep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400164 return;
165 }
166
167 switch (wc->opcode) {
Tom Tucker5c635e02011-02-09 19:45:34 +0000168 case IB_WC_FAST_REG_MR:
169 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
170 frmr->r.frmr.state = FRMR_IS_VALID;
171 break;
172 case IB_WC_LOCAL_INV:
173 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
174 frmr->r.frmr.state = FRMR_IS_INVALID;
175 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400176 case IB_WC_RECV:
177 rep->rr_len = wc->byte_len;
178 ib_dma_sync_single_for_cpu(
179 rdmab_to_ia(rep->rr_buffer)->ri_id->device,
180 rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
181 /* Keep (only) the most recent credits, after check validity */
182 if (rep->rr_len >= 16) {
183 struct rpcrdma_msg *p =
184 (struct rpcrdma_msg *) rep->rr_base;
185 unsigned int credits = ntohl(p->rm_credit);
186 if (credits == 0) {
187 dprintk("RPC: %s: server"
188 " dropped credits to 0!\n", __func__);
189 /* don't deadlock */
190 credits = 1;
191 } else if (credits > rep->rr_buffer->rb_max_requests) {
192 dprintk("RPC: %s: server"
193 " over-crediting: %d (%d)\n",
194 __func__, credits,
195 rep->rr_buffer->rb_max_requests);
196 credits = rep->rr_buffer->rb_max_requests;
197 }
198 atomic_set(&rep->rr_buffer->rb_credits, credits);
199 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400200 rpcrdma_schedule_tasklet(rep);
201 break;
202 default:
203 dprintk("RPC: %s: unexpected WC event %X\n",
204 __func__, wc->opcode);
205 break;
206 }
207}
208
209static inline int
210rpcrdma_cq_poll(struct ib_cq *cq)
211{
212 struct ib_wc wc;
213 int rc;
214
215 for (;;) {
216 rc = ib_poll_cq(cq, 1, &wc);
217 if (rc < 0) {
218 dprintk("RPC: %s: ib_poll_cq failed %i\n",
219 __func__, rc);
220 return rc;
221 }
222 if (rc == 0)
223 break;
224
225 rpcrdma_event_process(&wc);
226 }
227
228 return 0;
229}
230
231/*
232 * rpcrdma_cq_event_upcall
233 *
Chuck Leverb45ccfd2014-05-28 10:32:34 -0400234 * This upcall handles recv and send events.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400235 * It is reentrant but processes single events in order to maintain
236 * ordering of receives to keep server credits.
237 *
238 * It is the responsibility of the scheduled tasklet to return
239 * recv buffers to the pool. NOTE: this affects synchronization of
240 * connection shutdown. That is, the structures required for
241 * the completion of the reply handler must remain intact until
242 * all memory has been reclaimed.
243 *
244 * Note that send events are suppressed and do not result in an upcall.
245 */
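/*
 * (A note on the poll/re-arm/poll pattern below: the CQ is drained once,
 *  re-armed with ib_req_notify_cq(), and then drained again.  The second
 *  poll catches any completion that arrived between the first drain and
 *  the re-arm, which would otherwise not raise a new interrupt.)
 */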
static void
rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
{
	int rc;

	rc = rpcrdma_cq_poll(cq);
	if (rc)
		return;

	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_cq_poll(cq);
}

#ifdef RPC_DEBUG
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal"
};
#endif
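
/* The strings above are indexed by the RDMA_CM_EVENT_* values 0..11;
 * rpcrdma_conn_upcall() below bounds-checks event->event before using
 * this table.
 */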

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
				"unknown connection error",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ep, event->event);
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		dprintk("RPC: %s: unexpected CM event %d\n",
			__func__, event->event);
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	switch (memreg) {
	case RPCRDMA_MTHCAFMR:
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"specified but not supported by adapter, "
				"using riskier RPCRDMA_ALLPHYSICAL\n",
				__func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
		break;
	case RPCRDMA_FRMR:
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"specified but not supported by adapter, "
				"using riskier RPCRDMA_ALLPHYSICAL\n",
				__func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
		break;
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "%s: invalid memory registration mode %d\n",
				__func__, memreg);
		rc = -EINVAL;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
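
		/* Illustrative example (hypothetical values): with
		 * RPCRDMA_MAX_DATA_SEGS == 256 and a device limit of 64
		 * pages per FRMR, delta starts at 192 and the loop above
		 * runs three times, growing depth from 7 to 13 WRs per
		 * RPC (three extra reg/invalidate pairs for the pagelist).
		 */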
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
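	/* (INIT_CQCOUNT/DECR_CQCOUNT implement the trigger above: sends are
	 * normally posted unsignaled, and rpcrdma_ep_post() requests a
	 * signaled completion only when the counter runs down, so the
	 * provider can reap send WRs without an interrupt per send.)
	 */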
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
				  rpcrdma_cq_async_error_upcall, NULL,
				  ep->rep_attr.cap.max_recv_wr +
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(ep->rep_cq)) {
		rc = PTR_ERR(ep->rep_cq);
		dprintk("RPC: %s: ib_create_cq failed: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	ep->rep_attr.send_cq = ep->rep_cq;
	ep->rep_attr.recv_cq = ep->rep_cq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(ep->rep_cq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 *
 * The caller's error handling must be sure to not leak the endpoint
 * if this function fails.
 */
int
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" returned %i\n", __func__, rc);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_cq);
	rc = ib_destroy_cq(ep->rep_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	return rc;
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_clean_cq(ep->rep_cq);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = PTR_ERR(id);
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETDOWN;
			goto out;
		}
		/* END TEMP */
		rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = id;
	}

	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
	if (rc) {
		dprintk("RPC: %s: rdma_create_qp failed %i\n",
			__func__, rc);
		goto out;
	}

/* XXX Tavor device performs badly with 2K MTU! */
if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
	struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
	if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
	    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
	     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
		struct ib_qp_attr attr = {
			.path_mtu = IB_MTU_1024
		};
		rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
	}
}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
int
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_clean_cq(ep->rep_cq);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
	return rc;
}

/*
 * Initialize buffer memory
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len;
	int i, rc;
	struct rpcrdma_mw *r;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 *   4.  padding, if any
	 *   5.  mw's, fmr's or frmr's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	r = (struct rpcrdma_mw *)p;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_mr)) {
				rc = PTR_ERR(r->r.frmr.fr_mr);
				dprintk("RPC: %s: ib_alloc_fast_reg_mr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
						ia->ri_id->device,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_pgl)) {
				rc = PTR_ERR(r->r.frmr.fr_pgl);
				dprintk("RPC: %s: "
					"ib_alloc_fast_reg_page_list "
					"failed %i\n", __func__, rc);

				ib_dereg_mr(r->r.frmr.fr_mr);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			static struct ib_fmr_attr fa =
				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC: %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
		/* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
		/* Typical ~2400b, so rounding up saves work later */
		if (len < 4096)
			len = 4096;
		req = kmalloc(len, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				len - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);

		len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
		rep = kmalloc(len, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				len - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

/*
 * Unregister and destroy buffer memory. Need to deal with
 * partial initialization, so it's callable from failed create.
 * Must be called before destroying endpoint, as registrations
 * reference it.
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	int rc, i;
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *r;

	/* clean up in reverse order from create
	 *   1.  recv mr memory (mr free, then kfree)
	 *   2.  send mr memory (mr free, then kfree)
	 *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
	 *   4.  arrays
	 */
	dprintk("RPC: %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_recv_bufs[i]->rr_handle,
					&buf->rb_recv_bufs[i]->rr_iov);
			kfree(buf->rb_recv_bufs[i]);
		}
		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_send_bufs[i]->rl_handle,
					&buf->rb_send_bufs[i]->rl_iov);
			kfree(buf->rb_send_bufs[i]);
		}
	}

	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rc = ib_dereg_mr(r->r.frmr.fr_mr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dereg_mr"
					" failed %i\n",
					__func__, rc);
			ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
			break;
		case RPCRDMA_MTHCAFMR:
			rc = ib_dealloc_fmr(r->r.fmr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dealloc_fmr"
					" failed %i\n",
					__func__, rc);
			break;
		default:
			break;
		}
	}

	kfree(buf->rb_pool);
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;
	unsigned long flags;
	int i;
	struct rpcrdma_mw *r;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return ((struct rpcrdma_req *)NULL);
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
	if (!list_empty(&buffers->rb_mws)) {
		i = RPCRDMA_MAX_SEGS - 1;
		do {
			r = list_entry(buffers->rb_mws.next,
					struct rpcrdma_mw, mw_list);
			list_del(&r->mw_list);
			req->rl_segments[i].mr_chunk.rl_mw = r;
		} while (--i >= 0);
	}
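	/* Each request leaves here holding one rpcrdma_mw per segment slot,
	 * presumably so that chunk registration can proceed without taking
	 * rb_lock again for every segment.
	 */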
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	return req;
}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	int i;
	unsigned long flags;

	BUG_ON(req->rl_nchunks != 0);
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_send_bufs[--buffers->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		/*
		 * Cycle mw's back in reverse order, and "spin" them.
		 * This delays and scrambles reuse as much as possible.
		 */
		i = 1;
		do {
			struct rpcrdma_mw **mw;
			mw = &req->rl_segments[i].mr_chunk.rl_mw;
			list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
			*mw = NULL;
		} while (++i < RPCRDMA_MAX_SEGS);
		list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
					&buffers->rb_mws);
		req->rl_segments[0].mr_chunk.rl_mw = NULL;
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
				struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
				struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (NULL == mr)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}

static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;

	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
				page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC: %s: Using frmr %p to map %d segments\n",
		__func__, seg1->mr_chunk.rl_mw, i);

	if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
		dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
			__func__,
			seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
		/* Invalidate before using. */
		memset(&invalidate_wr, 0, sizeof invalidate_wr);
		invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
		invalidate_wr.next = &frmr_wr;
		invalidate_wr.opcode = IB_WR_LOCAL_INV;
		invalidate_wr.send_flags = IB_SEND_SIGNALED;
		invalidate_wr.ex.invalidate_rkey =
			seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
		DECR_CQCOUNT(&r_xprt->rx_ep);
		post_wr = &invalidate_wr;
	} else
		post_wr = &frmr_wr;

	/* Bump the key */
	key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
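	/* (The low-order byte of an FRMR rkey acts as a generation number;
	 * bumping it for each registration means a stale or duplicate RDMA
	 * operation using the previous rkey is rejected by the HCA.)
	 */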

	/* Prepare FRMR WR */
	memset(&frmr_wr, 0, sizeof frmr_wr);
	frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
	frmr_wr.opcode = IB_WR_FAST_REG_MR;
	frmr_wr.send_flags = IB_SEND_SIGNALED;
	frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
	frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
	frmr_wr.wr.fast_reg.page_list_len = page_no;
	frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
	BUG_ON(frmr_wr.wr.fast_reg.length < len);
	frmr_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);

	if (rc) {
		dprintk("RPC: %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);

	memset(&invalidate_wr, 0, sizeof invalidate_wr);
	invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.send_flags = IB_SEND_SIGNALED;
	invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	if (rc)
		dprintk("RPC: %s: failed ib_post_send for invalidate,"
			" status %i\n", __func__, rc);
	return rc;
}

static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
				physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC: %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	LIST_HEAD(l);
	int rc;

	list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	if (rc)
		dprintk("RPC: %s: failed ib_unmap_fmr,"
			" status %i\n", __func__, rc);
	return rc;
}

int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc = 0;

	switch (ia->ri_memreg_strategy) {

#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		rpcrdma_map_one(ia, seg, writing);
		seg->mr_rkey = ia->ri_bind_mem->rkey;
		seg->mr_base = seg->mr_dma;
		seg->mr_nsegs = 1;
		nsegs = 1;
		break;
#endif

	/* Registration using frmr registration */
	case RPCRDMA_FRMR:
		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
		break;

	/* Registration using fmr memory registration */
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
		break;

	default:
		return -1;
	}
	if (rc)
		return -1;

	return nsegs;
}

int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
		struct rpcrdma_xprt *r_xprt, void *r)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		BUG_ON(nsegs != 1);
		rpcrdma_unmap_one(ia, seg);
		rc = 0;
		break;
#endif

	case RPCRDMA_FRMR:
		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
		break;

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_deregister_fmr_external(seg, ia);
		break;

	default:
		break;
	}
	if (r) {
		struct rpcrdma_rep *rep = r;
		void (*func)(struct rpcrdma_rep *) = rep->rr_func;
		rep->rr_func = NULL;
		func(rep);	/* dereg done, callback now */
	}
	return nsegs;
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else { /* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);

	DECR_CQCOUNT(ep);
	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}