blob: 304c7adc83c48b11532a7c8e76c8f14afb7c9e65 [file] [log] [blame]
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
Alexey Dobriyana6b7a402011-06-06 10:43:46 +000050#include <linux/interrupt.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040051#include <linux/pci.h> /* for Tavor hack below */
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090052#include <linux/slab.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040053
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040054#include "xprt_rdma.h"
55
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040056/*
57 * Globals/Macros
58 */
59
60#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif
63
64/*
65 * internal functions
66 */
67
68/*
69 * handle replies in tasklet context, using a single, global list
70 * rdma tasklet function -- just turn around and call the func
71 * for all replies on the list
72 */
73
74static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
75static LIST_HEAD(rpcrdma_tasklets_g);
76
77static void
78rpcrdma_run_tasklet(unsigned long data)
79{
80 struct rpcrdma_rep *rep;
81 void (*func)(struct rpcrdma_rep *);
82 unsigned long flags;
83
84 data = data;
85 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
86 while (!list_empty(&rpcrdma_tasklets_g)) {
87 rep = list_entry(rpcrdma_tasklets_g.next,
88 struct rpcrdma_rep, rr_list);
89 list_del(&rep->rr_list);
90 func = rep->rr_func;
91 rep->rr_func = NULL;
92 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
93
94 if (func)
95 func(rep);
96 else
97 rpcrdma_recv_buffer_put(rep);
98
99 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
100 }
101 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
102}
103
104static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
105
/*
 * Queue a received reply on the global list and schedule the reply
 * tasklet. Uses irqsave locking, so it is callable from the (possibly
 * hard-irq) completion upcall path.
 */
static inline void
rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}
116
/*
 * QP asynchronous-event handler. Any QP error is treated as a lost
 * connection: if the endpoint is currently connected (rep_connected
 * == 1) it is marked -EIO, the transport's rep_func callback runs,
 * and connection waiters are woken.
 */
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}
130
/*
 * CQ asynchronous-event handler. Mirrors the QP handler above:
 * a CQ error on a connected endpoint tears the connection down
 * (-EIO), notifies the transport, and wakes connection waiters.
 */
static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}
144
/*
 * Process one work completion.
 *
 * The wr_id is overloaded: for FAST_REG_MR/LOCAL_INV completions it
 * carries a struct rpcrdma_mw pointer; otherwise it carries a struct
 * rpcrdma_rep pointer (NULL for ordinary send completions, which are
 * ignored). For successful receives, the credit count advertised by
 * the server is validated and latched; the reply is then handed to
 * the tasklet for RPC-layer processing.
 */
static inline
void rpcrdma_event_process(struct ib_wc *wc)
{
	struct rpcrdma_mw *frmr;
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;

	dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (!rep) /* send completion that we don't care about */
		return;

	if (IB_WC_SUCCESS != wc->status) {
		dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
			__func__, wc->opcode, wc->status);
		rep->rr_len = ~0U;
		/* wr_id is an MW, not a rep, for these two opcodes;
		 * do not schedule reply processing on them */
		if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
			rpcrdma_schedule_tasklet(rep);
		return;
	}

	switch (wc->opcode) {
	case IB_WC_FAST_REG_MR:
		frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		frmr->r.frmr.state = FRMR_IS_VALID;
		break;
	case IB_WC_LOCAL_INV:
		frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		frmr->r.frmr.state = FRMR_IS_INVALID;
		break;
	case IB_WC_RECV:
		rep->rr_len = wc->byte_len;
		ib_dma_sync_single_for_cpu(
			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
		/* Keep (only) the most recent credits, after check validity */
		/* 16 bytes covers the header fields up to and including
		 * rm_credit — shorter messages cannot carry credits */
		if (rep->rr_len >= 16) {
			struct rpcrdma_msg *p =
				(struct rpcrdma_msg *) rep->rr_base;
			unsigned int credits = ntohl(p->rm_credit);
			if (credits == 0) {
				dprintk("RPC: %s: server"
					" dropped credits to 0!\n", __func__);
				/* don't deadlock */
				credits = 1;
			} else if (credits > rep->rr_buffer->rb_max_requests) {
				dprintk("RPC: %s: server"
					" over-crediting: %d (%d)\n",
					__func__, credits,
					rep->rr_buffer->rb_max_requests);
				credits = rep->rr_buffer->rb_max_requests;
			}
			atomic_set(&rep->rr_buffer->rb_credits, credits);
		}
		rpcrdma_schedule_tasklet(rep);
		break;
	default:
		dprintk("RPC: %s: unexpected WC event %X\n",
			__func__, wc->opcode);
		break;
	}
}
208
209static inline int
210rpcrdma_cq_poll(struct ib_cq *cq)
211{
212 struct ib_wc wc;
213 int rc;
214
215 for (;;) {
216 rc = ib_poll_cq(cq, 1, &wc);
217 if (rc < 0) {
218 dprintk("RPC: %s: ib_poll_cq failed %i\n",
219 __func__, rc);
220 return rc;
221 }
222 if (rc == 0)
223 break;
224
225 rpcrdma_event_process(&wc);
226 }
227
228 return 0;
229}
230
/*
 * rpcrdma_cq_event_upcall
 *
 * This upcall handles recv and send events.
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 *
 * Send completions arrive here too; those carrying a NULL wr_id are
 * discarded by rpcrdma_event_process().
 *
 * The poll / re-arm / poll-again sequence below closes the race in
 * which a completion arrives between the first drain and the
 * ib_req_notify_cq() re-arm: any such completion is picked up by the
 * second poll.
 */
static void
rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
{
	int rc;

	rc = rpcrdma_cq_poll(cq);
	if (rc)
		return;

	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_cq_poll(cq);
}
264
#ifdef RPC_DEBUG
/*
 * Human-readable names for the RDMA_CM_EVENT_* codes 0..11, indexed
 * directly by event->event in rpcrdma_conn_upcall()'s debug output.
 * Keep this table in sync with the "event->event <= 11" bound used
 * there.
 */
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal"
};
#endif
281
/*
 * RDMA connection-manager event handler, registered via
 * rdma_create_id().
 *
 * Address/route resolution results are reported back to the waiter
 * in rpcrdma_create_id() through ia->ri_async_rc + ri_done.
 * Connection state changes funnel through the "connected:" label:
 * connstate > 0 means established, < 0 carries the errno describing
 * the disconnect cause. In all connection cases the credit count is
 * reset to 1, the transport's rep_func callback runs, and connection
 * waiters are woken.
 *
 * Always returns 0 (no events are rejected back to the CM).
 */
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		/* NOTE(review): the magic "<= 11" must track the size of
		 * the conn[] name table above */
		dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
						"unknown connection error",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ep, event->event);
		/* reset credits; server re-advertises them on reconnect */
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		dprintk("RPC: %s: unexpected CM event %d\n",
			__func__, event->event);
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}
379
/*
 * Create an RDMA CM id and synchronously resolve the remote address
 * and route to it.
 *
 * The CM callbacks complete ia->ri_done and deposit their status in
 * ia->ri_async_rc; ri_async_rc is primed to -ETIMEDOUT before each
 * step so an expired wait reports a timeout.
 *
 * Returns the new id, or ERR_PTR on failure (the id is destroyed on
 * any error after creation).
 */
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}
429
430/*
431 * Drain any cq, prior to teardown.
432 */
433static void
434rpcrdma_clean_cq(struct ib_cq *cq)
435{
436 struct ib_wc wc;
437 int count = 0;
438
439 while (1 == ib_poll_cq(cq, 1, &wc))
440 ++count;
441
442 if (count)
443 dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
444 __func__, count, wc.opcode);
445}
446
/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 * o initializes fields of struct rpcrdma_ia, including
 *   interface and provider attributes and protection zone.
 *
 * The requested memreg strategy is validated against the device's
 * capabilities and may be downgraded (FMR/FRMR -> ALLPHYSICAL or
 * REGISTER) when unsupported; the final choice is recorded in
 * ia->ri_memreg_strategy.
 *
 * Returns 0 on success or a negative errno; on failure the CM id is
 * torn down and ri_id cleared.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	switch (memreg) {
	case RPCRDMA_MTHCAFMR:
		if (!ia->ri_id->device->alloc_fmr) {
#if RPCRDMA_PERSISTENT_REGISTRATION
			dprintk("RPC: %s: MTHCAFMR registration "
				"specified but not supported by adapter, "
				"using riskier RPCRDMA_ALLPHYSICAL\n",
				__func__);
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			dprintk("RPC: %s: MTHCAFMR registration "
				"specified but not supported by adapter, "
				"using slower RPCRDMA_REGISTER\n",
				__func__);
			memreg = RPCRDMA_REGISTER;
#endif
		}
		break;
	case RPCRDMA_FRMR:
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
#if RPCRDMA_PERSISTENT_REGISTRATION
			dprintk("RPC: %s: FRMR registration "
				"specified but not supported by adapter, "
				"using riskier RPCRDMA_ALLPHYSICAL\n",
				__func__);
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			dprintk("RPC: %s: FRMR registration "
				"specified but not supported by adapter, "
				"using slower RPCRDMA_REGISTER\n",
				__func__);
			memreg = RPCRDMA_REGISTER;
#endif
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
		break;
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_REGISTER:
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		/* no DMA MR needed if the device supplies a global lkey */
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n\t"
				"Will continue with degraded performance\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			/* fall back rather than failing the open */
			memreg = RPCRDMA_REGISTER;
			ia->ri_bind_mem = NULL;
		}
		break;
	default:
		printk(KERN_ERR "%s: invalid memory registration mode %d\n",
				__func__, memreg);
		rc = -EINVAL;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}
594
/*
 * Clean up/close an IA.
 * o if event handles and PD have been initialized, free them.
 * o close the IA
 *
 * Safe to call on a partially-initialized IA: each resource is
 * checked for NULL/IS_ERR before release. Deregistration/dealloc
 * failures are logged but not propagated.
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}
623
/*
 * Create unconnected endpoint.
 *
 * Sizes the QP attributes from cdata->max_requests (clamped to the
 * device's max_qp_wr), creates a single CQ shared by send and recv,
 * arms it for notification, and fills in the CM connection
 * parameters. For FRMR, the send queue is multiplied by the
 * worst-case number of register/invalidate WRs per RPC.
 *
 * Returns 0 or a negative errno; the CQ is destroyed on the error
 * path after creation.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		/* if the scaled-up SQ exceeds the device limit, shrink
		 * max_requests to fit */
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	/* one CQ serves both send and recv completions */
	ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
				  rpcrdma_cq_async_error_upcall, NULL,
				  ep->rep_attr.cap.max_recv_wr +
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(ep->rep_cq)) {
		rc = PTR_ERR(ep->rep_cq);
		dprintk("RPC: %s: ib_create_cq failed: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	ep->rep_attr.send_cq = ep->rep_cq;
	ep->rep_attr.recv_cq = ep->rep_cq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(ep->rep_cq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}
763
/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 *
 * The caller's error handling must be sure to not leak the endpoint
 * if this function fails.
 *
 * Cancels the pending connect worker first so it cannot race with
 * teardown; drains the CQ before destroying it.
 */
int
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" returned %i\n", __func__, rc);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_cq);
	rc = ib_destroy_cq(ep->rep_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	return rc;
}
807
/*
 * Connect unconnected endpoint.
 *
 * On reconnect (rep_connected != 0) the old id/QP are torn down and a
 * fresh CM id is resolved first; reconnecting onto a different device
 * is refused (-ENETDOWN). After rdma_connect(), the function blocks
 * until the CM upcall sets rep_connected. Two retry loops apply:
 * ECONNREFUSED (no listener yet, possibly transient) and a fallback
 * that retries with matching nonzero ORD/IRD values, each bounded by
 * RDMA_CONNECT_RETRY_MAX.
 *
 * Returns 0 when connected, or a negative errno (also stored in
 * ep->rep_connected).
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_clean_cq(ep->rep_cq);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = PTR_ERR(id);
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETDOWN;
			goto out;
		}
		/* END TEMP */
		rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = id;
	}

	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
	if (rc) {
		dprintk("RPC: %s: rdma_create_qp failed %i\n",
			__func__, rc);
		goto out;
	}

/* XXX Tavor device performs badly with 2K MTU! */
if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
	struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
	if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
	    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
	     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
		struct ib_qp_attr attr = {
			.path_mtu = IB_MTU_1024
		};
		rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
	}
}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	/* rep_connected is set by rpcrdma_conn_upcall() */
	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}
919
920/*
921 * rpcrdma_ep_disconnect
922 *
923 * This is separate from destroy to facilitate the ability
924 * to reconnect without recreating the endpoint.
925 *
926 * This call is not reentrant, and must not be made in parallel
927 * on the same endpoint.
928 */
929int
930rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
931{
932 int rc;
933
934 rpcrdma_clean_cq(ep->rep_cq);
935 rc = rdma_disconnect(ia->ri_id);
936 if (!rc) {
937 /* returns without wait if not connected */
938 wait_event_interruptible(ep->rep_connect_wait,
939 ep->rep_connected != 1);
940 dprintk("RPC: %s: after wait, %sconnected\n", __func__,
941 (ep->rep_connected == 1) ? "still " : "dis");
942 } else {
943 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
944 ep->rep_connected = rc;
945 }
946 return rc;
947}
948
/*
 * Initialize buffer memory
 *
 * Carves a single kzalloc'ed region into the send/recv pointer arrays,
 * the zeroed pad buffer, and the memory-window (mw) structures, then
 * allocates and DMA-registers each request/reply buffer individually.
 * On any failure the partially-built set is torn down via
 * rpcrdma_buffer_destroy() before returning the error.
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len;
	int i, rc;
	struct rpcrdma_mw *r;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 * 1. arrays for send and recv pointers
	 * 2. arrays of struct rpcrdma_req to fill in pointers
	 * 3. array of struct rpcrdma_rep for replies
	 * 4. padding, if any
	 * 5. mw's, fmr's or frmr's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	/* Carve the region: send pointer array, then recv pointer array. */
	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	/* Remaining space holds the mw structures for FRMR/FMR modes. */
	INIT_LIST_HEAD(&buf->rb_mws);
	r = (struct rpcrdma_mw *)p;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		/* One fast-reg MR plus one page list per mw. */
		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_mr)) {
				rc = PTR_ERR(r->r.frmr.fr_mr);
				dprintk("RPC: %s: ib_alloc_fast_reg_mr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
						ia->ri_id->device,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_pgl)) {
				rc = PTR_ERR(r->r.frmr.fr_pgl);
				dprintk("RPC: %s: "
					"ib_alloc_fast_reg_page_list "
					"failed %i\n", __func__, rc);

				/* undo the MR allocated just above */
				ib_dereg_mr(r->r.frmr.fr_mr);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			/* static: one shared attr template for all FMRs */
			static struct ib_fmr_attr fa =
				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC: %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
		/* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
		/* Typical ~2400b, so rounding up saves work later */
		if (len < 4096)
			len = 4096;
		req = kmalloc(len, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		/* only the header needs zeroing; the tail is send space */
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				len - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);

		len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
		rep = kmalloc(len, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				len - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}
1129
1130/*
1131 * Unregister and destroy buffer memory. Need to deal with
1132 * partial initialization, so it's callable from failed create.
1133 * Must be called before destroying endpoint, as registrations
1134 * reference it.
1135 */
1136void
1137rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1138{
1139 int rc, i;
1140 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001141 struct rpcrdma_mw *r;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001142
1143 /* clean up in reverse order from create
1144 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001145 * 2. send mr memory (mr free, then kfree)
1146 * 3. padding (if any) [moved to rpcrdma_ep_destroy]
1147 * 4. arrays
1148 */
1149 dprintk("RPC: %s: entering\n", __func__);
1150
1151 for (i = 0; i < buf->rb_max_requests; i++) {
1152 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1153 rpcrdma_deregister_internal(ia,
1154 buf->rb_recv_bufs[i]->rr_handle,
1155 &buf->rb_recv_bufs[i]->rr_iov);
1156 kfree(buf->rb_recv_bufs[i]);
1157 }
1158 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001159 rpcrdma_deregister_internal(ia,
1160 buf->rb_send_bufs[i]->rl_handle,
1161 &buf->rb_send_bufs[i]->rl_iov);
1162 kfree(buf->rb_send_bufs[i]);
1163 }
1164 }
1165
Allen Andrews4034ba02014-05-28 10:32:09 -04001166 while (!list_empty(&buf->rb_mws)) {
1167 r = list_entry(buf->rb_mws.next,
1168 struct rpcrdma_mw, mw_list);
1169 list_del(&r->mw_list);
1170 switch (ia->ri_memreg_strategy) {
1171 case RPCRDMA_FRMR:
1172 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1173 if (rc)
1174 dprintk("RPC: %s:"
1175 " ib_dereg_mr"
1176 " failed %i\n",
1177 __func__, rc);
1178 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1179 break;
1180 case RPCRDMA_MTHCAFMR:
1181 rc = ib_dealloc_fmr(r->r.fmr);
1182 if (rc)
1183 dprintk("RPC: %s:"
1184 " ib_dealloc_fmr"
1185 " failed %i\n",
1186 __func__, rc);
1187 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001188 default:
1189 break;
1190 }
1191 }
1192
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001193 kfree(buf->rb_pool);
1194}
1195
/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 *
 * Returns NULL when the pool is exhausted. Caller returns the set via
 * rpcrdma_buffer_put().
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;
	unsigned long flags;
	int i;
	struct rpcrdma_mw *r;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return ((struct rpcrdma_req *)NULL);
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		/* More replies than requests are outstanding: hand out
		 * this request without attaching a reply buffer. */
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
	if (!list_empty(&buffers->rb_mws)) {
		/* Detach RPCRDMA_MAX_SEGS mws for this request's segments.
		 * NOTE(review): the loop assumes rb_mws holds at least
		 * RPCRDMA_MAX_SEGS entries whenever it is non-empty; the
		 * create path allocates (max_requests [*+1]) * MAX_SEGS
		 * and put returns all of them, which appears to maintain
		 * that invariant -- confirm before relying on it. */
		i = RPCRDMA_MAX_SEGS - 1;
		do {
			r = list_entry(buffers->rb_mws.next,
					struct rpcrdma_mw, mw_list);
			list_del(&r->mw_list);
			req->rl_segments[i].mr_chunk.rl_mw = r;
		} while (--i >= 0);
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	return req;
}
1243
/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 *
 * Returns the request (and any attached reply) to the free arrays and,
 * for FRMR/FMR strategies, recycles the request's mws onto rb_mws.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	int i;
	unsigned long flags;

	BUG_ON(req->rl_nchunks != 0);
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_send_bufs[--buffers->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		/*
		 * Cycle mw's back in reverse order, and "spin" them.
		 * This delays and scrambles reuse as much as possible.
		 *
		 * NOTE(review): rl_mw is dereferenced unconditionally for
		 * every segment, so this assumes rpcrdma_buffer_get()
		 * attached an mw to each of them -- confirm the paired
		 * get/put discipline holds on all callers.
		 */
		i = 1;
		do {
			struct rpcrdma_mw **mw;
			mw = &req->rl_segments[i].mr_chunk.rl_mw;
			list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
			*mw = NULL;
		} while (++i < RPCRDMA_MAX_SEGS);
		/* segment 0's mw is appended last */
		list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
			&buffers->rb_mws);
		req->rl_segments[0].mr_chunk.rl_mw = NULL;
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
1288
1289/*
1290 * Recover reply buffers from pool.
1291 * This happens when recovering from error conditions.
1292 * Post-increment counter/array index.
1293 */
1294void
1295rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1296{
1297 struct rpcrdma_buffer *buffers = req->rl_buffer;
1298 unsigned long flags;
1299
1300 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1301 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1302 spin_lock_irqsave(&buffers->rb_lock, flags);
1303 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1304 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1305 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1306 }
1307 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1308}
1309
1310/*
1311 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001312 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001313 */
1314void
1315rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1316{
1317 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1318 unsigned long flags;
1319
1320 rep->rr_func = NULL;
1321 spin_lock_irqsave(&buffers->rb_lock, flags);
1322 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1323 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1324}
1325
1326/*
1327 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1328 */
1329
1330int
1331rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1332 struct ib_mr **mrp, struct ib_sge *iov)
1333{
1334 struct ib_phys_buf ipb;
1335 struct ib_mr *mr;
1336 int rc;
1337
1338 /*
1339 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1340 */
1341 iov->addr = ib_dma_map_single(ia->ri_id->device,
1342 va, len, DMA_BIDIRECTIONAL);
1343 iov->length = len;
1344
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001345 if (ia->ri_have_dma_lkey) {
1346 *mrp = NULL;
1347 iov->lkey = ia->ri_dma_lkey;
1348 return 0;
1349 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001350 *mrp = NULL;
1351 iov->lkey = ia->ri_bind_mem->lkey;
1352 return 0;
1353 }
1354
1355 ipb.addr = iov->addr;
1356 ipb.size = iov->length;
1357 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1358 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1359
1360 dprintk("RPC: %s: phys convert: 0x%llx "
1361 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001362 __func__, (unsigned long long)ipb.addr,
1363 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001364
1365 if (IS_ERR(mr)) {
1366 *mrp = NULL;
1367 rc = PTR_ERR(mr);
1368 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1369 } else {
1370 *mrp = mr;
1371 iov->lkey = mr->lkey;
1372 rc = 0;
1373 }
1374
1375 return rc;
1376}
1377
1378int
1379rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1380 struct ib_mr *mr, struct ib_sge *iov)
1381{
1382 int rc;
1383
1384 ib_dma_unmap_single(ia->ri_id->device,
1385 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1386
1387 if (NULL == mr)
1388 return 0;
1389
1390 rc = ib_dereg_mr(mr);
1391 if (rc)
1392 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1393 return rc;
1394}
1395
1396/*
1397 * Wrappers for chunk registration, shared by read/write chunk code.
1398 */
1399
1400static void
1401rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1402{
1403 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1404 seg->mr_dmalen = seg->mr_len;
1405 if (seg->mr_page)
1406 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1407 seg->mr_page, offset_in_page(seg->mr_offset),
1408 seg->mr_dmalen, seg->mr_dir);
1409 else
1410 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1411 seg->mr_offset,
1412 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001413 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1414 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1415 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001416 (unsigned long long)seg->mr_dma,
1417 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001418 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001419}
1420
1421static void
1422rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1423{
1424 if (seg->mr_page)
1425 ib_dma_unmap_page(ia->ri_id->device,
1426 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1427 else
1428 ib_dma_unmap_single(ia->ri_id->device,
1429 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1430}
1431
/*
 * Register a chunk via a fast-register MR (FRMR).
 *
 * Maps up to ri_max_frmr_depth segments, flattens their pages into the
 * mw's fast-reg page list, bumps the rkey, and posts a FAST_REG_MR WR
 * (preceded by a LOCAL_INV WR if the FRMR was left valid). Stops early
 * at a page-alignment "hole" so the chunk stays virtually contiguous.
 * On success, seg1 carries rkey/base/nsegs/len and *nsegs is the count
 * actually registered.
 */
static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;

	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		/* NOTE(review): page_no counts PAGE_SIZE steps across all
		 * mapped segments; it looks like it could exceed the page
		 * list's ri_max_frmr_depth capacity when segments span
		 * multiple pages -- confirm fr_pgl sizing. */
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
				page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC: %s: Using frmr %p to map %d segments\n",
		__func__, seg1->mr_chunk.rl_mw, i);

	if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
		dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
			__func__,
			seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
		/* Invalidate before using. */
		memset(&invalidate_wr, 0, sizeof invalidate_wr);
		invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
		invalidate_wr.next = &frmr_wr;
		invalidate_wr.opcode = IB_WR_LOCAL_INV;
		invalidate_wr.send_flags = IB_SEND_SIGNALED;
		invalidate_wr.ex.invalidate_rkey =
			seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
		DECR_CQCOUNT(&r_xprt->rx_ep);
		post_wr = &invalidate_wr;
	} else
		post_wr = &frmr_wr;

	/* Bump the key */
	key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);

	/* Prepare FRMR WR */
	memset(&frmr_wr, 0, sizeof frmr_wr);
	frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
	frmr_wr.opcode = IB_WR_FAST_REG_MR;
	frmr_wr.send_flags = IB_SEND_SIGNALED;
	frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
	frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
	frmr_wr.wr.fast_reg.page_list_len = page_no;
	frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
	BUG_ON(frmr_wr.wr.fast_reg.length < len);
	frmr_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);

	if (rc) {
		dprintk("RPC: %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		/* unwind the DMA mappings taken above */
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}
1526
1527static int
1528rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1529 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1530{
1531 struct rpcrdma_mr_seg *seg1 = seg;
1532 struct ib_send_wr invalidate_wr, *bad_wr;
1533 int rc;
1534
1535 while (seg1->mr_nsegs--)
1536 rpcrdma_unmap_one(ia, seg++);
1537
1538 memset(&invalidate_wr, 0, sizeof invalidate_wr);
Tom Tucker5c635e02011-02-09 19:45:34 +00001539 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001540 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Tom Tucker5c635e02011-02-09 19:45:34 +00001541 invalidate_wr.send_flags = IB_SEND_SIGNALED;
Tom Talpey3197d3092008-10-09 15:00:20 -04001542 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1543 DECR_CQCOUNT(&r_xprt->rx_ep);
1544
1545 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1546 if (rc)
1547 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1548 " status %i\n", __func__, rc);
1549 return rc;
1550}
1551
/*
 * Register a chunk through a single FMR.
 *
 * Maps up to RPCRDMA_MAX_DATA_SEGS segments, collecting their DMA
 * addresses, and maps them all with one ib_map_phys_fmr() call.
 * Stops early at a page-alignment "hole" so the mapping stays
 * virtually contiguous. On success *nsegs is the count actually
 * mapped and seg1 carries rkey/base/nsegs/len for the chunk; on
 * failure the DMA mappings are unwound.
 */
static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;		/* pageoff is re-added via mr_len below */
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
				physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC: %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		/* unwind the DMA mappings taken above */
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}
1595
1596static int
1597rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1598 struct rpcrdma_ia *ia)
1599{
1600 struct rpcrdma_mr_seg *seg1 = seg;
1601 LIST_HEAD(l);
1602 int rc;
1603
1604 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1605 rc = ib_unmap_fmr(&l);
1606 while (seg1->mr_nsegs--)
1607 rpcrdma_unmap_one(ia, seg++);
1608 if (rc)
1609 dprintk("RPC: %s: failed ib_unmap_fmr,"
1610 " status %i\n", __func__, rc);
1611 return rc;
1612}
1613
/*
 * Default per-request chunk registration: register the mapped segments
 * as a fresh physical MR each time.
 *
 * Stops early at a page-alignment "hole" so the chunk stays virtually
 * contiguous. On success *nsegs is the count registered and seg1
 * carries the rkey/base/nsegs/len; on failure the DMA mappings are
 * unwound.
 */
static int
rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
				  IB_ACCESS_REMOTE_READ);
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
	int len, i, rc = 0;

	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (len = 0, i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		ipb[i].addr = seg->mr_dma;
		ipb[i].size = seg->mr_len;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
			break;
	}
	seg1->mr_base = seg1->mr_dma;
	seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
				ipb, i, mem_priv, &seg1->mr_base);
	if (IS_ERR(seg1->mr_chunk.rl_mr)) {
		rc = PTR_ERR(seg1->mr_chunk.rl_mr);
		dprintk("RPC: %s: failed ib_reg_phys_mr "
			"%u@0x%llx (%d)... status %i\n",
			__func__, len,
			(unsigned long long)seg1->mr_dma, i, rc);
		/* unwind the DMA mappings taken above */
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}
1657
1658static int
1659rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
1660 struct rpcrdma_ia *ia)
1661{
1662 struct rpcrdma_mr_seg *seg1 = seg;
1663 int rc;
1664
1665 rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
1666 seg1->mr_chunk.rl_mr = NULL;
1667 while (seg1->mr_nsegs--)
1668 rpcrdma_unmap_one(ia, seg++);
1669 if (rc)
1670 dprintk("RPC: %s: failed ib_dereg_mr,"
1671 " status %i\n", __func__, rc);
1672 return rc;
1673}
1674
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001675int
1676rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1677 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1678{
1679 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001680 int rc = 0;
1681
1682 switch (ia->ri_memreg_strategy) {
1683
1684#if RPCRDMA_PERSISTENT_REGISTRATION
1685 case RPCRDMA_ALLPHYSICAL:
1686 rpcrdma_map_one(ia, seg, writing);
1687 seg->mr_rkey = ia->ri_bind_mem->rkey;
1688 seg->mr_base = seg->mr_dma;
1689 seg->mr_nsegs = 1;
1690 nsegs = 1;
1691 break;
1692#endif
1693
Tom Talpey3197d3092008-10-09 15:00:20 -04001694 /* Registration using frmr registration */
1695 case RPCRDMA_FRMR:
1696 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1697 break;
1698
Tom Talpey8d4ba032008-10-09 14:59:49 -04001699 /* Registration using fmr memory registration */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001700 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001701 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001702 break;
1703
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001704 /* Default registration each time */
1705 default:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001706 rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001707 break;
1708 }
1709 if (rc)
1710 return -1;
1711
1712 return nsegs;
1713}
1714
1715int
1716rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1717 struct rpcrdma_xprt *r_xprt, void *r)
1718{
1719 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001720 int nsegs = seg->mr_nsegs, rc;
1721
1722 switch (ia->ri_memreg_strategy) {
1723
1724#if RPCRDMA_PERSISTENT_REGISTRATION
1725 case RPCRDMA_ALLPHYSICAL:
1726 BUG_ON(nsegs != 1);
1727 rpcrdma_unmap_one(ia, seg);
1728 rc = 0;
1729 break;
1730#endif
1731
Tom Talpey3197d3092008-10-09 15:00:20 -04001732 case RPCRDMA_FRMR:
1733 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1734 break;
1735
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001736 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001737 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001738 break;
1739
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001740 default:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001741 rc = rpcrdma_deregister_default_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001742 break;
1743 }
1744 if (r) {
1745 struct rpcrdma_rep *rep = r;
1746 void (*func)(struct rpcrdma_rep *) = rep->rr_func;
1747 rep->rr_func = NULL;
1748 func(rep); /* dereg done, callback now */
1749 }
1750 return nsegs;
1751}
1752
/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 * The send is left unsignaled until the endpoint's CQ count budget is
 * exhausted, at which point a signaled completion is requested.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	/* post the attached reply buffer first, if there is one */
	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	/* Sync the sge buffers for device access before posting.
	 * NOTE(review): rl_send_iov[2] is never synced here -- presumably
	 * it references separately-managed memory when present; confirm
	 * against the marshaling code that builds rl_send_iov. */
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else { /* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}
1804
1805/*
1806 * (Re)post a receive buffer.
1807 */
1808int
1809rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1810 struct rpcrdma_ep *ep,
1811 struct rpcrdma_rep *rep)
1812{
1813 struct ib_recv_wr recv_wr, *recv_wr_fail;
1814 int rc;
1815
1816 recv_wr.next = NULL;
1817 recv_wr.wr_id = (u64) (unsigned long) rep;
1818 recv_wr.sg_list = &rep->rr_iov;
1819 recv_wr.num_sge = 1;
1820
1821 ib_dma_sync_single_for_cpu(ia->ri_id->device,
1822 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
1823
1824 DECR_CQCOUNT(ep);
1825 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
1826
1827 if (rc)
1828 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
1829 rc);
1830 return rc;
1831}