\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
Alexey Dobriyana6b7a402011-06-06 10:43:46 +000050#include <linux/interrupt.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090051#include <linux/slab.h>
Chuck Lever65866f82014-05-28 10:33:59 -040052#include <asm/bitops.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040053
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040054#include "xprt_rdma.h"
55
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040056/*
57 * Globals/Macros
58 */
59
60#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif
63
Chuck Lever9f9d8022014-07-29 17:24:45 -040064static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
65
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040066/*
67 * internal functions
68 */
69
70/*
71 * handle replies in tasklet context, using a single, global list
72 * rdma tasklet function -- just turn around and call the func
73 * for all replies on the list
74 */
75
76static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
77static LIST_HEAD(rpcrdma_tasklets_g);
78
79static void
80rpcrdma_run_tasklet(unsigned long data)
81{
82 struct rpcrdma_rep *rep;
83 void (*func)(struct rpcrdma_rep *);
84 unsigned long flags;
85
86 data = data;
87 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
88 while (!list_empty(&rpcrdma_tasklets_g)) {
89 rep = list_entry(rpcrdma_tasklets_g.next,
90 struct rpcrdma_rep, rr_list);
91 list_del(&rep->rr_list);
92 func = rep->rr_func;
93 rep->rr_func = NULL;
94 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
95
96 if (func)
97 func(rep);
98 else
99 rpcrdma_recv_buffer_put(rep);
100
101 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
102 }
103 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
104}
105
106static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
107
108static inline void
109rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
110{
111 unsigned long flags;
112
113 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
114 list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
115 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
116 tasklet_schedule(&rpcrdma_tasklet_g);
117}
118
119static void
120rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
121{
122 struct rpcrdma_ep *ep = context;
123
124 dprintk("RPC: %s: QP error %X on device %s ep %p\n",
125 __func__, event->event, event->device->name, context);
126 if (ep->rep_connected == 1) {
127 ep->rep_connected = -EIO;
128 ep->rep_func(ep);
129 wake_up_all(&ep->rep_connect_wait);
130 }
131}
132
133static void
134rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
135{
136 struct rpcrdma_ep *ep = context;
137
138 dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
139 __func__, event->event, event->device->name, context);
140 if (ep->rep_connected == 1) {
141 ep->rep_connected = -EIO;
142 ep->rep_func(ep);
143 wake_up_all(&ep->rep_connect_wait);
144 }
145}
146
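/* Handle a single send CQ completion. Plain SEND WRs are posted with a
 * zero wr_id and need no per-completion handling; FAST_REG_MR and
 * LOCAL_INV WRs carry their rpcrdma_mw pointer in wr_id, and anything
 * other than IB_WC_SUCCESS marks that FRMR stale so it is recovered
 * before being handed out again.
 */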
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: frmr %p status %X opcode %d\n",
		__func__, frmr, wc->status, wc->opcode);

	if (wc->wr_id == 0ULL)
		return;
	if (wc->status != IB_WC_SUCCESS)
		frmr->r.frmr.fr_state = FRMR_IS_STALE;
}

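/* Poll the send CQ in batches of RPCRDMA_POLLSIZE completions, using the
 * per-endpoint rep_send_wcs array as scratch space. At most
 * RPCRDMA_WC_BUDGET completions are processed per upcall; the loop also
 * stops as soon as the CQ drains. Returns 0, or a negative ib_poll_cq()
 * error.
 */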
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (wc->status != IB_WC_SUCCESS) {
		rep->rr_len = ~0U;
		goto out_schedule;
	}
	if (wc->opcode != IB_WC_RECV)
		return;

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	rpcrdma_schedule_tasklet(rep);
}

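/* Poll the receive CQ under the same budget scheme as the send side,
 * batching up to RPCRDMA_POLLSIZE completions at a time into
 * ep->rep_recv_wcs.
 */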
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

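/* Run both completion handlers directly to drain whatever is still
 * sitting on the receive and send CQs, e.g. around a disconnect or
 * reconnect.
 */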
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
	rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
}

#ifdef RPC_DEBUG
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal"
};
#endif

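/* Connection manager event handler, registered via rdma_create_id().
 * Address and route resolution results are recorded in ia->ri_async_rc
 * and signalled through ia->ri_done; connection state changes set
 * ep->rep_connected, reset the credit count, invoke ep->rep_func and
 * wake anyone waiting on rep_connect_wait.
 */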
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
				"unknown connection error",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ep, event->event);
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		dprintk("RPC: %s: unexpected CM event %d\n",
			__func__, event->event);
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

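/* Create an rdma_cm_id for this connection and synchronously resolve
 * the server's address and a route to it, waiting (with a timeout) for
 * the CM callbacks above to signal completion. Returns the new id, or
 * an ERR_PTR on failure.
 */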
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
#if RPCRDMA_PERSISTENT_REGISTRATION
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			rc = -ENOMEM;
			goto out2;
#endif
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" returned %i\n", __func__, rc);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_flush_cqs(ep);

		if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
			rpcrdma_reset_frmrs(ia);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
int
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
	return rc;
}

/*
 * Initialize buffer memory
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len, rlen, wlen;
	int i, rc;
	struct rpcrdma_mw *r;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 *   4.  padding, if any
	 *   5.  mw's, fmr's or frmr's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	r = (struct rpcrdma_mw *)p;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_mr)) {
				rc = PTR_ERR(r->r.frmr.fr_mr);
				dprintk("RPC: %s: ib_alloc_fast_reg_mr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
						ia->ri_id->device,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_pgl)) {
				rc = PTR_ERR(r->r.frmr.fr_pgl);
				dprintk("RPC: %s: "
					"ib_alloc_fast_reg_page_list "
					"failed %i\n", __func__, rc);

				ib_dereg_mr(r->r.frmr.fr_mr);
				goto out;
			}
			list_add(&r->mw_all, &buf->rb_all);
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			static struct ib_fmr_attr fa =
				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC: %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_all, &buf->rb_all);
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
	rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
	dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
		__func__, wlen, rlen);

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = kmalloc(wlen, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				wlen - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = wlen -
						sizeof(struct rpcrdma_req);

		rep = kmalloc(rlen, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				rlen - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

/*
 * Unregister and destroy buffer memory. Need to deal with
 * partial initialization, so it's callable from failed create.
 * Must be called before destroying endpoint, as registrations
 * reference it.
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	int rc, i;
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *r;

	/* clean up in reverse order from create
	 *   1.  recv mr memory (mr free, then kfree)
	 *   2.  send mr memory (mr free, then kfree)
	 *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
	 *   4.  arrays
	 */
	dprintk("RPC: %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_recv_bufs[i]->rr_handle,
					&buf->rb_recv_bufs[i]->rr_iov);
			kfree(buf->rb_recv_bufs[i]);
		}
		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_send_bufs[i]->rl_handle,
					&buf->rb_send_bufs[i]->rl_iov);
			kfree(buf->rb_send_bufs[i]);
		}
	}

	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			struct rpcrdma_mw, mw_list);
		list_del(&r->mw_all);
		list_del(&r->mw_list);
		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rc = ib_dereg_mr(r->r.frmr.fr_mr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dereg_mr"
					" failed %i\n",
					__func__, rc);
			ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
			break;
		case RPCRDMA_MTHCAFMR:
			rc = ib_dealloc_fmr(r->r.fmr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dealloc_fmr"
					" failed %i\n",
					__func__, rc);
			break;
		default:
			break;
		}
	}

	kfree(buf->rb_pool);
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each. FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC: %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
}

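/* Return a request, and any reply still attached to it, to the buffer
 * pool arrays. The indexes are pre-decremented so they keep pointing at
 * the next available slot.
 */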
static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

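/* Harvest MWs from the front of rb_mws to populate req->rl_segments.
 * FRMRs found in the STALE state are moved to the caller's "stale" list
 * for recovery instead of being handed out. If the pool cannot supply a
 * full set, the req and any MRs already taken are put back and NULL is
 * returned.
 */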
Chuck Leverc2922c02014-07-29 17:24:36 -04001409static struct rpcrdma_req *
Chuck Leverddb6beb2014-07-29 17:24:54 -04001410rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1411 struct list_head *stale)
1412{
1413 struct rpcrdma_mw *r;
1414 int i;
1415
1416 i = RPCRDMA_MAX_SEGS - 1;
1417 while (!list_empty(&buf->rb_mws)) {
1418 r = list_entry(buf->rb_mws.next,
1419 struct rpcrdma_mw, mw_list);
1420 list_del(&r->mw_list);
1421 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1422 list_add(&r->mw_list, stale);
1423 continue;
1424 }
1425 req->rl_segments[i].mr_chunk.rl_mw = r;
1426 if (unlikely(i-- == 0))
1427 return req; /* Success */
1428 }
1429
1430 /* Not enough entries on rb_mws for this req */
1431 rpcrdma_buffer_put_sendbuf(req, buf);
1432 rpcrdma_buffer_put_mrs(req, buf);
1433 return NULL;
1434}
1435
1436static struct rpcrdma_req *
1437rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001438{
1439 struct rpcrdma_mw *r;
1440 int i;
1441
1442 i = RPCRDMA_MAX_SEGS - 1;
1443 while (!list_empty(&buf->rb_mws)) {
1444 r = list_entry(buf->rb_mws.next,
1445 struct rpcrdma_mw, mw_list);
1446 list_del(&r->mw_list);
1447 req->rl_segments[i].mr_chunk.rl_mw = r;
1448 if (unlikely(i-- == 0))
1449 return req; /* Success */
1450 }
1451
1452 /* Not enough entries on rb_mws for this req */
1453 rpcrdma_buffer_put_sendbuf(req, buf);
1454 rpcrdma_buffer_put_mrs(req, buf);
1455 return NULL;
1456}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	struct list_head stale;
	struct rpcrdma_req *req;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return NULL;
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;

	INIT_LIST_HEAD(&stale);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
		break;
	case RPCRDMA_MTHCAFMR:
		req = rpcrdma_buffer_get_fmrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	if (!list_empty(&stale))
		rpcrdma_retry_flushed_linv(&stale, buffers);
	return req;
}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
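
/*
 * Illustrative sketch only (not part of the transport code paths): how a
 * caller typically pairs rpcrdma_buffer_get() with rpcrdma_buffer_put()
 * around posting a request. "r_xprt" is a hypothetical rpcrdma_xprt the
 * caller already holds; the rx_buf member name is assumed by analogy with
 * the rx_ia/rx_ep/rx_data members used elsewhere in this file.
 */
#if 0	/* usage sketch, not compiled */
	struct rpcrdma_req *req;
	int rc;

	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
	if (req == NULL)
		return -ENOMEM;		/* pool exhausted; caller must back off */

	/* ... marshal the RPC into req->rl_send_iov ... */

	rc = rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req);
	if (rc)
		rpcrdma_buffer_put(req);	/* return send (and reply) buffers */
#endif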

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
				struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
				struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (mr == NULL)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}
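
/*
 * Illustrative sketch only: registering a kmalloc'ed scratch buffer for
 * local DMA with the wrappers above and releasing it again. The buffer
 * length and GFP flags are arbitrary example values.
 */
#if 0	/* usage sketch, not compiled */
	struct ib_mr *mr;
	struct ib_sge iov;
	void *p;
	int rc;

	p = kmalloc(1024, GFP_KERNEL);
	if (p == NULL)
		return -ENOMEM;

	rc = rpcrdma_register_internal(ia, p, 1024, &mr, &iov);
	if (rc == 0) {
		/* iov.addr/iov.length/iov.lkey are now usable in a local SGE */
		rpcrdma_deregister_internal(ia, mr, &iov);
	}
	kfree(p);
#endif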

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
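
/*
 * Illustrative sketch only: a single page-backed segment being DMA-mapped
 * for a write chunk (data flows from the server into this memory) and then
 * unmapped. "page" and "count" are hypothetical values owned by the caller.
 */
#if 0	/* usage sketch, not compiled */
	struct rpcrdma_mr_seg seg;

	memset(&seg, 0, sizeof(seg));
	seg.mr_page = page;		/* struct page backing the data */
	seg.mr_offset = (void *)0;	/* byte offset within that page */
	seg.mr_len = count;		/* payload bytes in this segment */

	rpcrdma_map_one(ia, &seg, 1);	/* writing == 1: DMA_FROM_DEVICE */
	/* ... seg.mr_dma can now be handed to chunk registration ... */
	rpcrdma_unmap_one(ia, &seg);
#endif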

static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;
	struct ib_mr *mr = frmr->fr_mr;
	struct ib_send_wr frmr_wr, *bad_wr;
	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			frmr->fr_pgl->page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC: %s: Using frmr %p to map %d segments\n",
		__func__, mw, i);

	frmr->fr_state = FRMR_IS_VALID;

	memset(&frmr_wr, 0, sizeof frmr_wr);
	frmr_wr.wr_id = (unsigned long)(void *)mw;
	frmr_wr.opcode = IB_WR_FAST_REG_MR;
	frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
	frmr_wr.wr.fast_reg.page_list = frmr->fr_pgl;
	frmr_wr.wr.fast_reg.page_list_len = page_no;
	frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
	if (frmr_wr.wr.fast_reg.length < len) {
		rc = -EIO;
		goto out_err;
	}

	/* Bump the key */
	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);

	frmr_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	frmr_wr.wr.fast_reg.rkey = mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
	if (rc) {
		dprintk("RPC: %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		ib_update_fast_reg_key(mr, --key);
		goto out_err;
	} else {
		seg1->mr_rkey = mr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return 0;
out_err:
	frmr->fr_state = FRMR_IS_INVALID;
	while (i--)
		rpcrdma_unmap_one(ia, --seg);
	return rc;
}
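
/*
 * FRMR lifecycle note (descriptive only): rpcrdma_register_frmr_external()
 * marks the MW FRMR_IS_VALID and posts an IB_WR_FAST_REG_MR work request
 * with the rkey's 8-bit key portion bumped, so the rkey advertised for this
 * registration differs from the one used previously.
 * rpcrdma_deregister_frmr_external() below marks the MW FRMR_IS_INVALID and
 * posts IB_WR_LOCAL_INV to invalidate it; if that post fails, the MW is
 * marked FRMR_IS_STALE so that rpcrdma_buffer_get_frmrs() sets it aside for
 * recovery via rpcrdma_retry_flushed_linv() instead of handing it to a new
 * request.
 */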

static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof invalidate_wr);
	invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: failed ib_post_send for invalidate,"
			" status %i\n", __func__, rc);
	}
	return rc;
}

static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
				physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC: %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	LIST_HEAD(l);
	int rc;

	list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	read_unlock(&ia->ri_qplock);
	if (rc)
		dprintk("RPC: %s: failed ib_unmap_fmr,"
			" status %i\n", __func__, rc);
	return rc;
}

int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc = 0;

	switch (ia->ri_memreg_strategy) {

#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		rpcrdma_map_one(ia, seg, writing);
		seg->mr_rkey = ia->ri_bind_mem->rkey;
		seg->mr_base = seg->mr_dma;
		seg->mr_nsegs = 1;
		nsegs = 1;
		break;
#endif

	/* Registration using fast registration work requests (FRMR) */
	case RPCRDMA_FRMR:
		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
		break;

	/* Registration using FMRs (fast memory regions) */
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
		break;

	default:
		return -1;
	}
	if (rc)
		return -1;

	return nsegs;
}

int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
		struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		read_lock(&ia->ri_qplock);
		rpcrdma_unmap_one(ia, seg);
		read_unlock(&ia->ri_qplock);
		break;
#endif

	case RPCRDMA_FRMR:
		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
		break;

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_deregister_fmr_external(seg, ia);
		break;

	default:
		break;
	}
	return nsegs;
}
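
/*
 * Illustrative sketch only: registering one chunk of a marshaled request
 * and releasing it after the reply has arrived. "seg" is a hypothetical
 * element of req->rl_segments that the marshaling code has already filled
 * in (mr_page/mr_offset/mr_len); "nsegs" counts how many consecutive
 * segments describe this chunk.
 */
#if 0	/* usage sketch, not compiled */
	int registered;

	registered = rpcrdma_register_external(seg, nsegs, writing, r_xprt);
	if (registered < 0)
		return -EIO;	/* registration failed */

	/* seg->mr_rkey, seg->mr_base and seg->mr_len now describe the chunk
	 * advertised to the server in the RPC/RDMA header. */

	rpcrdma_deregister_external(seg, r_xprt);
#endif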

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else {	/* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}

/* Physical mapping means one Read/Write list entry per page.
 * All list entries must fit within an inline buffer.
 *
 * NB: The server must return a Write list for NFS READ,
 * which has the same constraint. Factor in the inline
 * rsize as well.
 */
static size_t
rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	unsigned int inline_size, pages;

	inline_size = min_t(unsigned int,
			    cdata->inline_wsize, cdata->inline_rsize);
	inline_size -= RPCRDMA_HDRLEN_MIN;
	pages = inline_size / sizeof(struct rpcrdma_segment);
	return pages << PAGE_SHIFT;
}
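
/*
 * Worked example (illustrative figures only; the real values of
 * RPCRDMA_HDRLEN_MIN, sizeof(struct rpcrdma_segment), and PAGE_SIZE are
 * defined elsewhere): with 1024-byte inline buffers, a 28-byte minimal
 * header, 16-byte segments, and 4KB pages, the math above gives
 * (1024 - 28) / 16 = 62 list entries, so the maximum payload under
 * physical mapping would be 62 << 12 = 253952 bytes (~248KB).
 */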

static size_t
rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
{
	return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
}

size_t
rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
{
	size_t result;

	switch (r_xprt->rx_ia.ri_memreg_strategy) {
	case RPCRDMA_ALLPHYSICAL:
		result = rpcrdma_physical_max_payload(r_xprt);
		break;
	default:
		result = rpcrdma_mr_max_payload(r_xprt);
	}
	return result;
}