// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2020, Oracle and/or its affiliates.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * Neither the name of the Network Appliance, Inc. nor the names of
 * its contributors may be used to endorse or promote products
 * derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * rpc_rdma.c
 *
 * This file implements the guts of the RPC-over-RDMA protocol:
 * marshaling and unmarshaling of RPC messages, and the interface
 * to the Linux RPC framework.
 */

#include <linux/highmem.h>

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

/* Returns size of largest RPC-over-RDMA header in a Call message
 *
 * The largest Call header contains a full-size Read list and a
 * minimal Reply chunk.
 */
static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Read list size */
	size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);

	/* Minimal Reply chunk size */
	size += sizeof(__be32);	/* segment count */
	size += rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}
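
/* Illustrative arithmetic only (not used by the code): assuming
 * RPCRDMA_HDRLEN_MIN is 7 XDR words (28 bytes), rpcrdma_readchunk_maxsz
 * is 6 words, and rpcrdma_segment_maxsz is 4 words, then for
 * maxsegs = 8 the result is 28 + (8 * 6 * 4) + (4 + 16 + 4) = 244
 * bytes reserved for the largest possible Call header.
 */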

/* Returns size of largest RPC-over-RDMA header in a Reply message
 *
 * There is only one Write list or one Reply chunk per Reply
 * message. The larger list is the Write list.
 */
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size */
	size += sizeof(__be32);	/* segment count */
	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
 * @ep: endpoint to initialize
 *
 * The max_inline fields contain the maximum size of an RPC message
 * so the marshaling code doesn't have to repeat this calculation
 * for every RPC.
 */
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep)
{
	unsigned int maxsegs = ep->re_max_rdma_segs;

	ep->re_max_inline_send =
		ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs);
	ep->re_max_inline_recv =
		ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}

/* The client can send a request inline as long as the RPCRDMA header
 * plus the RPC call fit under the transport's inline limit. If the
 * combined call message size exceeds that limit, the client must use
 * a Read chunk for this operation.
 *
 * A Read chunk is also required if sending the RPC call inline would
 * exceed this device's max_sge limit.
 */
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int count, remaining, offset;

	if (xdr->len > ep->re_max_inline_send)
		return false;

	if (xdr->page_len) {
		remaining = xdr->page_len;
		offset = offset_in_page(xdr->page_base);
		count = RPCRDMA_MIN_SEND_SGES;
		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			if (++count > ep->re_attr.cap.max_send_sge)
				return false;
		}
	}

	return true;
}

/* The client can't know how large the actual reply will be. Thus it
 * plans for the largest possible reply for that particular ULP
 * operation. If the maximum combined reply message size exceeds the
 * transport's inline threshold, the client must provide a Write list
 * or a Reply chunk for this request.
 */
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
}

/* The client is required to provide a Reply chunk if the maximum
 * size of the non-payload part of the RPC Reply is larger than
 * the inline threshold.
 */
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
			  const struct rpc_rqst *rqst)
{
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
		r_xprt->rx_ep->re_max_inline_recv;
}

/* The upper layer (e.g., the NFS ACL procedures) can be lazy in
 * allocating pages for its receive buffer. For TCP, these pages can
 * be allocated during receive processing. Not true for RDMA, which
 * must always provision receive buffers up front.
 */
static noinline int
rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
{
	struct page **ppages;
	int len;

	len = buf->page_len;
	ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
	while (len > 0) {
		if (!*ppages)
			*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
		if (!*ppages)
			return -ENOBUFS;
		ppages++;
		len -= PAGE_SIZE;
	}

	return 0;
}

/* Convert @vec to a single SGL element.
 *
 * Returns pointer to next available SGE, and bumps the total number
 * of SGEs consumed.
 */
static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
		     unsigned int *n)
{
	seg->mr_page = virt_to_page(vec->iov_base);
	seg->mr_offset = offset_in_page(vec->iov_base);
	seg->mr_len = vec->iov_len;
	++seg;
	++(*n);
	return seg;
}

/* Convert @xdrbuf into SGEs no larger than a page each. As they
 * are registered, these SGEs are then coalesced into RDMA segments
 * when the selected memreg mode supports it.
 *
 * Returns positive number of SGEs consumed, or a negative errno.
 */

static int
rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
		     unsigned int pos, enum rpcrdma_chunktype type,
		     struct rpcrdma_mr_seg *seg)
{
	unsigned long page_base;
	unsigned int len, n;
	struct page **ppages;

	n = 0;
	if (pos == 0)
		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);

	len = xdrbuf->page_len;
	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdrbuf->page_base);
	while (len) {
		seg->mr_page = *ppages;
		seg->mr_offset = page_base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
		len -= seg->mr_len;
		++ppages;
		++seg;
		++n;
		page_base = 0;
	}

	if (type == rpcrdma_readch || type == rpcrdma_writech)
		goto out;

	if (xdrbuf->tail[0].iov_len)
		rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);

out:
	if (unlikely(n > RPCRDMA_MAX_SEGS))
		return -EIO;
	return n;
}
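
/* Illustrative example (hypothetical values): with @pos of zero, an
 * xdr_buf with a 96-byte head kvec, a 10000-byte page list starting
 * at page offset 0, and an empty tail converts to one segment for the
 * head plus three page-sized segments (4096 + 4096 + 1808 bytes on a
 * 4KB-page system), for a return value of 4.
 */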

static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset);
	return 0;
}

static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
		    u32 position)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p++ = xdr_one;			/* Item present */
	xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length,
				mr->mr_offset);
	return 0;
}

static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
						 struct rpcrdma_req *req,
						 struct rpcrdma_mr_seg *seg,
						 int nsegs, bool writing,
						 struct rpcrdma_mr **mr)
{
	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
	if (!*mr) {
		*mr = rpcrdma_mr_get(r_xprt);
		if (!*mr)
			goto out_getmr_err;
		(*mr)->mr_req = req;
	}

	rpcrdma_mr_push(*mr, &req->rl_registered);
	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);

out_getmr_err:
	trace_xprtrdma_nomrs_err(r_xprt, req);
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	rpcrdma_mrs_refresh(r_xprt);
	return ERR_PTR(-EAGAIN);
}

/* Register and XDR encode the Read list. Supports encoding a list of read
 * segments that belong to a single read chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Read chunklist (a linked list):
 *   N elements, position P (same P for all chunks of same arg!):
 *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single @pos value is currently supported.
 */
static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct rpc_rqst *rqst,
				    enum rpcrdma_chunktype rtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	unsigned int pos;
	int nsegs;

	if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
		goto done;

	pos = rqst->rq_snd_buf.head[0].iov_len;
	if (rtype == rpcrdma_areadch)
		pos = 0;
	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
				     rtype, seg);
	if (nsegs < 0)
		return nsegs;

	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_read_segment(xdr, mr, pos) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
		r_xprt->rx_stats.read_chunk_count++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}
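
/* Illustrative wire layout (hypothetical values): a Read chunk
 * registered as two segments at XDR position 136 is encoded as
 *
 *    1 - P=136,H=0x55,L=4096,O=0x1000 - 1 - P=136,H=0x56,L=904,O=0x0 - 0
 *
 * where the final 0 is the list terminator emitted by
 * xdr_stream_encode_item_absent().
 */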

/* Register and XDR encode the Write list. Supports encoding a list
 * containing one array of plain segments that belong to a single
 * write chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Write chunklist (a list of (one) counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single Write chunk is currently supported.
 */
static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req,
				     struct rpc_rqst *rqst,
				     enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_writech)
		goto done;

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
				     rqst->rq_rcv_buf.head[0].iov_len,
				     wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) {
		if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr,
					nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	}

	/* Update count of segments in this Write chunk */
	*segcount = cpu_to_be32(nchunks);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}
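
/* Illustrative wire layout (hypothetical values): a Write chunk
 * registered as two segments is encoded as
 *
 *    1 - N=2 - H=0x77,L=8192,O=0x2000 - H=0x78,L=12,O=0x0 - 0
 *
 * N is the segment count patched into *segcount after the loop, and
 * the trailing 0 terminates the Write list.
 */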

/* Register and XDR encode the Reply chunk. Supports encoding an array
 * of plain segments that belong to a single write (reply) chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Reply chunk (a counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 */
static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
				      struct rpcrdma_req *req,
				      struct rpc_rqst *rqst,
				      enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_replych) {
		if (xdr_stream_encode_item_absent(xdr) < 0)
			return -EMSGSIZE;
		return 0;
	}

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.reply_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in the Reply chunk */
	*segcount = cpu_to_be32(nchunks);

	return 0;
}

static void rpcrdma_sendctx_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);
	struct rpcrdma_rep *rep = req->rl_reply;

	rpcrdma_complete_rqst(rep);
	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
}

/**
 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
 * @sc: sendctx containing SGEs to unmap
 *
 */
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
	struct ib_sge *sge;

	if (!sc->sc_unmap_count)
		return;

	/* The first two SGEs contain the transport header and
	 * the inline buffer. These are always left mapped so
	 * they can be cheaply re-used.
	 */
	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
	     ++sge, --sc->sc_unmap_count)
		ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
				  DMA_TO_DEVICE);

	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}

/* Prepare an SGE for the RPC-over-RDMA transport header.
 */
static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req, u32 len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
}

/* The head iovec is straightforward, as it is usually already
 * DMA-mapped. Sync the content that has changed.
 */
static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		return false;

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	return true;
}

/* If there is a page list present, DMA map and prepare an
 * SGE for each page to be sent.
 */
static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
				     struct xdr_buf *xdr)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	unsigned int page_base, len, remaining;
	struct page **ppages;
	struct ib_sge *sge;

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		sge = &sc->sc_sges[req->rl_wr.num_sge++];
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
					    page_base, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
			goto out_mapping_err;

		sge->length = len;
		sge->lkey = rdmab_lkey(rb);

		sc->sc_unmap_count++;
		ppages++;
		remaining -= len;
		page_base = 0;
	}

	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* The tail iovec may include an XDR pad for the page list,
 * as well as additional content, and may not reside in the
 * same page as the head iovec.
 */
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
				     struct xdr_buf *xdr,
				     unsigned int page_base, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	struct page *page = virt_to_page(xdr->tail[0].iov_base);

	sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
		goto out_mapping_err;

	sge->length = len;
	sge->lkey = rdmab_lkey(rb);
	++sc->sc_unmap_count;
	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* Copy the tail to the end of the head buffer.
 */
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned char *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len + xdr->page_len;
	memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
	r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
}

/* Copy pagelist content into the head buffer.
 */
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned int len, page_base, remaining;
	struct page **ppages;
	unsigned char *src, *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len;
	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		src = page_address(*ppages);
		src += page_base;
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		memcpy(dst, src, len);
		r_xprt->rx_stats.pullup_copy_count += len;

		ppages++;
		dst += len;
		remaining -= len;
		page_base = 0;
	}
}

/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
 * When the head, pagelist, and tail are small, a pull-up copy
 * is considerably less costly than DMA mapping the components
 * of @xdr.
 *
 * Assumptions:
 *  - the caller has already verified that the total length
 *    of the RPC Call body will fit into @rl_sendbuf.
 */
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	if (unlikely(xdr->tail[0].iov_len))
		rpcrdma_pullup_tail_iov(r_xprt, req, xdr);

	if (unlikely(xdr->page_len))
		rpcrdma_pullup_pagelist(r_xprt, req, xdr);

	/* The whole RPC message resides in the head iovec now */
	return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
}

static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	struct kvec *tail = &xdr->tail[0];

	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;
	if (xdr->page_len)
		if (!rpcrdma_prepare_pagelist(req, xdr))
			return false;
	if (tail->iov_len)
		if (!rpcrdma_prepare_tail_iov(req, xdr,
					      offset_in_page(tail->iov_base),
					      tail->iov_len))
			return false;

	if (req->rl_sendctx->sc_unmap_count)
		kref_get(&req->rl_kref);
	return true;
}

static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
				   struct rpcrdma_req *req,
				   struct xdr_buf *xdr)
{
	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;

	/* If there is a Read chunk, the page list is being handled
	 * via explicit RDMA, and thus is skipped here.
	 */

	/* Do not include the tail if it is only an XDR pad */
	if (xdr->tail[0].iov_len > 3) {
		unsigned int page_base, len;

		/* If the content in the page list is an odd length,
		 * xdr_write_pages() adds a pad at the beginning of
		 * the tail iovec. Force the tail's non-pad content to
		 * land at the next XDR position in the Send message.
		 */
		page_base = offset_in_page(xdr->tail[0].iov_base);
		len = xdr->tail[0].iov_len;
		page_base += len & 3;
		len -= len & 3;
		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
			return false;
		kref_get(&req->rl_kref);
	}

	return true;
}
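
/* Illustrative example (hypothetical lengths): with 1001 bytes in the
 * page list, xdr_write_pages() places a 3-byte XDR pad at the start of
 * the tail iovec. Because the tail's real content is itself XDR-aligned,
 * len & 3 equals that pad length, so the adjustment above maps only the
 * tail's non-pad content into the Send SGE list.
 */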

/**
 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
 * @r_xprt: controlling transport
 * @req: context of RPC Call being marshalled
 * @hdrlen: size of transport header, in bytes
 * @xdr: xdr_buf containing RPC Call
 * @rtype: chunk type being encoded
 *
 * Returns 0 on success; otherwise a negative errno is returned.
 */
inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, u32 hdrlen,
				     struct xdr_buf *xdr,
				     enum rpcrdma_chunktype rtype)
{
	int ret;

	ret = -EAGAIN;
	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
	if (!req->rl_sendctx)
		goto out_nosc;
	req->rl_sendctx->sc_unmap_count = 0;
	req->rl_sendctx->sc_req = req;
	kref_init(&req->rl_kref);
	req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
	req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
	req->rl_wr.num_sge = 0;
	req->rl_wr.opcode = IB_WR_SEND;

	rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen);

	ret = -EIO;
	switch (rtype) {
	case rpcrdma_noch_pullup:
		if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_noch_mapped:
		if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_readch:
		if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_areadch:
		break;
	default:
		goto out_unmap;
	}

	return 0;

out_unmap:
	rpcrdma_sendctx_unmap(req->rl_sendctx);
out_nosc:
	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
	return ret;
}
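
/* Resulting Send SGE layout (a sketch; the exact count depends on the
 * chunk type and the shape of @xdr):
 *
 *   sc_sges[0]  - transport header (rl_rdmabuf), always left mapped
 *   sc_sges[1]  - RPC Call head iovec (rl_sendbuf), always left mapped
 *   sc_sges[2+] - page list and/or tail pages, DMA-mapped per Send and
 *                 later unmapped by rpcrdma_sendctx_unmap()
 */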

/**
 * rpcrdma_marshal_req - Marshal and send one RPC request
 * @r_xprt: controlling transport
 * @rqst: RPC request to be marshaled
 *
 * For the RPC in "rqst", this function:
 *  - Chooses the transfer mode (e.g., RDMA_MSG or RDMA_NOMSG)
 *  - Registers Read, Write, and Reply chunks
 *  - Constructs the transport header
 *  - Posts a Send WR to send the transport header and request
 *
 * Returns:
 *	%0 if the RPC was sent successfully,
 *	%-ENOTCONN if the connection was lost,
 *	%-EAGAIN if the caller should call again with the same arguments,
 *	%-ENOBUFS if the caller should call again after a delay,
 *	%-EMSGSIZE if the transport header is too small,
 *	%-EIO if a permanent problem occurred while marshaling.
 */
int
rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct xdr_stream *xdr = &req->rl_stream;
	enum rpcrdma_chunktype rtype, wtype;
	struct xdr_buf *buf = &rqst->rq_snd_buf;
	bool ddp_allowed;
	__be32 *p;
	int ret;

	if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) {
		ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf);
		if (ret)
			return ret;
	}

	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
	xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
			rqst);

	/* Fixed header fields */
	ret = -EMSGSIZE;
	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (!p)
		goto out_err;
	*p++ = rqst->rq_xid;
	*p++ = rpcrdma_version;
	*p++ = r_xprt->rx_buf.rb_max_requests;

	/* When the ULP employs a GSS flavor that guarantees integrity
	 * or privacy, direct data placement of individual data items
	 * is not allowed.
	 */
	ddp_allowed = !test_bit(RPCAUTH_AUTH_DATATOUCH,
				&rqst->rq_cred->cr_auth->au_flags);

	/*
	 * Chunks needed for results?
	 *
	 * o If the expected result is under the inline threshold, all ops
	 *   return as inline.
	 * o Large read ops return data as write chunk(s), header as
	 *   inline.
	 * o Large non-read ops return as a single reply chunk.
	 */
	if (rpcrdma_results_inline(r_xprt, rqst))
		wtype = rpcrdma_noch;
	else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
		 rpcrdma_nonpayload_inline(r_xprt, rqst))
		wtype = rpcrdma_writech;
	else
		wtype = rpcrdma_replych;

	/*
	 * Chunks needed for arguments?
	 *
	 * o If the total request is under the inline threshold, all ops
	 *   are sent as inline.
	 * o Large write ops transmit data as read chunk(s), header as
	 *   inline.
	 * o Large non-write ops are sent with the entire message as a
	 *   single read chunk (protocol 0-position special case).
	 *
	 * This assumes that the upper layer does not present a request
	 * that both has a data payload, and whose non-data arguments
	 * by themselves are larger than the inline threshold.
	 */
	if (rpcrdma_args_inline(r_xprt, rqst)) {
		*p++ = rdma_msg;
		rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
			rpcrdma_noch_pullup : rpcrdma_noch_mapped;
	} else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
		*p++ = rdma_msg;
		rtype = rpcrdma_readch;
	} else {
		r_xprt->rx_stats.nomsg_call_count++;
		*p++ = rdma_nomsg;
		rtype = rpcrdma_areadch;
	}

	/* This implementation supports the following combinations
	 * of chunk lists in one RPC-over-RDMA Call message:
	 *
	 *   - Read list
	 *   - Write list
	 *   - Reply chunk
	 *   - Read list + Reply chunk
	 *
	 * It might not yet support the following combinations:
	 *
	 *   - Read list + Write list
	 *
	 * It does not support the following combinations:
	 *
	 *   - Write list + Reply chunk
	 *   - Read list + Write list + Reply chunk
	 *
	 * This implementation supports only a single chunk in each
	 * Read or Write list. Thus for example the client cannot
	 * send a Call message with a Position Zero Read chunk and a
	 * regular Read chunk at the same time.
	 */
	ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;

	ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
					buf, rtype);
	if (ret)
		goto out_err;

	trace_xprtrdma_marshal(req, rtype, wtype);
	return 0;

out_err:
	trace_xprtrdma_marshal_failed(rqst, ret);
	r_xprt->rx_stats.failed_marshal_count++;
	frwr_reset(req);
	return ret;
}

static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt,
					 struct rpcrdma_buffer *buf,
					 u32 grant)
{
	buf->rb_credits = grant;
	xprt->cwnd = grant << RPC_CWNDSHIFT;
}

static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant);
	spin_unlock(&xprt->transport_lock);
}

/**
 * rpcrdma_reset_cwnd - Reset the xprt's congestion window
 * @r_xprt: controlling transport instance
 *
 * Prepare @r_xprt for the next connection by reinitializing
 * its credit grant to one (see RFC 8166, Section 3.3.3).
 */
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	xprt->cong = 0;
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1);
	spin_unlock(&xprt->transport_lock);
}

/**
 * rpcrdma_inline_fixup - Scatter inline received data into rqst's iovecs
 * @rqst: controlling RPC request
 * @srcp: points to RPC message payload in receive buffer
 * @copy_len: remaining length of receive buffer content
 * @pad: Write chunk pad bytes needed (zero for pure inline)
 *
 * The upper layer has set the maximum number of bytes it can
 * receive in each component of rq_rcv_buf. These values are set in
 * the head.iov_len, page_len, tail.iov_len, and buflen fields.
 *
 * Unlike the TCP equivalent (xdr_partial_copy_from_skb), in
 * many cases this function simply updates iov_base pointers in
 * rq_rcv_buf to point directly to the received reply data, to
 * avoid copying reply data.
 *
 * Returns the count of bytes which had to be memcopied.
 */
static unsigned long
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
	unsigned long fixup_copy_count;
	int i, npages, curlen;
	char *destp;
	struct page **ppages;
	int page_base;

	/* The head iovec is redirected to the RPC reply message
	 * in the receive buffer, to avoid a memcopy.
	 */
	rqst->rq_rcv_buf.head[0].iov_base = srcp;
	rqst->rq_private_buf.head[0].iov_base = srcp;

	/* The contents of the receive buffer that follow
	 * head.iov_len bytes are copied into the page list.
	 */
	curlen = rqst->rq_rcv_buf.head[0].iov_len;
	if (curlen > copy_len)
		curlen = copy_len;
	srcp += curlen;
	copy_len -= curlen;

	ppages = rqst->rq_rcv_buf.pages +
		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
	fixup_copy_count = 0;
	if (copy_len && rqst->rq_rcv_buf.page_len) {
		int pagelist_len;

		pagelist_len = rqst->rq_rcv_buf.page_len;
		if (pagelist_len > copy_len)
			pagelist_len = copy_len;
		npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT;
		for (i = 0; i < npages; i++) {
			curlen = PAGE_SIZE - page_base;
			if (curlen > pagelist_len)
				curlen = pagelist_len;

			destp = kmap_atomic(ppages[i]);
			memcpy(destp + page_base, srcp, curlen);
			flush_dcache_page(ppages[i]);
			kunmap_atomic(destp);
			srcp += curlen;
			copy_len -= curlen;
			fixup_copy_count += curlen;
			pagelist_len -= curlen;
			if (!pagelist_len)
				break;
			page_base = 0;
		}

		/* Implicit padding for the last segment in a Write
		 * chunk is inserted inline at the front of the tail
		 * iovec. The upper layer ignores the content of
		 * the pad. Simply ensure inline content in the tail
		 * that follows the Write chunk is properly aligned.
		 */
		if (pad)
			srcp -= pad;
	}

	/* The tail iovec is redirected to the remaining data
	 * in the receive buffer, to avoid a memcopy.
	 */
	if (copy_len || pad) {
		rqst->rq_rcv_buf.tail[0].iov_base = srcp;
		rqst->rq_private_buf.tail[0].iov_base = srcp;
	}

	if (fixup_copy_count)
		trace_xprtrdma_fixup(rqst, fixup_copy_count);
	return fixup_copy_count;
}
1114
Chuck Lever63cae472015-10-24 17:28:08 -04001115/* By convention, backchannel calls arrive via rdma_msg type
1116 * messages, and never populate the chunk lists. This makes
1117 * the RPC/RDMA header small and fixed in size, so it is
1118 * straightforward to check the RPC header's direction field.
1119 */
1120static bool
Chuck Lever5381e0e2017-10-16 15:01:14 -04001121rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
Chuck Lever41c8f702017-08-03 14:30:11 -04001122#if defined(CONFIG_SUNRPC_BACKCHANNEL)
Chuck Lever63cae472015-10-24 17:28:08 -04001123{
Chuck Lever41c8f702017-08-03 14:30:11 -04001124 struct xdr_stream *xdr = &rep->rr_stream;
1125 __be32 *p;
Chuck Lever63cae472015-10-24 17:28:08 -04001126
Chuck Lever5381e0e2017-10-16 15:01:14 -04001127 if (rep->rr_proc != rdma_msg)
Chuck Lever63cae472015-10-24 17:28:08 -04001128 return false;
1129
Chuck Lever41c8f702017-08-03 14:30:11 -04001130 /* Peek at stream contents without advancing. */
1131 p = xdr_inline_decode(xdr, 0);
1132
1133 /* Chunk lists */
Chuck Lever07e9a632020-03-28 13:43:22 -04001134 if (xdr_item_is_present(p++))
Chuck Lever63cae472015-10-24 17:28:08 -04001135 return false;
Chuck Lever07e9a632020-03-28 13:43:22 -04001136 if (xdr_item_is_present(p++))
Chuck Lever41c8f702017-08-03 14:30:11 -04001137 return false;
Chuck Lever07e9a632020-03-28 13:43:22 -04001138 if (xdr_item_is_present(p++))
Chuck Lever63cae472015-10-24 17:28:08 -04001139 return false;
1140
Chuck Lever41c8f702017-08-03 14:30:11 -04001141 /* RPC header */
Chuck Lever5381e0e2017-10-16 15:01:14 -04001142 if (*p++ != rep->rr_xid)
Chuck Lever41c8f702017-08-03 14:30:11 -04001143 return false;
1144 if (*p != cpu_to_be32(RPC_CALL))
1145 return false;
1146
1147 /* Now that we are sure this is a backchannel call,
1148 * advance to the RPC header.
1149 */
1150 p = xdr_inline_decode(xdr, 3 * sizeof(*p));
1151 if (unlikely(!p))
Chuck Lever84dff5e2021-02-04 11:59:19 -05001152 return true;
Chuck Lever41c8f702017-08-03 14:30:11 -04001153
1154 rpcrdma_bc_receive_call(r_xprt, rep);
Chuck Lever63cae472015-10-24 17:28:08 -04001155 return true;
1156}
Chuck Lever41c8f702017-08-03 14:30:11 -04001157#else /* CONFIG_SUNRPC_BACKCHANNEL */
1158{
1159 return false;
Chuck Lever63cae472015-10-24 17:28:08 -04001160}
1161#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1162
Chuck Lever264b0cd2017-08-03 14:30:27 -04001163static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
1164{
Chuck Levere11b7c92017-12-20 16:31:04 -05001165 u32 handle;
1166 u64 offset;
Chuck Lever264b0cd2017-08-03 14:30:27 -04001167 __be32 *p;
1168
1169 p = xdr_inline_decode(xdr, 4 * sizeof(*p));
1170 if (unlikely(!p))
1171 return -EIO;
1172
Chuck Leverf60a0862020-03-29 16:44:13 -04001173 xdr_decode_rdma_segment(p, &handle, length, &offset);
Chuck Levere11b7c92017-12-20 16:31:04 -05001174 trace_xprtrdma_decode_seg(handle, *length, offset);
Chuck Lever264b0cd2017-08-03 14:30:27 -04001175 return 0;
1176}
1177
static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
{
	u32 segcount, seglength;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	segcount = be32_to_cpup(p);
	while (segcount--) {
		if (decode_rdma_segment(xdr, &seglength))
			return -EIO;
		*length += seglength;
	}

	return 0;
}

/* In RPC-over-RDMA Version One replies, a Read list is never
 * expected. This decoder is a stub that returns an error if
 * a Read list is present.
 */
static int decode_read_list(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;
	if (unlikely(xdr_item_is_present(p)))
		return -EIO;
	return 0;
}

/* Supports only one Write chunk in the Write list
 */
static int decode_write_list(struct xdr_stream *xdr, u32 *length)
{
	u32 chunklen;
	bool first;
	__be32 *p;

	*length = 0;
	first = true;
	do {
		p = xdr_inline_decode(xdr, sizeof(*p));
		if (unlikely(!p))
			return -EIO;
		if (xdr_item_is_absent(p))
			break;
		if (!first)
			return -EIO;

		if (decode_write_chunk(xdr, &chunklen))
			return -EIO;
		*length += chunklen;
		first = false;
	} while (true);
	return 0;
}

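/* The Reply chunk is optional: when the discriminator marks it
 * absent, *length is returned as zero; otherwise the chunk body
 * is decoded exactly like a Write chunk.
 */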
static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	if (xdr_item_is_present(p))
		if (decode_write_chunk(xdr, length))
			return -EIO;
	return 0;
}

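/* RDMA_MSG: the RPC reply message follows the transport header
 * inline. A Reply chunk must not be present; a Write chunk, if
 * any, carries bulk payload that the responder has already
 * placed via RDMA Write.
 */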
static int
rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
		   struct rpc_rqst *rqst)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	u32 writelist, replychunk, rpclen;
	char *base;

	/* Decode the chunk lists */
	if (decode_read_list(xdr))
		return -EIO;
	if (decode_write_list(xdr, &writelist))
		return -EIO;
	if (decode_reply_chunk(xdr, &replychunk))
		return -EIO;

	/* RDMA_MSG sanity checks */
	if (unlikely(replychunk))
		return -EIO;

	/* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
	base = (char *)xdr_inline_decode(xdr, 0);
	rpclen = xdr_stream_remaining(xdr);
	r_xprt->rx_stats.fixup_copy_count +=
		rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);

	r_xprt->rx_stats.total_rdma_reply += writelist;
	return rpclen + xdr_align_size(writelist);
}

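/* RDMA_NOMSG: no RPC message appears inline; the entire reply
 * was conveyed via the Reply chunk, so a Write chunk must not
 * be present and a Reply chunk is mandatory.
 */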
static noinline int
rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	u32 writelist, replychunk;

	/* Decode the chunk lists */
	if (decode_read_list(xdr))
		return -EIO;
	if (decode_write_list(xdr, &writelist))
		return -EIO;
	if (decode_reply_chunk(xdr, &replychunk))
		return -EIO;

	/* RDMA_NOMSG sanity checks */
	if (unlikely(writelist))
		return -EIO;
	if (unlikely(!replychunk))
		return -EIO;

	/* Reply chunk buffer already is the reply vector */
	r_xprt->rx_stats.total_rdma_reply += replychunk;
	return replychunk;
}

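/* RDMA_ERROR: the responder could not process the Call. The
 * error code is decoded only for tracing (err_vers also carries
 * the responder's supported version range); the RPC always
 * fails with -EIO.
 */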
static noinline int
rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
		     struct rpc_rqst *rqst)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	switch (*p) {
	case err_vers:
		p = xdr_inline_decode(xdr, 2 * sizeof(*p));
		if (!p)
			break;
		trace_xprtrdma_err_vers(rqst, p, p + 1);
		break;
	case err_chunk:
		trace_xprtrdma_err_chunk(rqst);
		break;
	default:
		trace_xprtrdma_err_unrecognized(rqst, p);
	}

	return -EIO;
}

/**
 * rpcrdma_unpin_rqst - Release rqst without completing it
 * @rep: RPC/RDMA Receive context
 *
 * This is done when a connection is lost so that a Reply
 * can be dropped and its matching Call can be subsequently
 * retransmitted on a new connection.
 */
void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
{
	struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
	struct rpc_rqst *rqst = rep->rr_rqst;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);

	req->rl_reply = NULL;
	rep->rr_rqst = NULL;

	spin_lock(&xprt->queue_lock);
	xprt_unpin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);
}

/**
 * rpcrdma_complete_rqst - Pass completed rqst back to RPC
 * @rep: RPC/RDMA Receive context
 *
 * Reconstruct the RPC reply and complete the transaction
 * while @rqst is still pinned to ensure the rep, rqst, and
 * rq_task pointers remain stable.
 */
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpc_rqst *rqst = rep->rr_rqst;
	int status;

	switch (rep->rr_proc) {
	case rdma_msg:
		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
		break;
	case rdma_nomsg:
		status = rpcrdma_decode_nomsg(r_xprt, rep);
		break;
	case rdma_error:
		status = rpcrdma_decode_error(r_xprt, rep, rqst);
		break;
	default:
		status = -EIO;
	}
	if (status < 0)
		goto out_badheader;

out:
	spin_lock(&xprt->queue_lock);
	xprt_complete_rqst(rqst->rq_task, status);
	xprt_unpin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);
	return;

out_badheader:
	trace_xprtrdma_reply_hdr_err(rep);
	r_xprt->rx_stats.bad_reply_count++;
	rqst->rq_task->tk_status = status;
	status = 0;
	goto out;
}

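/* kref release callback: runs once the last reference on rl_kref
 * is dropped, i.e. after Receive processing and any pending
 * memory invalidation have both finished.
 */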
static void rpcrdma_reply_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);

	rpcrdma_complete_rqst(req->rl_reply);
}

/**
 * rpcrdma_reply_handler - Process received RPC/RDMA messages
 * @rep: Incoming rpcrdma_rep object to process
 *
 * Errors must result in the RPC task either being awakened, or
 * allowed to timeout, to discover the errors at that time.
 */
void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	struct rpc_rqst *rqst;
	u32 credits;
	__be32 *p;

	/* Any data means we had a useful conversation, so
	 * we don't need to delay the next reconnect.
	 */
	if (xprt->reestablish_timeout)
		xprt->reestablish_timeout = 0;

	/* Fixed transport header fields */
	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
			rep->rr_hdrbuf.head[0].iov_base, NULL);
	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
	if (unlikely(!p))
		goto out_shortreply;
	rep->rr_xid = *p++;
	rep->rr_vers = *p++;
	credits = be32_to_cpu(*p++);
	rep->rr_proc = *p++;

	if (rep->rr_vers != rpcrdma_version)
		goto out_badversion;

	if (rpcrdma_is_bcall(r_xprt, rep))
		return;

	/* Match incoming rpcrdma_rep to an rpcrdma_req to
	 * get context for handling any incoming chunks.
	 */
	spin_lock(&xprt->queue_lock);
	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
	if (!rqst)
		goto out_norqst;
	xprt_pin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);

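	/* The credits field is the responder's flow-control grant.
	 * Clamp it to a sane range, replenish Receive buffers to
	 * match, and update the RPC layer's congestion window if
	 * the grant changed.
	 */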
	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > r_xprt->rx_ep->re_max_requests)
		credits = r_xprt->rx_ep->re_max_requests;
	rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
			   false);
	if (buf->rb_credits != credits)
		rpcrdma_update_cwnd(r_xprt, credits);

	req = rpcr_to_rdmar(rqst);
	if (unlikely(req->rl_reply))
		rpcrdma_rep_put(buf, req->rl_reply);
	req->rl_reply = rep;
	rep->rr_rqst = rqst;

	trace_xprtrdma_reply(rqst->rq_task, rep, credits);

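	/* If the responder used Remote Invalidation, some MRs may
	 * already be invalid; frwr_reminv() removes those from
	 * rl_registered. Any MRs that remain are invalidated
	 * asynchronously, and the RPC completes only after that
	 * LocalInv finishes.
	 */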
	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
		frwr_reminv(rep, &req->rl_registered);
	if (!list_empty(&req->rl_registered))
		frwr_unmap_async(r_xprt, req);
		/* LocalInv completion will complete the RPC */
	else
		kref_put(&req->rl_kref, rpcrdma_reply_done);
	return;

out_badversion:
	trace_xprtrdma_reply_vers_err(rep);
	goto out;

out_norqst:
	spin_unlock(&xprt->queue_lock);
	trace_xprtrdma_reply_rqst_err(rep);
	goto out;

out_shortreply:
	trace_xprtrdma_reply_short_err(rep);

out:
	rpcrdma_rep_put(buf, rep);
}