// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2020, Oracle and/or its affiliates.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * rpc_rdma.c
 *
 * This file contains the guts of the RPC-over-RDMA protocol: it
 * handles marshaling and unmarshaling of RPC messages, and it is
 * where the interface to the Linux RPC framework lives.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040048 */
49
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -040050#include <linux/highmem.h>
51
Chuck Leverbd2abef2018-05-07 15:27:16 -040052#include <linux/sunrpc/svc_rdma.h>
53
Chuck Leverb6e717cb2018-05-07 15:27:05 -040054#include "xprt_rdma.h"
55#include <trace/events/rpcrdma.h>
56
Jeff Laytonf895b252014-11-17 16:58:04 -050057#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -040058# define RPCDBG_FACILITY RPCDBG_TRANS
59#endif
60
Chuck Lever302d3de2016-05-02 14:41:05 -040061/* Returns size of largest RPC-over-RDMA header in a Call message
62 *
Chuck Lever94f58c52016-05-02 14:41:30 -040063 * The largest Call header contains a full-size Read list and a
64 * minimal Reply chunk.
Chuck Lever302d3de2016-05-02 14:41:05 -040065 */
66static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
67{
68 unsigned int size;
69
70 /* Fixed header fields and list discriminators */
71 size = RPCRDMA_HDRLEN_MIN;
72
73 /* Maximum Read list size */
Colin Ian King91228842020-07-15 17:26:04 +010074 size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
Chuck Lever302d3de2016-05-02 14:41:05 -040075
	/* Minimal Reply chunk size */
	size += sizeof(__be32);	/* segment count */
	size += rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

/* Returns size of largest RPC-over-RDMA header in a Reply message
 *
 * There is only one Write list or one Reply chunk per Reply
 * message. The larger list is the Write list.
 */
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size */
	size += sizeof(__be32);	/* segment count */
	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

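/* Example: a rough sketch, assuming the usual protocol constants
 * (RPCRDMA_HDRLEN_MIN is 7 XDR words, rpcrdma_readchunk_maxsz is 6
 * words, rpcrdma_segment_maxsz is 4 words). A transport that supports
 * 8 RDMA segments would compute:
 *
 *	max Call header:  28 + 8 * 6 * 4 + (4 + 16 + 4) = 244 bytes
 *	max Reply header: 28 + (4 + 8 * 4 * 4 + 4)      = 164 bytes
 *
 * These are the amounts subtracted from the inline thresholds by
 * rpcrdma_set_max_header_sizes() below.
 */
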
/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
 * @ep: endpoint to initialize
 *
 * The max_inline fields contain the maximum size of an RPC message
 * so the marshaling code doesn't have to repeat this calculation
 * for every RPC.
 */
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep)
{
	unsigned int maxsegs = ep->re_max_rdma_segs;

	ep->re_max_inline_send =
		ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs);
	ep->re_max_inline_recv =
		ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}

/* The client can send a request inline as long as the RPCRDMA header
 * plus the RPC call fit under the transport's inline limit. If the
 * combined call message size exceeds that limit, the client must use
 * a Read chunk for this operation.
 *
 * A Read chunk is also required if sending the RPC call inline would
 * exceed this device's max_sge limit.
 */
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int count, remaining, offset;

	if (xdr->len > ep->re_max_inline_send)
		return false;

	if (xdr->page_len) {
		remaining = xdr->page_len;
		offset = offset_in_page(xdr->page_base);
		count = RPCRDMA_MIN_SEND_SGES;
		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			if (++count > ep->re_attr.cap.max_send_sge)
				return false;
		}
	}

	return true;
}

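/* Example: a sketch of the SGE check above, assuming 4KB pages. A
 * Call whose page list carries 10000 bytes starting at page offset
 * 100 spans three pages, so it needs RPCRDMA_MIN_SEND_SGES + 3 Send
 * SGEs. If the device's max_send_sge is smaller than that, the Call
 * is sent via a Read chunk even though it fits under
 * re_max_inline_send.
 */
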
/* The client can't know how large the actual reply will be. Thus it
 * plans for the largest possible reply for that particular ULP
 * operation. If the maximum combined reply message size exceeds the
 * inline threshold, the client must provide a Write list or a Reply
 * chunk for this request.
 */
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
}

/* The client is required to provide a Reply chunk if the maximum
 * size of the non-payload part of the RPC Reply is larger than
 * the inline threshold.
 */
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
			  const struct rpc_rqst *rqst)
{
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
		r_xprt->rx_ep->re_max_inline_recv;
}

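/* Example: a sketch of how these predicates play out. A reply whose
 * whole rq_rcv_buf fits under re_max_inline_recv needs no chunk at
 * all; a reply with a large data payload but a small non-payload head
 * and tail gets a Write chunk; and a large reply that is not a data
 * payload (or whose non-payload part alone exceeds the threshold)
 * falls back to a Reply chunk. See the wtype selection in
 * rpcrdma_marshal_req().
 */
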
/* Some ULPs (such as the NFS ACL protocol) are lazy about allocating
 * receive pages. For TCP, these pages can be allocated during receive
 * processing. Not true for RDMA, which must always provision receive
 * buffers up front.
 */
static noinline int
rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
{
	struct page **ppages;
	int len;

	len = buf->page_len;
	ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
	while (len > 0) {
		if (!*ppages)
			*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
		if (!*ppages)
			return -ENOBUFS;
		ppages++;
		len -= PAGE_SIZE;
	}

	return 0;
}

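/* Example: with 4KB pages, a sparse rq_rcv_buf whose page_len is
 * 10000 bytes covers three page slots; the loop above allocates a
 * fresh page for each slot the ULP left NULL. A failed allocation
 * returns -ENOBUFS so that rpcrdma_marshal_req() can fail the request
 * before any memory registration is attempted.
 */
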
/* Convert @vec to a single SGL element.
 *
 * Returns pointer to next available SGE, and bumps the total number
 * of SGEs consumed.
 */
static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
		     unsigned int *n)
{
	seg->mr_page = virt_to_page(vec->iov_base);
	seg->mr_offset = offset_in_page(vec->iov_base);
	seg->mr_len = vec->iov_len;
	++seg;
	++(*n);
	return seg;
}

/* Convert @xdrbuf into SGEs no larger than a page each. As they
 * are registered, these SGEs are then coalesced into RDMA segments
 * when the selected memreg mode supports it.
 *
 * Returns positive number of SGEs consumed, or a negative errno.
 */

static int
rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
		     unsigned int pos, enum rpcrdma_chunktype type,
		     struct rpcrdma_mr_seg *seg)
{
	unsigned long page_base;
	unsigned int len, n;
	struct page **ppages;

	n = 0;
	if (pos == 0)
		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);

	len = xdrbuf->page_len;
	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdrbuf->page_base);
	while (len) {
		seg->mr_page = *ppages;
		seg->mr_offset = page_base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
		len -= seg->mr_len;
		++ppages;
		++seg;
		++n;
		page_base = 0;
	}

	if (type == rpcrdma_readch || type == rpcrdma_writech)
		goto out;

	if (xdrbuf->tail[0].iov_len)
		rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);

out:
	if (unlikely(n > RPCRDMA_MAX_SEGS))
		return -EIO;
	return n;
}

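/* Example: a sketch of the conversion above for a Position Zero Read
 * chunk (pos == 0) whose xdr_buf has a 148-byte head, 8192 bytes of
 * page-aligned page data, and an empty tail:
 *
 *	seg[0]: head kvec   (mr_len 148)
 *	seg[1]: first page  (mr_len 4096)
 *	seg[2]: second page (mr_len 4096)
 *
 * so n = 3 is returned. For rpcrdma_readch and rpcrdma_writech the
 * tail kvec is skipped on purpose, since the tail travels inline.
 */
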
static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset);
	return 0;
}

static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
		    u32 position)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p++ = xdr_one;			/* Item present */
	xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length,
				mr->mr_offset);
	return 0;
}

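/* Example: on the wire, a plain RDMA segment is 4 XDR words (handle,
 * length, and a 64-bit offset), or 16 bytes. A Read segment puts the
 * list discriminator and the Position field in front of that:
 *
 *	[ 1 | position | handle | length | offset-hi | offset-lo ]
 *
 * which accounts for the 6 words (24 bytes) reserved in
 * encode_read_segment().
 */
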
static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
						 struct rpcrdma_req *req,
						 struct rpcrdma_mr_seg *seg,
						 int nsegs, bool writing,
						 struct rpcrdma_mr **mr)
{
	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
	if (!*mr) {
		*mr = rpcrdma_mr_get(r_xprt);
		if (!*mr)
			goto out_getmr_err;
		(*mr)->mr_req = req;
	}

	rpcrdma_mr_push(*mr, &req->rl_registered);
	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);

out_getmr_err:
	trace_xprtrdma_nomrs_err(r_xprt, req);
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	rpcrdma_mrs_refresh(r_xprt);
	return ERR_PTR(-EAGAIN);
}

/* Register and XDR encode the Read list. Supports encoding a list of read
 * segments that belong to a single read chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Read chunklist (a linked list):
 *   N elements, position P (same P for all chunks of same arg!):
 *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single @pos value is currently supported.
 */
static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct rpc_rqst *rqst,
				    enum rpcrdma_chunktype rtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	unsigned int pos;
	int nsegs;

	if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
		goto done;

	pos = rqst->rq_snd_buf.head[0].iov_len;
	if (rtype == rpcrdma_areadch)
		pos = 0;
	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
				     rtype, seg);
	if (nsegs < 0)
		return nsegs;

	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_read_segment(xdr, mr, pos) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
		r_xprt->rx_stats.read_chunk_count++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}

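/* Example: following the encoding key above, a Read list whose single
 * chunk at position P was registered as two MRs is emitted as:
 *
 *	1 - P H1 L1 O1 - 1 - P H2 L2 O2 - 0
 *
 * that is, two 6-word read segments followed by the one-word
 * terminator written by xdr_stream_encode_item_absent() at "done".
 */
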
/* Register and XDR encode the Write list. Supports encoding a list
 * containing one array of plain segments that belong to a single
 * write chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Write chunklist (a list of (one) counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single Write chunk is currently supported.
 */
static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req,
				     struct rpc_rqst *rqst,
				     enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_writech)
		goto done;

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
				     rqst->rq_rcv_buf.head[0].iov_len,
				     wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	if (xdr_pad_size(rqst->rq_rcv_buf.page_len)) {
		if (encode_rdma_segment(xdr, ep->re_write_pad_mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_wp(rqst->rq_task, ep->re_write_pad_mr,
					nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	}

	/* Update count of segments in this Write chunk */
	*segcount = cpu_to_be32(nchunks);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}

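/* Example: following the encoding key above, a Write list whose single
 * chunk was registered as two MRs, plus the write-pad segment for an
 * unaligned payload, is emitted as:
 *
 *	1 - 3 - H1 L1 O1 - H2 L2 O2 - Hp Lp Op - 0
 *
 * The segment count (3 here) is back-filled into the word reserved
 * with xdr_reserve_space() once the loop knows how many MRs it used.
 */
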
/* Register and XDR encode the Reply chunk. Supports encoding an array
 * of plain segments that belong to a single write (reply) chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Reply chunk (a counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 */
static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
				      struct rpcrdma_req *req,
				      struct rpc_rqst *rqst,
				      enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_replych) {
		if (xdr_stream_encode_item_absent(xdr) < 0)
			return -EMSGSIZE;
		return 0;
	}

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.reply_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in the Reply chunk */
	*segcount = cpu_to_be32(nchunks);

	return 0;
}

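/* Example: unlike the Read and Write lists, the Reply chunk is a bare
 * counted array, so a chunk registered as two MRs is simply:
 *
 *	1 - 2 - H1 L1 O1 - H2 L2 O2
 *
 * The whole RPC Reply message is RDMA Written into this chunk when
 * the server answers with an RDMA_NOMSG reply.
 */
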
static void rpcrdma_sendctx_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);
	struct rpcrdma_rep *rep = req->rl_reply;

	rpcrdma_complete_rqst(rep);
	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
}

/**
 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
 * @sc: sendctx containing SGEs to unmap
 *
 */
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
	struct ib_sge *sge;

	if (!sc->sc_unmap_count)
		return;

	/* The first two SGEs contain the transport header and
	 * the inline buffer. These are always left mapped so
	 * they can be cheaply re-used.
	 */
	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
	     ++sge, --sc->sc_unmap_count)
		ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
				  DMA_TO_DEVICE);

	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}

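/* Example: a sketch of the rl_kref handshake. rpcrdma_prepare_send_sges()
 * initializes rl_kref, and the prepare helpers below take an extra
 * reference whenever additional Send SGEs were DMA-mapped. The Send
 * completion (via the unmap above) and the Receive completion each
 * drop a reference; only when both have run does rpcrdma_sendctx_done()
 * complete the rqst, which is what the reply_waits_for_send counter
 * records.
 */
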
/* Prepare an SGE for the RPC-over-RDMA transport header.
 */
static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req, u32 len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
}

/* The head iovec is straightforward, as it is usually already
 * DMA-mapped. Sync the content that has changed.
 */
static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		return false;

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	return true;
}

/* If there is a page list present, DMA map and prepare an
 * SGE for each page to be sent.
 */
static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
				     struct xdr_buf *xdr)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	unsigned int page_base, len, remaining;
	struct page **ppages;
	struct ib_sge *sge;

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		sge = &sc->sc_sges[req->rl_wr.num_sge++];
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
					    page_base, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
			goto out_mapping_err;

		sge->length = len;
		sge->lkey = rdmab_lkey(rb);

		sc->sc_unmap_count++;
		ppages++;
		remaining -= len;
		page_base = 0;
	}

	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* The tail iovec may include an XDR pad for the page list,
 * as well as additional content, and may not reside in the
 * same page as the head iovec.
 */
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
				     struct xdr_buf *xdr,
				     unsigned int page_base, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	struct page *page = virt_to_page(xdr->tail[0].iov_base);

	sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
		goto out_mapping_err;

	sge->length = len;
	sge->lkey = rdmab_lkey(rb);
	++sc->sc_unmap_count;
	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* Copy the tail to the end of the head buffer.
 */
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned char *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len + xdr->page_len;
	memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
	r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
}

/* Copy pagelist content into the head buffer.
 */
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned int len, page_base, remaining;
	struct page **ppages;
	unsigned char *src, *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len;
	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		src = page_address(*ppages);
		src += page_base;
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		memcpy(dst, src, len);
		r_xprt->rx_stats.pullup_copy_count += len;

		ppages++;
		dst += len;
		remaining -= len;
		page_base = 0;
	}
}

/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
 * When the head, pagelist, and tail are small, a pull-up copy
 * is considerably less costly than DMA mapping the components
 * of @xdr.
 *
 * Assumptions:
 *  - the caller has already verified that the total length
 *    of the RPC Call body will fit into @rl_sendbuf.
 */
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	if (unlikely(xdr->tail[0].iov_len))
		rpcrdma_pullup_tail_iov(r_xprt, req, xdr);

	if (unlikely(xdr->page_len))
		rpcrdma_pullup_pagelist(r_xprt, req, xdr);

	/* The whole RPC message resides in the head iovec now */
	return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
}

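/* Example: the pull-up path above is selected by rpcrdma_marshal_req()
 * when the whole Call body fits inside rl_sendbuf (rpcrdma_noch_pullup).
 * Once the tail and pagelist have been copied in behind the head, the
 * Send WR needs only two SGEs (transport header plus head), so no
 * per-page DMA mapping or unmapping is required for small requests.
 */
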
static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	struct kvec *tail = &xdr->tail[0];

	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;
	if (xdr->page_len)
		if (!rpcrdma_prepare_pagelist(req, xdr))
			return false;
	if (tail->iov_len)
		if (!rpcrdma_prepare_tail_iov(req, xdr,
					      offset_in_page(tail->iov_base),
					      tail->iov_len))
			return false;

	if (req->rl_sendctx->sc_unmap_count)
		kref_get(&req->rl_kref);
	return true;
}

static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
				   struct rpcrdma_req *req,
				   struct xdr_buf *xdr)
{
	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;

	/* If there is a Read chunk, the page list is being handled
	 * via explicit RDMA, and thus is skipped here.
	 */

	/* Do not include the tail if it is only an XDR pad */
	if (xdr->tail[0].iov_len > 3) {
		unsigned int page_base, len;

		/* If the content in the page list is an odd length,
		 * xdr_write_pages() adds a pad at the beginning of
		 * the tail iovec. Force the tail's non-pad content to
		 * land at the next XDR position in the Send message.
		 */
		page_base = offset_in_page(xdr->tail[0].iov_base);
		len = xdr->tail[0].iov_len;
		page_base += len & 3;
		len -= len & 3;
		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
			return false;
		kref_get(&req->rl_kref);
	}

	return true;
}

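/* Example: a sketch of the tail adjustment in rpcrdma_prepare_readch().
 * For a Read chunk payload of 1003 bytes, xdr_write_pages() leaves a
 * one-byte XDR pad at the front of, say, a 5-byte tail. Since
 * len & 3 is 1, the tail SGE starts one byte further into the page
 * and covers only the 4 bytes of real content; the pad bytes
 * themselves are never sent inline.
 */
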
/**
 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
 * @r_xprt: controlling transport
 * @req: context of RPC Call being marshalled
 * @hdrlen: size of transport header, in bytes
 * @xdr: xdr_buf containing RPC Call
 * @rtype: chunk type being encoded
 *
 * Returns 0 on success; otherwise a negative errno is returned.
 */
inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, u32 hdrlen,
				     struct xdr_buf *xdr,
				     enum rpcrdma_chunktype rtype)
{
	int ret;

	ret = -EAGAIN;
	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
	if (!req->rl_sendctx)
		goto out_nosc;
	req->rl_sendctx->sc_unmap_count = 0;
	req->rl_sendctx->sc_req = req;
	kref_init(&req->rl_kref);
	req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
	req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
	req->rl_wr.num_sge = 0;
	req->rl_wr.opcode = IB_WR_SEND;

	rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen);

	ret = -EIO;
	switch (rtype) {
	case rpcrdma_noch_pullup:
		if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_noch_mapped:
		if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_readch:
		if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_areadch:
		break;
	default:
		goto out_unmap;
	}

	return 0;

out_unmap:
	rpcrdma_sendctx_unmap(req->rl_sendctx);
out_nosc:
	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
	return ret;
}

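/* Example: a sketch of the resulting Send WR for an rpcrdma_noch_mapped
 * Call with a two-page page list and a non-empty tail:
 *
 *	sc_sges[0]: transport header (rl_rdmabuf)
 *	sc_sges[1]: head iovec       (rl_sendbuf)
 *	sc_sges[2]: first page
 *	sc_sges[3]: second page
 *	sc_sges[4]: tail iovec
 *
 * Only sc_sges[2] and beyond are counted in sc_unmap_count and later
 * unmapped by rpcrdma_sendctx_unmap() when the Send completes.
 */
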
/**
 * rpcrdma_marshal_req - Marshal one outgoing RPC request
 * @r_xprt: controlling transport
 * @rqst: RPC request to be marshaled
 *
 * For the RPC in "rqst", this function:
 *  - Chooses the transfer mode (e.g., RDMA_MSG or RDMA_NOMSG)
 *  - Registers Read, Write, and Reply chunks
 *  - Constructs the transport header
 *  - Prepares the Send WR that conveys the transport header and request
 *
 * Returns:
 *	%0 if the RPC message was marshaled successfully,
 *	%-ENOTCONN if the connection was lost,
 *	%-EAGAIN if the caller should call again with the same arguments,
 *	%-ENOBUFS if the caller should call again after a delay,
 *	%-EMSGSIZE if the transport header is too small,
 *	%-EIO if a permanent problem occurred while marshaling.
 */
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400863int
Chuck Lever09e60642017-08-10 12:47:12 -0400864rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400865{
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400866 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
Chuck Lever7a80f3f2017-08-10 12:47:28 -0400867 struct xdr_stream *xdr = &req->rl_stream;
Chuck Levere2377942015-03-30 14:33:53 -0400868 enum rpcrdma_chunktype rtype, wtype;
Chuck Lever614f3c92019-10-17 14:31:53 -0400869 struct xdr_buf *buf = &rqst->rq_snd_buf;
Chuck Lever65b80172016-06-29 13:55:06 -0400870 bool ddp_allowed;
Chuck Lever7a80f3f2017-08-10 12:47:28 -0400871 __be32 *p;
Chuck Lever39f4cd92017-08-10 12:47:36 -0400872 int ret;
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400873
Chuck Lever15261b92020-12-08 18:29:02 -0500874 if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) {
875 ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf);
876 if (ret)
877 return ret;
878 }
879
Chuck Lever7a80f3f2017-08-10 12:47:28 -0400880 rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
Chuck Lever8cec3db2019-04-24 09:39:16 -0400881 xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
882 rqst);
Chuck Lever7a80f3f2017-08-10 12:47:28 -0400883
884 /* Fixed header fields */
Chuck Lever39f4cd92017-08-10 12:47:36 -0400885 ret = -EMSGSIZE;
Chuck Lever7a80f3f2017-08-10 12:47:28 -0400886 p = xdr_reserve_space(xdr, 4 * sizeof(*p));
887 if (!p)
888 goto out_err;
889 *p++ = rqst->rq_xid;
890 *p++ = rpcrdma_version;
Chuck Lever7581d902020-01-03 11:56:37 -0500891 *p++ = r_xprt->rx_buf.rb_max_requests;
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400892
Chuck Lever65b80172016-06-29 13:55:06 -0400893 /* When the ULP employs a GSS flavor that guarantees integrity
894 * or privacy, direct data placement of individual data items
895 * is not allowed.
896 */
Chuck Lever53bc19f2020-05-12 17:13:01 -0400897 ddp_allowed = !test_bit(RPCAUTH_AUTH_DATATOUCH,
898 &rqst->rq_cred->cr_auth->au_flags);
Chuck Lever65b80172016-06-29 13:55:06 -0400899
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400900 /*
901 * Chunks needed for results?
902 *
903 * o If the expected result is under the inline threshold, all ops
Chuck Lever33943b22015-08-03 13:04:08 -0400904 * return as inline.
Chuck Levercce6dee2016-05-02 14:41:14 -0400905 * o Large read ops return data as write chunk(s), header as
906 * inline.
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400907 * o Large non-read ops return as a single reply chunk.
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400908 */
Chuck Levercce6dee2016-05-02 14:41:14 -0400909 if (rpcrdma_results_inline(r_xprt, rqst))
Chuck Lever02eb57d82015-08-03 13:03:58 -0400910 wtype = rpcrdma_noch;
Chuck Leverd4550bb2019-02-11 11:23:49 -0500911 else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
912 rpcrdma_nonpayload_inline(r_xprt, rqst))
Chuck Levercce6dee2016-05-02 14:41:14 -0400913 wtype = rpcrdma_writech;
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400914 else
Chuck Levere2377942015-03-30 14:33:53 -0400915 wtype = rpcrdma_replych;
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400916
917 /*
918 * Chunks needed for arguments?
919 *
920 * o If the total request is under the inline threshold, all ops
921 * are sent as inline.
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400922 * o Large write ops transmit data as read chunk(s), header as
923 * inline.
Chuck Lever2fcc2132015-08-03 13:04:26 -0400924 * o Large non-write ops are sent with the entire message as a
925 * single read chunk (protocol 0-position special case).
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400926 *
Chuck Lever2fcc2132015-08-03 13:04:26 -0400927 * This assumes that the upper layer does not present a request
928 * that both has a data payload, and whose non-data arguments
929 * by themselves are larger than the inline threshold.
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400930 */
Chuck Lever302d3de2016-05-02 14:41:05 -0400931 if (rpcrdma_args_inline(r_xprt, rqst)) {
Chuck Lever7a80f3f2017-08-10 12:47:28 -0400932 *p++ = rdma_msg;
Chuck Lever614f3c92019-10-17 14:31:53 -0400933 rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
934 rpcrdma_noch_pullup : rpcrdma_noch_mapped;
935 } else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
Chuck Lever7a80f3f2017-08-10 12:47:28 -0400936 *p++ = rdma_msg;
Chuck Levere2377942015-03-30 14:33:53 -0400937 rtype = rpcrdma_readch;
Chuck Lever2fcc2132015-08-03 13:04:26 -0400938 } else {
Chuck Lever860477d2015-08-03 13:04:45 -0400939 r_xprt->rx_stats.nomsg_call_count++;
Chuck Lever7a80f3f2017-08-10 12:47:28 -0400940 *p++ = rdma_nomsg;
Chuck Lever2fcc2132015-08-03 13:04:26 -0400941 rtype = rpcrdma_areadch;
Chuck Lever2fcc2132015-08-03 13:04:26 -0400942 }
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400943
Chuck Lever94f58c52016-05-02 14:41:30 -0400944 /* This implementation supports the following combinations
945 * of chunk lists in one RPC-over-RDMA Call message:
946 *
947 * - Read list
948 * - Write list
949 * - Reply chunk
950 * - Read list + Reply chunk
951 *
952 * It might not yet support the following combinations:
953 *
954 * - Read list + Write list
955 *
956 * It does not support the following combinations:
957 *
958 * - Write list + Reply chunk
959 * - Read list + Write list + Reply chunk
960 *
961 * This implementation supports only a single chunk in each
962 * Read or Write list. Thus for example the client cannot
963 * send a Call message with a Position Zero Read chunk and a
964 * regular Read chunk at the same time.
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400965 */
Chuck Lever6a6c6de2019-06-19 10:33:31 -0400966 ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
Chuck Lever39f4cd92017-08-10 12:47:36 -0400967 if (ret)
Chuck Lever18c0fb32017-02-08 17:00:27 -0500968 goto out_err;
Chuck Lever6a6c6de2019-06-19 10:33:31 -0400969 ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
Chuck Lever39f4cd92017-08-10 12:47:36 -0400970 if (ret)
971 goto out_err;
Chuck Lever6a6c6de2019-06-19 10:33:31 -0400972 ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
Chuck Lever18c0fb32017-02-08 17:00:27 -0500973 if (ret)
Chuck Lever94f58c52016-05-02 14:41:30 -0400974 goto out_err;
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400975
Chuck Lever13100512019-06-19 10:32:43 -0400976 ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
Chuck Lever614f3c92019-10-17 14:31:53 -0400977 buf, rtype);
Chuck Lever857f9ac2017-10-20 10:47:55 -0400978 if (ret)
Chuck Lever18c0fb32017-02-08 17:00:27 -0500979 goto out_err;
Chuck Lever13100512019-06-19 10:32:43 -0400980
981 trace_xprtrdma_marshal(req, rtype, wtype);
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400982 return 0;
Chuck Lever302d3de2016-05-02 14:41:05 -0400983
Chuck Lever18c0fb32017-02-08 17:00:27 -0500984out_err:
Chuck Lever17e4c442019-04-24 09:39:48 -0400985 trace_xprtrdma_marshal_failed(rqst, ret);
Chuck Lever05eb06d2019-06-19 10:32:48 -0400986 r_xprt->rx_stats.failed_marshal_count++;
Chuck Lever40088f02019-06-19 10:33:04 -0400987 frwr_reset(req);
Chuck Lever39f4cd92017-08-10 12:47:36 -0400988 return ret;
\"Talpey, Thomas\e9601822007-09-10 13:50:42 -0400989}
990
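/* Example: a sketch of how the argument and result decisions above
 * combine for some common cases (assuming no GSS integrity/privacy):
 *
 *	small Call, small Reply:    RDMA_MSG,   no chunks
 *	large write-class payload:  RDMA_MSG,   Read chunk for the data
 *	large read-class payload:   RDMA_MSG,   Write chunk for the data
 *	large non-payload Reply:    RDMA_MSG,   Reply chunk
 *	huge Call, no payload:      RDMA_NOMSG, Position Zero Read chunk
 */
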
static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt,
					 struct rpcrdma_buffer *buf,
					 u32 grant)
{
	buf->rb_credits = grant;
	xprt->cwnd = grant << RPC_CWNDSHIFT;
}

static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant);
	spin_unlock(&xprt->transport_lock);
}

/**
 * rpcrdma_reset_cwnd - Reset the xprt's congestion window
 * @r_xprt: controlling transport instance
 *
 * Prepare @r_xprt for the next connection by reinitializing
 * its credit grant to one (see RFC 8166, Section 3.3.3).
 */
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	xprt->cong = 0;
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1);
	spin_unlock(&xprt->transport_lock);
}

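/* Example: the credit grant maps directly onto the generic RPC
 * congestion window. A server that grants 128 credits yields
 * xprt->cwnd = 128 << RPC_CWNDSHIFT, allowing up to 128 RPCs in
 * flight; after a reconnect the grant falls back to 1 until the first
 * Reply advertises a new credit limit.
 */
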
/**
 * rpcrdma_inline_fixup - Scatter inline received data into rqst's iovecs
 * @rqst: controlling RPC request
 * @srcp: points to RPC message payload in receive buffer
 * @copy_len: remaining length of receive buffer content
 * @pad: Write chunk pad bytes needed (zero for pure inline)
 *
 * The upper layer has set the maximum number of bytes it can
 * receive in each component of rq_rcv_buf. These values are set in
 * the head.iov_len, page_len, tail.iov_len, and buflen fields.
 *
 * Unlike the TCP equivalent (xdr_partial_copy_from_skb), in
 * many cases this function simply updates iov_base pointers in
 * rq_rcv_buf to point directly to the received reply data, to
 * avoid copying reply data.
 *
 * Returns the count of bytes which had to be memcopied.
 */
static unsigned long
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
	unsigned long fixup_copy_count;
	int i, npages, curlen;
	char *destp;
	struct page **ppages;
	int page_base;

	/* The head iovec is redirected to the RPC reply message
	 * in the receive buffer, to avoid a memcopy.
	 */
	rqst->rq_rcv_buf.head[0].iov_base = srcp;
	rqst->rq_private_buf.head[0].iov_base = srcp;

	/* The contents of the receive buffer that follow
	 * head.iov_len bytes are copied into the page list.
	 */
	curlen = rqst->rq_rcv_buf.head[0].iov_len;
	if (curlen > copy_len)
		curlen = copy_len;
	srcp += curlen;
	copy_len -= curlen;

	ppages = rqst->rq_rcv_buf.pages +
		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
	fixup_copy_count = 0;
	if (copy_len && rqst->rq_rcv_buf.page_len) {
		int pagelist_len;

		pagelist_len = rqst->rq_rcv_buf.page_len;
		if (pagelist_len > copy_len)
			pagelist_len = copy_len;
		npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT;
		for (i = 0; i < npages; i++) {
			curlen = PAGE_SIZE - page_base;
			if (curlen > pagelist_len)
				curlen = pagelist_len;

			destp = kmap_atomic(ppages[i]);
			memcpy(destp + page_base, srcp, curlen);
			flush_dcache_page(ppages[i]);
			kunmap_atomic(destp);
			srcp += curlen;
			copy_len -= curlen;
			fixup_copy_count += curlen;
			pagelist_len -= curlen;
			if (!pagelist_len)
				break;
			page_base = 0;
		}

		/* Implicit padding for the last segment in a Write
		 * chunk is inserted inline at the front of the tail
		 * iovec. The upper layer ignores the content of
		 * the pad. Simply ensure inline content in the tail
		 * that follows the Write chunk is properly aligned.
		 */
		if (pad)
			srcp -= pad;
	}

	/* The tail iovec is redirected to the remaining data
	 * in the receive buffer, to avoid a memcopy.
	 */
	if (copy_len || pad) {
		rqst->rq_rcv_buf.tail[0].iov_base = srcp;
		rqst->rq_private_buf.tail[0].iov_base = srcp;
	}

	if (fixup_copy_count)
		trace_xprtrdma_fixup(rqst, fixup_copy_count);
	return fixup_copy_count;
}

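/* Example: a sketch of the fixup for an inline 900-byte reply where
 * rq_rcv_buf has a 128-byte head and a 768-byte page list. The head
 * iovec is pointed at srcp (no copy), the next 768 bytes are
 * memcopied into the reply pages, and the remaining 4 bytes become
 * the tail by pointing tail.iov_base into the receive buffer. Only
 * the 768 copied bytes count toward fixup_copy_count.
 */
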
/* By convention, backchannel calls arrive via rdma_msg type
 * messages, and never populate the chunk lists. This makes
 * the RPC/RDMA header small and fixed in size, so it is
 * straightforward to check the RPC header's direction field.
 */
static bool
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	__be32 *p;

	if (rep->rr_proc != rdma_msg)
		return false;

	/* Peek at stream contents without advancing. */
	p = xdr_inline_decode(xdr, 0);

	/* Chunk lists */
	if (xdr_item_is_present(p++))
		return false;
	if (xdr_item_is_present(p++))
		return false;
	if (xdr_item_is_present(p++))
		return false;

	/* RPC header */
	if (*p++ != rep->rr_xid)
		return false;
	if (*p != cpu_to_be32(RPC_CALL))
		return false;

	/* Now that we are sure this is a backchannel call,
	 * advance to the RPC header.
	 */
	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
	if (unlikely(!p))
		return true;

	rpcrdma_bc_receive_call(r_xprt, rep);
	return true;
}
#else	/* CONFIG_SUNRPC_BACKCHANNEL */
{
	return false;
}
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

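/* Example: the peek above relies on the fixed shape of a chunkless
 * RDMA_MSG message. Past the four-word transport header come three
 * "item absent" words (empty Read list, Write list, and Reply chunk),
 * then the embedded RPC header starting with the XID and the message
 * direction word, which is RPC_CALL only for a backchannel request.
 */
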
Chuck Lever264b0cd2017-08-03 14:30:27 -04001167static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
1168{
Chuck Levere11b7c92017-12-20 16:31:04 -05001169 u32 handle;
1170 u64 offset;
Chuck Lever264b0cd2017-08-03 14:30:27 -04001171 __be32 *p;
1172
1173 p = xdr_inline_decode(xdr, 4 * sizeof(*p));
1174 if (unlikely(!p))
1175 return -EIO;
1176
Chuck Leverf60a0862020-03-29 16:44:13 -04001177 xdr_decode_rdma_segment(p, &handle, length, &offset);
Chuck Levere11b7c92017-12-20 16:31:04 -05001178 trace_xprtrdma_decode_seg(handle, *length, offset);
Chuck Lever264b0cd2017-08-03 14:30:27 -04001179 return 0;
1180}
1181
static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
{
	u32 segcount, seglength;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	segcount = be32_to_cpup(p);
	while (segcount--) {
		if (decode_rdma_segment(xdr, &seglength))
			return -EIO;
		*length += seglength;
	}

	return 0;
}

/* In RPC-over-RDMA Version One replies, a Read list is never
 * expected. This decoder is a stub that returns an error if
 * a Read list is present.
 */
static int decode_read_list(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;
	if (unlikely(xdr_item_is_present(p)))
		return -EIO;
	return 0;
}

/* Supports only one Write chunk in the Write list
 */
static int decode_write_list(struct xdr_stream *xdr, u32 *length)
{
	u32 chunklen;
	bool first;
	__be32 *p;

	*length = 0;
	first = true;
	do {
		p = xdr_inline_decode(xdr, sizeof(*p));
		if (unlikely(!p))
			return -EIO;
		if (xdr_item_is_absent(p))
			break;
		if (!first)
			return -EIO;

		if (decode_write_chunk(xdr, &chunklen))
			return -EIO;
		*length += chunklen;
		first = false;
	} while (true);
	return 0;
}

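/* A Reply chunk, if present, is decoded just like a Write chunk.
 * The total size of the chunk's payload is returned in @length;
 * an absent Reply chunk yields a length of zero.
 */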
static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	if (xdr_item_is_present(p))
		if (decode_write_chunk(xdr, length))
			return -EIO;
	return 0;
}

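/* An RDMA_MSG Reply carries the RPC message inline, possibly with
 * bulk data already placed by the responder via the Write chunk.
 * On success, the total number of reply bytes is returned so the
 * caller can report it when completing the RPC.
 */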
static int
rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
		   struct rpc_rqst *rqst)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	u32 writelist, replychunk, rpclen;
	char *base;

	/* Decode the chunk lists */
	if (decode_read_list(xdr))
		return -EIO;
	if (decode_write_list(xdr, &writelist))
		return -EIO;
	if (decode_reply_chunk(xdr, &replychunk))
		return -EIO;

	/* RDMA_MSG sanity checks */
	if (unlikely(replychunk))
		return -EIO;

	/* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
	base = (char *)xdr_inline_decode(xdr, 0);
	rpclen = xdr_stream_remaining(xdr);
	r_xprt->rx_stats.fixup_copy_count +=
		rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);

	r_xprt->rx_stats.total_rdma_reply += writelist;
	return rpclen + xdr_align_size(writelist);
}

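/* An RDMA_NOMSG Reply carries no inline RPC message: the whole
 * reply has already been conveyed into the requester's registered
 * memory via the Reply chunk, so only the chunk lists need to be
 * decoded and sanity-checked here.
 */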
static noinline int
rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	u32 writelist, replychunk;

	/* Decode the chunk lists */
	if (decode_read_list(xdr))
		return -EIO;
	if (decode_write_list(xdr, &writelist))
		return -EIO;
	if (decode_reply_chunk(xdr, &replychunk))
		return -EIO;

	/* RDMA_NOMSG sanity checks */
	if (unlikely(writelist))
		return -EIO;
	if (unlikely(!replychunk))
		return -EIO;

	/* Reply chunk buffer already is the reply vector */
	r_xprt->rx_stats.total_rdma_reply += replychunk;
	return replychunk;
}

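/* An RDMA_ERROR message reports a transport-level problem, such as
 * a version mismatch or an unparsable chunk list. The specific
 * error is surfaced via tracepoints, and the decoder always
 * returns -EIO so the RPC is terminated.
 */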
static noinline int
rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
		     struct rpc_rqst *rqst)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	switch (*p) {
	case err_vers:
		p = xdr_inline_decode(xdr, 2 * sizeof(*p));
		if (!p)
			break;
		trace_xprtrdma_err_vers(rqst, p, p + 1);
		break;
	case err_chunk:
		trace_xprtrdma_err_chunk(rqst);
		break;
	default:
		trace_xprtrdma_err_unrecognized(rqst, p);
	}

	return -EIO;
}

/**
 * rpcrdma_unpin_rqst - Release rqst without completing it
 * @rep: RPC/RDMA Receive context
 *
 * This is done when a connection is lost so that a Reply
 * can be dropped and its matching Call can be subsequently
 * retransmitted on a new connection.
 */
void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
{
	struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
	struct rpc_rqst *rqst = rep->rr_rqst;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);

	req->rl_reply = NULL;
	rep->rr_rqst = NULL;

	spin_lock(&xprt->queue_lock);
	xprt_unpin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);
}

/**
 * rpcrdma_complete_rqst - Pass completed rqst back to RPC
 * @rep: RPC/RDMA Receive context
 *
 * Reconstruct the RPC reply and complete the transaction
 * while @rqst is still pinned to ensure the rep, rqst, and
 * rq_task pointers remain stable.
 */
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpc_rqst *rqst = rep->rr_rqst;
	int status;

	switch (rep->rr_proc) {
	case rdma_msg:
		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
		break;
	case rdma_nomsg:
		status = rpcrdma_decode_nomsg(r_xprt, rep);
		break;
	case rdma_error:
		status = rpcrdma_decode_error(r_xprt, rep, rqst);
		break;
	default:
		status = -EIO;
	}
	if (status < 0)
		goto out_badheader;

out:
	spin_lock(&xprt->queue_lock);
	xprt_complete_rqst(rqst->rq_task, status);
	xprt_unpin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);
	return;

out_badheader:
	trace_xprtrdma_reply_hdr_err(rep);
	r_xprt->rx_stats.bad_reply_count++;
	rqst->rq_task->tk_status = status;
	status = 0;
	goto out;
}

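/* kref release callback: invalidation of this RPC's memory has
 * completed, so its Reply can now be processed and handed up to
 * the RPC layer.
 */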
static void rpcrdma_reply_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);

	rpcrdma_complete_rqst(req->rl_reply);
}

/**
 * rpcrdma_reply_handler - Process received RPC/RDMA messages
 * @rep: Incoming rpcrdma_rep object to process
 *
 * Errors must result in the RPC task either being awakened, or
 * allowed to timeout, to discover the errors at that time.
 */
void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	struct rpc_rqst *rqst;
	u32 credits;
	__be32 *p;

	/* Any data means we had a useful conversation, so
	 * we don't need to delay the next reconnect.
	 */
	if (xprt->reestablish_timeout)
		xprt->reestablish_timeout = 0;

	/* Fixed transport header fields */
	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
			rep->rr_hdrbuf.head[0].iov_base, NULL);
	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
	if (unlikely(!p))
		goto out_shortreply;
	rep->rr_xid = *p++;
	rep->rr_vers = *p++;
	credits = be32_to_cpu(*p++);
	rep->rr_proc = *p++;

	if (rep->rr_vers != rpcrdma_version)
		goto out_badversion;

	if (rpcrdma_is_bcall(r_xprt, rep))
		return;

	/* Match incoming rpcrdma_rep to an rpcrdma_req to
	 * get context for handling any incoming chunks.
	 */
	spin_lock(&xprt->queue_lock);
	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
	if (!rqst)
		goto out_norqst;
	xprt_pin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);

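	/* Apply the responder's credit grant: clamp it to a sane
	 * range, post enough Receive WRs to cover it, and let the
	 * RPC layer adjust its congestion window if the grant
	 * has changed.
	 */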
	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > r_xprt->rx_ep->re_max_requests)
		credits = r_xprt->rx_ep->re_max_requests;
	rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
			   false);
	if (buf->rb_credits != credits)
		rpcrdma_update_cwnd(r_xprt, credits);

	req = rpcr_to_rdmar(rqst);
	if (unlikely(req->rl_reply))
		rpcrdma_rep_put(buf, req->rl_reply);
	req->rl_reply = rep;
	rep->rr_rqst = rqst;

	trace_xprtrdma_reply(rqst->rq_task, rep, credits);

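	/* If the responder remotely invalidated any of this RPC's MRs,
	 * take them off the list now; MRs still registered must be
	 * locally invalidated before the RPC can complete.
	 */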
	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
		frwr_reminv(rep, &req->rl_registered);
	if (!list_empty(&req->rl_registered))
		frwr_unmap_async(r_xprt, req);
		/* LocalInv completion will complete the RPC */
	else
		kref_put(&req->rl_kref, rpcrdma_reply_done);
	return;

out_badversion:
	trace_xprtrdma_reply_vers_err(rep);
	goto out;

out_norqst:
	spin_unlock(&xprt->queue_lock);
	trace_xprtrdma_reply_rqst_err(rep);
	goto out;

out_shortreply:
	trace_xprtrdma_reply_short_err(rep);

out:
	rpcrdma_rep_put(buf, rep);
}