blob: dff318a2d5bf416ff152df9e66c7f51fd7e522d5 [file] [log] [blame]
Greg Kroah-Hartmanb2441312017-11-01 15:07:57 +01001// SPDX-License-Identifier: GPL-2.0
Ursula Brauna046d572017-01-09 16:55:16 +01002/*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * CLC (connection layer control) handshake over initial TCP socket to
6 * prepare for RDMA traffic
7 *
8 * Copyright IBM Corp. 2016
9 *
10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
11 */
12
13#include <linux/in.h>
Ursula Braun143c0172017-01-12 14:57:15 +010014#include <linux/if_ether.h>
Ingo Molnarc3edc402017-02-02 08:35:14 +010015#include <linux/sched/signal.h>
16
Ursula Brauna046d572017-01-09 16:55:16 +010017#include <net/sock.h>
18#include <net/tcp.h>
19
20#include "smc.h"
Ursula Braun0cfdd8f2017-01-09 16:55:17 +010021#include "smc_core.h"
Ursula Brauna046d572017-01-09 16:55:16 +010022#include "smc_clc.h"
23#include "smc_ib.h"
24
/* Eyecatcher "SMCR" in EBCDIC. The same four bytes are placed at the start
 * (header) and at the end (trailer) of every CLC message built in this file,
 * and both positions are checked on receive.
 */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
27
Ursula Braune7b7a642017-12-07 13:38:49 +010028/* check if received message has a correct header length and contains valid
29 * heading and trailing eyecatchers
30 */
31static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
32{
33 struct smc_clc_msg_proposal_prefix *pclc_prfx;
34 struct smc_clc_msg_accept_confirm *clc;
35 struct smc_clc_msg_proposal *pclc;
36 struct smc_clc_msg_decline *dclc;
37 struct smc_clc_msg_trail *trl;
38
39 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
40 return false;
41 switch (clcm->type) {
42 case SMC_CLC_PROPOSAL:
43 pclc = (struct smc_clc_msg_proposal *)clcm;
44 pclc_prfx = smc_clc_proposal_get_prefix(pclc);
45 if (ntohs(pclc->hdr.length) !=
46 sizeof(*pclc) + ntohs(pclc->iparea_offset) +
47 sizeof(*pclc_prfx) +
48 pclc_prfx->ipv6_prefixes_cnt *
49 sizeof(struct smc_clc_ipv6_prefix) +
50 sizeof(*trl))
51 return false;
52 trl = (struct smc_clc_msg_trail *)
53 ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
54 break;
55 case SMC_CLC_ACCEPT:
56 case SMC_CLC_CONFIRM:
57 clc = (struct smc_clc_msg_accept_confirm *)clcm;
58 if (ntohs(clc->hdr.length) != sizeof(*clc))
59 return false;
60 trl = &clc->trl;
61 break;
62 case SMC_CLC_DECLINE:
63 dclc = (struct smc_clc_msg_decline *)clcm;
64 if (ntohs(dclc->hdr.length) != sizeof(*dclc))
65 return false;
66 trl = &dclc->trl;
67 break;
68 default:
69 return false;
70 }
71 if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
72 return false;
73 return true;
74}
75
Ursula Brauna046d572017-01-09 16:55:16 +010076/* Wait for data on the tcp-socket, analyze received data
77 * Returns:
78 * 0 if success and it was not a decline that we received.
79 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
80 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
81 */
82int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
83 u8 expected_type)
84{
85 struct sock *clc_sk = smc->clcsock->sk;
86 struct smc_clc_msg_hdr *clcm = buf;
87 struct msghdr msg = {NULL, 0};
88 int reason_code = 0;
Al Virod63d2712017-09-20 20:21:22 -040089 struct kvec vec = {buf, buflen};
Ursula Brauna046d572017-01-09 16:55:16 +010090 int len, datlen;
91 int krflags;
92
93 /* peek the first few bytes to determine length of data to receive
94 * so we don't consume any subsequent CLC message or payload data
95 * in the TCP byte stream
96 */
Al Virod63d2712017-09-20 20:21:22 -040097 /*
98 * Caller must make sure that buflen is no less than
99 * sizeof(struct smc_clc_msg_hdr)
100 */
Ursula Brauna046d572017-01-09 16:55:16 +0100101 krflags = MSG_PEEK | MSG_WAITALL;
102 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
Al Virod63d2712017-09-20 20:21:22 -0400103 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
104 sizeof(struct smc_clc_msg_hdr));
105 len = sock_recvmsg(smc->clcsock, &msg, krflags);
Ursula Brauna046d572017-01-09 16:55:16 +0100106 if (signal_pending(current)) {
107 reason_code = -EINTR;
108 clc_sk->sk_err = EINTR;
109 smc->sk.sk_err = EINTR;
110 goto out;
111 }
112 if (clc_sk->sk_err) {
113 reason_code = -clc_sk->sk_err;
114 smc->sk.sk_err = clc_sk->sk_err;
115 goto out;
116 }
117 if (!len) { /* peer has performed orderly shutdown */
118 smc->sk.sk_err = ECONNRESET;
119 reason_code = -ECONNRESET;
120 goto out;
121 }
122 if (len < 0) {
123 smc->sk.sk_err = -len;
124 reason_code = len;
125 goto out;
126 }
127 datlen = ntohs(clcm->length);
128 if ((len < sizeof(struct smc_clc_msg_hdr)) ||
Ursula Braune7b7a642017-12-07 13:38:49 +0100129 (datlen > buflen) ||
Ursula Brauna046d572017-01-09 16:55:16 +0100130 ((clcm->type != SMC_CLC_DECLINE) &&
131 (clcm->type != expected_type))) {
132 smc->sk.sk_err = EPROTO;
133 reason_code = -EPROTO;
134 goto out;
135 }
136
137 /* receive the complete CLC message */
Ursula Brauna046d572017-01-09 16:55:16 +0100138 memset(&msg, 0, sizeof(struct msghdr));
Al Virod63d2712017-09-20 20:21:22 -0400139 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
Ursula Brauna046d572017-01-09 16:55:16 +0100140 krflags = MSG_WAITALL;
141 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
Al Virod63d2712017-09-20 20:21:22 -0400142 len = sock_recvmsg(smc->clcsock, &msg, krflags);
Ursula Braune7b7a642017-12-07 13:38:49 +0100143 if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
Ursula Brauna046d572017-01-09 16:55:16 +0100144 smc->sk.sk_err = EPROTO;
145 reason_code = -EPROTO;
146 goto out;
147 }
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100148 if (clcm->type == SMC_CLC_DECLINE) {
Ursula Brauna046d572017-01-09 16:55:16 +0100149 reason_code = SMC_CLC_DECL_REPLY;
Ursula Braunbfbedfd2017-09-21 09:16:32 +0200150 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100151 smc->conn.lgr->sync_err = true;
Ursula Braunbfbedfd2017-09-21 09:16:32 +0200152 smc_lgr_terminate(smc->conn.lgr);
153 }
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100154 }
155
Ursula Brauna046d572017-01-09 16:55:16 +0100156out:
157 return reason_code;
158}
159
160/* send CLC DECLINE message across internal TCP socket */
Ursula Braunbfbedfd2017-09-21 09:16:32 +0200161int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
Ursula Brauna046d572017-01-09 16:55:16 +0100162{
163 struct smc_clc_msg_decline dclc;
164 struct msghdr msg;
165 struct kvec vec;
166 int len;
167
168 memset(&dclc, 0, sizeof(dclc));
169 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
170 dclc.hdr.type = SMC_CLC_DECLINE;
171 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
172 dclc.hdr.version = SMC_CLC_V1;
Ursula Braunbfbedfd2017-09-21 09:16:32 +0200173 dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
Ursula Brauna046d572017-01-09 16:55:16 +0100174 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
175 dclc.peer_diagnosis = htonl(peer_diag_info);
176 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
177
178 memset(&msg, 0, sizeof(msg));
179 vec.iov_base = &dclc;
180 vec.iov_len = sizeof(struct smc_clc_msg_decline);
181 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
182 sizeof(struct smc_clc_msg_decline));
183 if (len < sizeof(struct smc_clc_msg_decline))
184 smc->sk.sk_err = EPROTO;
185 if (len < 0)
186 smc->sk.sk_err = -len;
Ursula Braun0c9f1512017-12-07 13:38:45 +0100187 return sock_error(&smc->sk);
Ursula Brauna046d572017-01-09 16:55:16 +0100188}
189
190/* send CLC PROPOSAL message across internal TCP socket */
191int smc_clc_send_proposal(struct smc_sock *smc,
192 struct smc_ib_device *smcibdev,
193 u8 ibport)
194{
Ursula Braune7b7a642017-12-07 13:38:49 +0100195 struct smc_clc_msg_proposal_prefix pclc_prfx;
Ursula Brauna046d572017-01-09 16:55:16 +0100196 struct smc_clc_msg_proposal pclc;
Ursula Braune7b7a642017-12-07 13:38:49 +0100197 struct smc_clc_msg_trail trl;
Ursula Brauna046d572017-01-09 16:55:16 +0100198 int reason_code = 0;
Ursula Braune7b7a642017-12-07 13:38:49 +0100199 struct kvec vec[3];
Ursula Brauna046d572017-01-09 16:55:16 +0100200 struct msghdr msg;
Ursula Braune7b7a642017-12-07 13:38:49 +0100201 int len, plen, rc;
Ursula Brauna046d572017-01-09 16:55:16 +0100202
203 /* send SMC Proposal CLC message */
Ursula Braune7b7a642017-12-07 13:38:49 +0100204 plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
Ursula Brauna046d572017-01-09 16:55:16 +0100205 memset(&pclc, 0, sizeof(pclc));
206 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
207 pclc.hdr.type = SMC_CLC_PROPOSAL;
Ursula Braune7b7a642017-12-07 13:38:49 +0100208 pclc.hdr.length = htons(plen);
Ursula Brauna046d572017-01-09 16:55:16 +0100209 pclc.hdr.version = SMC_CLC_V1; /* SMC version */
210 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
211 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
Ursula Braun143c0172017-01-12 14:57:15 +0100212 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
Ursula Braune7b7a642017-12-07 13:38:49 +0100213 pclc.iparea_offset = htons(0);
Ursula Brauna046d572017-01-09 16:55:16 +0100214
Ursula Braune7b7a642017-12-07 13:38:49 +0100215 memset(&pclc_prfx, 0, sizeof(pclc_prfx));
Ursula Brauna046d572017-01-09 16:55:16 +0100216 /* determine subnet and mask from internal TCP socket */
Ursula Braune7b7a642017-12-07 13:38:49 +0100217 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
218 &pclc_prfx.prefix_len);
Ursula Brauna046d572017-01-09 16:55:16 +0100219 if (rc)
220 return SMC_CLC_DECL_CNFERR; /* configuration error */
Ursula Braune7b7a642017-12-07 13:38:49 +0100221 pclc_prfx.ipv6_prefixes_cnt = 0;
222 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
Ursula Brauna046d572017-01-09 16:55:16 +0100223 memset(&msg, 0, sizeof(msg));
Ursula Braune7b7a642017-12-07 13:38:49 +0100224 vec[0].iov_base = &pclc;
225 vec[0].iov_len = sizeof(pclc);
226 vec[1].iov_base = &pclc_prfx;
227 vec[1].iov_len = sizeof(pclc_prfx);
228 vec[2].iov_base = &trl;
229 vec[2].iov_len = sizeof(trl);
Ursula Brauna046d572017-01-09 16:55:16 +0100230 /* due to the few bytes needed for clc-handshake this cannot block */
Ursula Braune7b7a642017-12-07 13:38:49 +0100231 len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
Ursula Brauna046d572017-01-09 16:55:16 +0100232 if (len < sizeof(pclc)) {
233 if (len >= 0) {
234 reason_code = -ENETUNREACH;
235 smc->sk.sk_err = -reason_code;
236 } else {
237 smc->sk.sk_err = smc->clcsock->sk->sk_err;
238 reason_code = -smc->sk.sk_err;
239 }
240 }
241
242 return reason_code;
243}
244
245/* send CLC CONFIRM message across internal TCP socket */
246int smc_clc_send_confirm(struct smc_sock *smc)
247{
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100248 struct smc_connection *conn = &smc->conn;
Ursula Brauna046d572017-01-09 16:55:16 +0100249 struct smc_clc_msg_accept_confirm cclc;
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100250 struct smc_link *link;
Ursula Brauna046d572017-01-09 16:55:16 +0100251 int reason_code = 0;
252 struct msghdr msg;
253 struct kvec vec;
254 int len;
255
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100256 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
Ursula Brauna046d572017-01-09 16:55:16 +0100257 /* send SMC Confirm CLC msg */
258 memset(&cclc, 0, sizeof(cclc));
259 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
260 cclc.hdr.type = SMC_CLC_CONFIRM;
261 cclc.hdr.length = htons(sizeof(cclc));
262 cclc.hdr.version = SMC_CLC_V1; /* SMC version */
263 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100264 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
265 SMC_GID_SIZE);
Ursula Braun143c0172017-01-12 14:57:15 +0100266 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100267 hton24(cclc.qpn, link->roce_qp->qp_num);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100268 cclc.rmb_rkey =
Ursula Braun897e1c22017-07-28 13:56:16 +0200269 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
Ursula Brauna046d572017-01-09 16:55:16 +0100270 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100271 cclc.rmbe_alert_token = htonl(conn->alert_token_local);
272 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100273 cclc.rmbe_size = conn->rmbe_size_short;
Ursula Brauna3fe3d02017-07-28 13:56:15 +0200274 cclc.rmb_dma_addr = cpu_to_be64(
275 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100276 hton24(cclc.psn, link->psn_initial);
Ursula Brauna046d572017-01-09 16:55:16 +0100277
278 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
279
280 memset(&msg, 0, sizeof(msg));
281 vec.iov_base = &cclc;
282 vec.iov_len = sizeof(cclc);
283 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
284 if (len < sizeof(cclc)) {
285 if (len >= 0) {
286 reason_code = -ENETUNREACH;
287 smc->sk.sk_err = -reason_code;
288 } else {
289 smc->sk.sk_err = smc->clcsock->sk->sk_err;
290 reason_code = -smc->sk.sk_err;
291 }
292 }
293 return reason_code;
294}
295
296/* send CLC ACCEPT message across internal TCP socket */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100297int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
Ursula Brauna046d572017-01-09 16:55:16 +0100298{
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100299 struct smc_connection *conn = &new_smc->conn;
Ursula Brauna046d572017-01-09 16:55:16 +0100300 struct smc_clc_msg_accept_confirm aclc;
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100301 struct smc_link *link;
Ursula Brauna046d572017-01-09 16:55:16 +0100302 struct msghdr msg;
303 struct kvec vec;
304 int rc = 0;
305 int len;
306
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100307 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
Ursula Brauna046d572017-01-09 16:55:16 +0100308 memset(&aclc, 0, sizeof(aclc));
309 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
310 aclc.hdr.type = SMC_CLC_ACCEPT;
311 aclc.hdr.length = htons(sizeof(aclc));
312 aclc.hdr.version = SMC_CLC_V1; /* SMC version */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100313 if (srv_first_contact)
314 aclc.hdr.flag = 1;
Ursula Brauna046d572017-01-09 16:55:16 +0100315 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100316 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
317 SMC_GID_SIZE);
Ursula Braun143c0172017-01-12 14:57:15 +0100318 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100319 hton24(aclc.qpn, link->roce_qp->qp_num);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100320 aclc.rmb_rkey =
Ursula Braun897e1c22017-07-28 13:56:16 +0200321 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
Ursula Brauna046d572017-01-09 16:55:16 +0100322 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100323 aclc.rmbe_alert_token = htonl(conn->alert_token_local);
324 aclc.qp_mtu = link->path_mtu;
Ursula Brauncd6851f2017-01-09 16:55:18 +0100325 aclc.rmbe_size = conn->rmbe_size_short,
Ursula Brauna3fe3d02017-07-28 13:56:15 +0200326 aclc.rmb_dma_addr = cpu_to_be64(
327 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100328 hton24(aclc.psn, link->psn_initial);
Ursula Brauna046d572017-01-09 16:55:16 +0100329 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
330
331 memset(&msg, 0, sizeof(msg));
332 vec.iov_base = &aclc;
333 vec.iov_len = sizeof(aclc);
334 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
335 if (len < sizeof(aclc)) {
336 if (len >= 0)
337 new_smc->sk.sk_err = EPROTO;
338 else
339 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
340 rc = sock_error(&new_smc->sk);
341 }
342
343 return rc;
344}