blob: 874c5a75d6dd1754127aef5c72296321f564c25a [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  CLC (connection layer control) handshake over initial TCP socket to
 *  prepare for RDMA traffic
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
12
13#include <linux/in.h>
Karsten Graul696cd302018-03-01 13:51:27 +010014#include <linux/inetdevice.h>
Ursula Braun143c0172017-01-12 14:57:15 +010015#include <linux/if_ether.h>
Ingo Molnarc3edc402017-02-02 08:35:14 +010016#include <linux/sched/signal.h>
17
Ursula Brauna046d572017-01-09 16:55:16 +010018#include <net/sock.h>
19#include <net/tcp.h>
20
21#include "smc.h"
Ursula Braun0cfdd8f2017-01-09 16:55:17 +010022#include "smc_core.h"
Ursula Brauna046d572017-01-09 16:55:16 +010023#include "smc_clc.h"
24#include "smc_ib.h"
25
/* Eye catcher placed at the start and end of every CLC message:
 * the four characters "SMCR" encoded in EBCDIC.
 */
static const char SMC_EYECATCHER[4] = {
	'\xe2',	/* S */
	'\xd4',	/* M */
	'\xc3',	/* C */
	'\xd9',	/* R */
};
28
Ursula Braune7b7a642017-12-07 13:38:49 +010029/* check if received message has a correct header length and contains valid
30 * heading and trailing eyecatchers
31 */
32static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
33{
34 struct smc_clc_msg_proposal_prefix *pclc_prfx;
35 struct smc_clc_msg_accept_confirm *clc;
36 struct smc_clc_msg_proposal *pclc;
37 struct smc_clc_msg_decline *dclc;
38 struct smc_clc_msg_trail *trl;
39
40 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
41 return false;
42 switch (clcm->type) {
43 case SMC_CLC_PROPOSAL:
44 pclc = (struct smc_clc_msg_proposal *)clcm;
45 pclc_prfx = smc_clc_proposal_get_prefix(pclc);
46 if (ntohs(pclc->hdr.length) !=
47 sizeof(*pclc) + ntohs(pclc->iparea_offset) +
48 sizeof(*pclc_prfx) +
49 pclc_prfx->ipv6_prefixes_cnt *
50 sizeof(struct smc_clc_ipv6_prefix) +
51 sizeof(*trl))
52 return false;
53 trl = (struct smc_clc_msg_trail *)
54 ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
55 break;
56 case SMC_CLC_ACCEPT:
57 case SMC_CLC_CONFIRM:
58 clc = (struct smc_clc_msg_accept_confirm *)clcm;
59 if (ntohs(clc->hdr.length) != sizeof(*clc))
60 return false;
61 trl = &clc->trl;
62 break;
63 case SMC_CLC_DECLINE:
64 dclc = (struct smc_clc_msg_decline *)clcm;
65 if (ntohs(dclc->hdr.length) != sizeof(*dclc))
66 return false;
67 trl = &dclc->trl;
68 break;
69 default:
70 return false;
71 }
72 if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
73 return false;
74 return true;
75}
76
Karsten Graul696cd302018-03-01 13:51:27 +010077/* determine subnet and mask of internal TCP socket */
78int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
79 __be32 *subnet, u8 *prefix_len)
80{
81 struct dst_entry *dst = sk_dst_get(clcsock->sk);
82 struct in_device *in_dev;
83 struct sockaddr_in addr;
84 int rc = -ENOENT;
85
86 if (!dst) {
87 rc = -ENOTCONN;
88 goto out;
89 }
90 if (!dst->dev) {
91 rc = -ENODEV;
92 goto out_rel;
93 }
94
95 /* get address to which the internal TCP socket is bound */
96 kernel_getsockname(clcsock, (struct sockaddr *)&addr);
97 /* analyze IPv4 specific data of net_device belonging to TCP socket */
98 rcu_read_lock();
99 in_dev = __in_dev_get_rcu(dst->dev);
100 for_ifa(in_dev) {
101 if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
102 continue;
103 *prefix_len = inet_mask_len(ifa->ifa_mask);
104 *subnet = ifa->ifa_address & ifa->ifa_mask;
105 rc = 0;
106 break;
107 } endfor_ifa(in_dev);
108 rcu_read_unlock();
109
110out_rel:
111 dst_release(dst);
112out:
113 return rc;
114}
115
Ursula Brauna046d572017-01-09 16:55:16 +0100116/* Wait for data on the tcp-socket, analyze received data
117 * Returns:
118 * 0 if success and it was not a decline that we received.
119 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
120 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
121 */
122int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
123 u8 expected_type)
124{
125 struct sock *clc_sk = smc->clcsock->sk;
126 struct smc_clc_msg_hdr *clcm = buf;
127 struct msghdr msg = {NULL, 0};
128 int reason_code = 0;
Al Virod63d2712017-09-20 20:21:22 -0400129 struct kvec vec = {buf, buflen};
Ursula Brauna046d572017-01-09 16:55:16 +0100130 int len, datlen;
131 int krflags;
132
133 /* peek the first few bytes to determine length of data to receive
134 * so we don't consume any subsequent CLC message or payload data
135 * in the TCP byte stream
136 */
Al Virod63d2712017-09-20 20:21:22 -0400137 /*
138 * Caller must make sure that buflen is no less than
139 * sizeof(struct smc_clc_msg_hdr)
140 */
Ursula Brauna046d572017-01-09 16:55:16 +0100141 krflags = MSG_PEEK | MSG_WAITALL;
142 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
Al Virod63d2712017-09-20 20:21:22 -0400143 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
144 sizeof(struct smc_clc_msg_hdr));
145 len = sock_recvmsg(smc->clcsock, &msg, krflags);
Ursula Brauna046d572017-01-09 16:55:16 +0100146 if (signal_pending(current)) {
147 reason_code = -EINTR;
148 clc_sk->sk_err = EINTR;
149 smc->sk.sk_err = EINTR;
150 goto out;
151 }
152 if (clc_sk->sk_err) {
153 reason_code = -clc_sk->sk_err;
154 smc->sk.sk_err = clc_sk->sk_err;
155 goto out;
156 }
157 if (!len) { /* peer has performed orderly shutdown */
158 smc->sk.sk_err = ECONNRESET;
159 reason_code = -ECONNRESET;
160 goto out;
161 }
162 if (len < 0) {
163 smc->sk.sk_err = -len;
164 reason_code = len;
165 goto out;
166 }
167 datlen = ntohs(clcm->length);
168 if ((len < sizeof(struct smc_clc_msg_hdr)) ||
Ursula Braune7b7a642017-12-07 13:38:49 +0100169 (datlen > buflen) ||
Ursula Brauna046d572017-01-09 16:55:16 +0100170 ((clcm->type != SMC_CLC_DECLINE) &&
171 (clcm->type != expected_type))) {
172 smc->sk.sk_err = EPROTO;
173 reason_code = -EPROTO;
174 goto out;
175 }
176
177 /* receive the complete CLC message */
Ursula Brauna046d572017-01-09 16:55:16 +0100178 memset(&msg, 0, sizeof(struct msghdr));
Al Virod63d2712017-09-20 20:21:22 -0400179 iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
Ursula Brauna046d572017-01-09 16:55:16 +0100180 krflags = MSG_WAITALL;
181 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
Al Virod63d2712017-09-20 20:21:22 -0400182 len = sock_recvmsg(smc->clcsock, &msg, krflags);
Ursula Braune7b7a642017-12-07 13:38:49 +0100183 if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
Ursula Brauna046d572017-01-09 16:55:16 +0100184 smc->sk.sk_err = EPROTO;
185 reason_code = -EPROTO;
186 goto out;
187 }
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100188 if (clcm->type == SMC_CLC_DECLINE) {
Ursula Brauna046d572017-01-09 16:55:16 +0100189 reason_code = SMC_CLC_DECL_REPLY;
Ursula Braunbfbedfd2017-09-21 09:16:32 +0200190 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100191 smc->conn.lgr->sync_err = true;
Ursula Braunbfbedfd2017-09-21 09:16:32 +0200192 smc_lgr_terminate(smc->conn.lgr);
193 }
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100194 }
195
Ursula Brauna046d572017-01-09 16:55:16 +0100196out:
197 return reason_code;
198}
199
200/* send CLC DECLINE message across internal TCP socket */
Ursula Braunbfbedfd2017-09-21 09:16:32 +0200201int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
Ursula Brauna046d572017-01-09 16:55:16 +0100202{
203 struct smc_clc_msg_decline dclc;
204 struct msghdr msg;
205 struct kvec vec;
206 int len;
207
208 memset(&dclc, 0, sizeof(dclc));
209 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
210 dclc.hdr.type = SMC_CLC_DECLINE;
211 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
212 dclc.hdr.version = SMC_CLC_V1;
Ursula Braunbfbedfd2017-09-21 09:16:32 +0200213 dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
Ursula Brauna046d572017-01-09 16:55:16 +0100214 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
215 dclc.peer_diagnosis = htonl(peer_diag_info);
216 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
217
218 memset(&msg, 0, sizeof(msg));
219 vec.iov_base = &dclc;
220 vec.iov_len = sizeof(struct smc_clc_msg_decline);
221 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
222 sizeof(struct smc_clc_msg_decline));
223 if (len < sizeof(struct smc_clc_msg_decline))
224 smc->sk.sk_err = EPROTO;
225 if (len < 0)
226 smc->sk.sk_err = -len;
Ursula Braun0c9f1512017-12-07 13:38:45 +0100227 return sock_error(&smc->sk);
Ursula Brauna046d572017-01-09 16:55:16 +0100228}
229
230/* send CLC PROPOSAL message across internal TCP socket */
231int smc_clc_send_proposal(struct smc_sock *smc,
232 struct smc_ib_device *smcibdev,
233 u8 ibport)
234{
Ursula Braune7b7a642017-12-07 13:38:49 +0100235 struct smc_clc_msg_proposal_prefix pclc_prfx;
Ursula Brauna046d572017-01-09 16:55:16 +0100236 struct smc_clc_msg_proposal pclc;
Ursula Braune7b7a642017-12-07 13:38:49 +0100237 struct smc_clc_msg_trail trl;
Ursula Brauna046d572017-01-09 16:55:16 +0100238 int reason_code = 0;
Ursula Braune7b7a642017-12-07 13:38:49 +0100239 struct kvec vec[3];
Ursula Brauna046d572017-01-09 16:55:16 +0100240 struct msghdr msg;
Ursula Braune7b7a642017-12-07 13:38:49 +0100241 int len, plen, rc;
Ursula Brauna046d572017-01-09 16:55:16 +0100242
243 /* send SMC Proposal CLC message */
Ursula Braune7b7a642017-12-07 13:38:49 +0100244 plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
Ursula Brauna046d572017-01-09 16:55:16 +0100245 memset(&pclc, 0, sizeof(pclc));
246 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
247 pclc.hdr.type = SMC_CLC_PROPOSAL;
Ursula Braune7b7a642017-12-07 13:38:49 +0100248 pclc.hdr.length = htons(plen);
Ursula Brauna046d572017-01-09 16:55:16 +0100249 pclc.hdr.version = SMC_CLC_V1; /* SMC version */
250 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
251 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
Ursula Braun143c0172017-01-12 14:57:15 +0100252 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
Ursula Braune7b7a642017-12-07 13:38:49 +0100253 pclc.iparea_offset = htons(0);
Ursula Brauna046d572017-01-09 16:55:16 +0100254
Ursula Braune7b7a642017-12-07 13:38:49 +0100255 memset(&pclc_prfx, 0, sizeof(pclc_prfx));
Ursula Brauna046d572017-01-09 16:55:16 +0100256 /* determine subnet and mask from internal TCP socket */
Karsten Graul696cd302018-03-01 13:51:27 +0100257 rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
258 &pclc_prfx.prefix_len);
Ursula Brauna046d572017-01-09 16:55:16 +0100259 if (rc)
260 return SMC_CLC_DECL_CNFERR; /* configuration error */
Ursula Braune7b7a642017-12-07 13:38:49 +0100261 pclc_prfx.ipv6_prefixes_cnt = 0;
262 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
Ursula Brauna046d572017-01-09 16:55:16 +0100263 memset(&msg, 0, sizeof(msg));
Ursula Braune7b7a642017-12-07 13:38:49 +0100264 vec[0].iov_base = &pclc;
265 vec[0].iov_len = sizeof(pclc);
266 vec[1].iov_base = &pclc_prfx;
267 vec[1].iov_len = sizeof(pclc_prfx);
268 vec[2].iov_base = &trl;
269 vec[2].iov_len = sizeof(trl);
Ursula Brauna046d572017-01-09 16:55:16 +0100270 /* due to the few bytes needed for clc-handshake this cannot block */
Ursula Braune7b7a642017-12-07 13:38:49 +0100271 len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
Ursula Brauna046d572017-01-09 16:55:16 +0100272 if (len < sizeof(pclc)) {
273 if (len >= 0) {
274 reason_code = -ENETUNREACH;
275 smc->sk.sk_err = -reason_code;
276 } else {
277 smc->sk.sk_err = smc->clcsock->sk->sk_err;
278 reason_code = -smc->sk.sk_err;
279 }
280 }
281
282 return reason_code;
283}
284
285/* send CLC CONFIRM message across internal TCP socket */
286int smc_clc_send_confirm(struct smc_sock *smc)
287{
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100288 struct smc_connection *conn = &smc->conn;
Ursula Brauna046d572017-01-09 16:55:16 +0100289 struct smc_clc_msg_accept_confirm cclc;
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100290 struct smc_link *link;
Ursula Brauna046d572017-01-09 16:55:16 +0100291 int reason_code = 0;
292 struct msghdr msg;
293 struct kvec vec;
294 int len;
295
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100296 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
Ursula Brauna046d572017-01-09 16:55:16 +0100297 /* send SMC Confirm CLC msg */
298 memset(&cclc, 0, sizeof(cclc));
299 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
300 cclc.hdr.type = SMC_CLC_CONFIRM;
301 cclc.hdr.length = htons(sizeof(cclc));
302 cclc.hdr.version = SMC_CLC_V1; /* SMC version */
303 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100304 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
305 SMC_GID_SIZE);
Ursula Braun143c0172017-01-12 14:57:15 +0100306 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100307 hton24(cclc.qpn, link->roce_qp->qp_num);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100308 cclc.rmb_rkey =
Ursula Braun897e1c22017-07-28 13:56:16 +0200309 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
Ursula Brauna046d572017-01-09 16:55:16 +0100310 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100311 cclc.rmbe_alert_token = htonl(conn->alert_token_local);
312 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100313 cclc.rmbe_size = conn->rmbe_size_short;
Ursula Brauna3fe3d02017-07-28 13:56:15 +0200314 cclc.rmb_dma_addr = cpu_to_be64(
315 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100316 hton24(cclc.psn, link->psn_initial);
Ursula Brauna046d572017-01-09 16:55:16 +0100317
318 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
319
320 memset(&msg, 0, sizeof(msg));
321 vec.iov_base = &cclc;
322 vec.iov_len = sizeof(cclc);
323 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
324 if (len < sizeof(cclc)) {
325 if (len >= 0) {
326 reason_code = -ENETUNREACH;
327 smc->sk.sk_err = -reason_code;
328 } else {
329 smc->sk.sk_err = smc->clcsock->sk->sk_err;
330 reason_code = -smc->sk.sk_err;
331 }
332 }
333 return reason_code;
334}
335
336/* send CLC ACCEPT message across internal TCP socket */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100337int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
Ursula Brauna046d572017-01-09 16:55:16 +0100338{
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100339 struct smc_connection *conn = &new_smc->conn;
Ursula Brauna046d572017-01-09 16:55:16 +0100340 struct smc_clc_msg_accept_confirm aclc;
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100341 struct smc_link *link;
Ursula Brauna046d572017-01-09 16:55:16 +0100342 struct msghdr msg;
343 struct kvec vec;
344 int rc = 0;
345 int len;
346
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100347 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
Ursula Brauna046d572017-01-09 16:55:16 +0100348 memset(&aclc, 0, sizeof(aclc));
349 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
350 aclc.hdr.type = SMC_CLC_ACCEPT;
351 aclc.hdr.length = htons(sizeof(aclc));
352 aclc.hdr.version = SMC_CLC_V1; /* SMC version */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100353 if (srv_first_contact)
354 aclc.hdr.flag = 1;
Ursula Brauna046d572017-01-09 16:55:16 +0100355 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100356 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
357 SMC_GID_SIZE);
Ursula Braun143c0172017-01-12 14:57:15 +0100358 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100359 hton24(aclc.qpn, link->roce_qp->qp_num);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100360 aclc.rmb_rkey =
Ursula Braun897e1c22017-07-28 13:56:16 +0200361 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
Ursula Brauna046d572017-01-09 16:55:16 +0100362 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100363 aclc.rmbe_alert_token = htonl(conn->alert_token_local);
364 aclc.qp_mtu = link->path_mtu;
Ursula Brauncd6851f2017-01-09 16:55:18 +0100365 aclc.rmbe_size = conn->rmbe_size_short,
Ursula Brauna3fe3d02017-07-28 13:56:15 +0200366 aclc.rmb_dma_addr = cpu_to_be64(
367 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100368 hton24(aclc.psn, link->psn_initial);
Ursula Brauna046d572017-01-09 16:55:16 +0100369 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
370
371 memset(&msg, 0, sizeof(msg));
372 vec.iov_base = &aclc;
373 vec.iov_len = sizeof(aclc);
374 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
375 if (len < sizeof(aclc)) {
376 if (len >= 0)
377 new_smc->sk.sk_err = EPROTO;
378 else
379 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
380 rc = sock_error(&new_smc->sk);
381 }
382
383 return rc;
384}