blob: e1e684c562b8135e5f5d42e359bc8d6de18c514b [file] [log] [blame]
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * CLC (connection layer control) handshake over initial TCP socket to
 * prepare for RDMA traffic
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
11
12#include <linux/in.h>
13#include <net/sock.h>
14#include <net/tcp.h>
15
16#include "smc.h"
Ursula Braun0cfdd8f2017-01-09 16:55:17 +010017#include "smc_core.h"
Ursula Brauna046d572017-01-09 16:55:16 +010018#include "smc_clc.h"
19#include "smc_ib.h"
20
21/* Wait for data on the tcp-socket, analyze received data
22 * Returns:
23 * 0 if success and it was not a decline that we received.
24 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
25 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
26 */
27int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
28 u8 expected_type)
29{
30 struct sock *clc_sk = smc->clcsock->sk;
31 struct smc_clc_msg_hdr *clcm = buf;
32 struct msghdr msg = {NULL, 0};
33 int reason_code = 0;
34 struct kvec vec;
35 int len, datlen;
36 int krflags;
37
38 /* peek the first few bytes to determine length of data to receive
39 * so we don't consume any subsequent CLC message or payload data
40 * in the TCP byte stream
41 */
42 vec.iov_base = buf;
43 vec.iov_len = buflen;
44 krflags = MSG_PEEK | MSG_WAITALL;
45 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
46 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
47 sizeof(struct smc_clc_msg_hdr), krflags);
48 if (signal_pending(current)) {
49 reason_code = -EINTR;
50 clc_sk->sk_err = EINTR;
51 smc->sk.sk_err = EINTR;
52 goto out;
53 }
54 if (clc_sk->sk_err) {
55 reason_code = -clc_sk->sk_err;
56 smc->sk.sk_err = clc_sk->sk_err;
57 goto out;
58 }
59 if (!len) { /* peer has performed orderly shutdown */
60 smc->sk.sk_err = ECONNRESET;
61 reason_code = -ECONNRESET;
62 goto out;
63 }
64 if (len < 0) {
65 smc->sk.sk_err = -len;
66 reason_code = len;
67 goto out;
68 }
69 datlen = ntohs(clcm->length);
70 if ((len < sizeof(struct smc_clc_msg_hdr)) ||
71 (datlen < sizeof(struct smc_clc_msg_decline)) ||
72 (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
73 memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
74 ((clcm->type != SMC_CLC_DECLINE) &&
75 (clcm->type != expected_type))) {
76 smc->sk.sk_err = EPROTO;
77 reason_code = -EPROTO;
78 goto out;
79 }
80
81 /* receive the complete CLC message */
82 vec.iov_base = buf;
83 vec.iov_len = buflen;
84 memset(&msg, 0, sizeof(struct msghdr));
85 krflags = MSG_WAITALL;
86 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
87 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
88 if (len < datlen) {
89 smc->sk.sk_err = EPROTO;
90 reason_code = -EPROTO;
91 goto out;
92 }
Ursula Braun0cfdd8f2017-01-09 16:55:17 +010093 if (clcm->type == SMC_CLC_DECLINE) {
Ursula Brauna046d572017-01-09 16:55:16 +010094 reason_code = SMC_CLC_DECL_REPLY;
Ursula Braun0cfdd8f2017-01-09 16:55:17 +010095 if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
96 == SMC_CLC_DECL_SYNCERR)
97 smc->conn.lgr->sync_err = true;
98 }
99
Ursula Brauna046d572017-01-09 16:55:16 +0100100out:
101 return reason_code;
102}
103
104/* send CLC DECLINE message across internal TCP socket */
105int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
106 u8 out_of_sync)
107{
108 struct smc_clc_msg_decline dclc;
109 struct msghdr msg;
110 struct kvec vec;
111 int len;
112
113 memset(&dclc, 0, sizeof(dclc));
114 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
115 dclc.hdr.type = SMC_CLC_DECLINE;
116 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
117 dclc.hdr.version = SMC_CLC_V1;
118 dclc.hdr.flag = out_of_sync ? 1 : 0;
119 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
120 dclc.peer_diagnosis = htonl(peer_diag_info);
121 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
122
123 memset(&msg, 0, sizeof(msg));
124 vec.iov_base = &dclc;
125 vec.iov_len = sizeof(struct smc_clc_msg_decline);
126 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
127 sizeof(struct smc_clc_msg_decline));
128 if (len < sizeof(struct smc_clc_msg_decline))
129 smc->sk.sk_err = EPROTO;
130 if (len < 0)
131 smc->sk.sk_err = -len;
132 return len;
133}
134
135/* send CLC PROPOSAL message across internal TCP socket */
136int smc_clc_send_proposal(struct smc_sock *smc,
137 struct smc_ib_device *smcibdev,
138 u8 ibport)
139{
140 struct smc_clc_msg_proposal pclc;
141 int reason_code = 0;
142 struct msghdr msg;
143 struct kvec vec;
144 int len, rc;
145
146 /* send SMC Proposal CLC message */
147 memset(&pclc, 0, sizeof(pclc));
148 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
149 pclc.hdr.type = SMC_CLC_PROPOSAL;
150 pclc.hdr.length = htons(sizeof(pclc));
151 pclc.hdr.version = SMC_CLC_V1; /* SMC version */
152 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
153 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
154 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1],
155 sizeof(smcibdev->mac[ibport - 1]));
156
157 /* determine subnet and mask from internal TCP socket */
158 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
159 &pclc.prefix_len);
160 if (rc)
161 return SMC_CLC_DECL_CNFERR; /* configuration error */
162 memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
163 memset(&msg, 0, sizeof(msg));
164 vec.iov_base = &pclc;
165 vec.iov_len = sizeof(pclc);
166 /* due to the few bytes needed for clc-handshake this cannot block */
167 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
168 if (len < sizeof(pclc)) {
169 if (len >= 0) {
170 reason_code = -ENETUNREACH;
171 smc->sk.sk_err = -reason_code;
172 } else {
173 smc->sk.sk_err = smc->clcsock->sk->sk_err;
174 reason_code = -smc->sk.sk_err;
175 }
176 }
177
178 return reason_code;
179}
180
181/* send CLC CONFIRM message across internal TCP socket */
182int smc_clc_send_confirm(struct smc_sock *smc)
183{
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100184 struct smc_connection *conn = &smc->conn;
Ursula Brauna046d572017-01-09 16:55:16 +0100185 struct smc_clc_msg_accept_confirm cclc;
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100186 struct smc_link *link;
Ursula Brauna046d572017-01-09 16:55:16 +0100187 int reason_code = 0;
188 struct msghdr msg;
189 struct kvec vec;
190 int len;
191
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100192 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
Ursula Brauna046d572017-01-09 16:55:16 +0100193 /* send SMC Confirm CLC msg */
194 memset(&cclc, 0, sizeof(cclc));
195 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
196 cclc.hdr.type = SMC_CLC_CONFIRM;
197 cclc.hdr.length = htons(sizeof(cclc));
198 cclc.hdr.version = SMC_CLC_V1; /* SMC version */
199 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100200 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
201 SMC_GID_SIZE);
202 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
203 sizeof(link->smcibdev->mac));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100204 hton24(cclc.qpn, link->roce_qp->qp_num);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100205 cclc.rmb_rkey =
206 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
Ursula Brauna046d572017-01-09 16:55:16 +0100207 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100208 cclc.rmbe_alert_token = htonl(conn->alert_token_local);
209 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100210 cclc.rmbe_size = conn->rmbe_size_short;
211 cclc.rmb_dma_addr =
212 cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100213 hton24(cclc.psn, link->psn_initial);
Ursula Brauna046d572017-01-09 16:55:16 +0100214
215 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
216
217 memset(&msg, 0, sizeof(msg));
218 vec.iov_base = &cclc;
219 vec.iov_len = sizeof(cclc);
220 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
221 if (len < sizeof(cclc)) {
222 if (len >= 0) {
223 reason_code = -ENETUNREACH;
224 smc->sk.sk_err = -reason_code;
225 } else {
226 smc->sk.sk_err = smc->clcsock->sk->sk_err;
227 reason_code = -smc->sk.sk_err;
228 }
229 }
230 return reason_code;
231}
232
233/* send CLC ACCEPT message across internal TCP socket */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100234int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
Ursula Brauna046d572017-01-09 16:55:16 +0100235{
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100236 struct smc_connection *conn = &new_smc->conn;
Ursula Brauna046d572017-01-09 16:55:16 +0100237 struct smc_clc_msg_accept_confirm aclc;
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100238 struct smc_link *link;
Ursula Brauna046d572017-01-09 16:55:16 +0100239 struct msghdr msg;
240 struct kvec vec;
241 int rc = 0;
242 int len;
243
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100244 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
Ursula Brauna046d572017-01-09 16:55:16 +0100245 memset(&aclc, 0, sizeof(aclc));
246 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
247 aclc.hdr.type = SMC_CLC_ACCEPT;
248 aclc.hdr.length = htons(sizeof(aclc));
249 aclc.hdr.version = SMC_CLC_V1; /* SMC version */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100250 if (srv_first_contact)
251 aclc.hdr.flag = 1;
Ursula Brauna046d572017-01-09 16:55:16 +0100252 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100253 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
254 SMC_GID_SIZE);
255 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
256 sizeof(link->smcibdev->mac[link->ibport - 1]));
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100257 hton24(aclc.qpn, link->roce_qp->qp_num);
Ursula Braunbd4ad572017-01-09 16:55:20 +0100258 aclc.rmb_rkey =
259 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
Ursula Brauna046d572017-01-09 16:55:16 +0100260 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100261 aclc.rmbe_alert_token = htonl(conn->alert_token_local);
262 aclc.qp_mtu = link->path_mtu;
Ursula Brauncd6851f2017-01-09 16:55:18 +0100263 aclc.rmbe_size = conn->rmbe_size_short,
264 aclc.rmb_dma_addr =
265 cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
Ursula Braun0cfdd8f2017-01-09 16:55:17 +0100266 hton24(aclc.psn, link->psn_initial);
Ursula Brauna046d572017-01-09 16:55:16 +0100267 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
268
269 memset(&msg, 0, sizeof(msg));
270 vec.iov_base = &aclc;
271 vec.iov_len = sizeof(aclc);
272 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
273 if (len < sizeof(aclc)) {
274 if (len >= 0)
275 new_smc->sk.sk_err = EPROTO;
276 else
277 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
278 rc = sock_error(&new_smc->sk);
279 }
280
281 return rc;
282}