/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *   - partial support for non-blocking sockets only
 *   - support for urgent data postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */
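/*
 * Illustrative sketch (userspace, not part of this file's build): an SMC
 * socket is created like a TCP socket; only the family and protocol differ.
 * SMCPROTO_SMC/SMCPROTO_SMC6 select the IPv4/IPv6 variant - the constants
 * are assumed to be copied from the smc.h definitions.
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *	connect(fd, (struct sockaddr *)&sin, sizeof(sin)); // AF_INET addr
 *	write(fd, buf, len);  // RDMA writes, or plain TCP after fallback
 */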

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
						 * creation
						 */

static void smc_tcp_listen_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}
	if (smc->use_fallback) {
		sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
		smc_conn_free(&smc->conn);
	release_sock(sk);

	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}
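/*
 * Descriptive note: the two flag statements above overwrite exactly the
 * bits selected by @mask on nsk with osk's values; bits outside @mask keep
 * nsk's own state. E.g. with mask == (1UL << SOCK_KEEPOPEN), only the
 * keepalive flag is carried over from the old socket.
 */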

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register a new rmb, optionally send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
		       bool conf_rkey)
{
	/* register memory region for new rmb */
	if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	if (!conf_rkey)
		return 0;
	/* exchange confirm_rkey msg with peer */
	if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	return 0;
}
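/*
 * Descriptive note: registration is a two-step affair - first the RMB's
 * memory region is registered with the local RoCE device via a work
 * request (smc_wr_reg_send()), then, if @conf_rkey is set, the resulting
 * rkey is announced to the peer in an LLC CONFIRM_RKEY exchange. regerr
 * marks the descriptor so that a failed buffer is not reused later.
 */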

static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	if (link->llc_confirm_rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	smc_wr_remember_qp_attr(link);

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_INTERR;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   &link->smcibdev->gid[link->ibport - 1],
				   SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}
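/*
 * Descriptive note: on a first contact the client side thus runs
 *   1. wait for the CONFIRM LINK request, move the QP to RTS, register RMB
 *   2. answer with a CONFIRM LINK response
 *   3. wait for the ADD LINK request and reject it (single link only)
 * before the link is flagged active; smc_serv_conf_first_link() below is
 * the server-side mirror image of this exchange.
 */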

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}
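/*
 * Worked example (illustrative): the RDMA write offset is derived from the
 * peer's 1-based RMB element index. With a decompressed rmbe_size of 65536
 * and rmbe_idx == 3, tx_off becomes 65536 * 2 == 131072 bytes.
 */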

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc)
{
	smc->use_fallback = true;
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) /* error, fallback is not possible */
		return reason_code;
	if (reason_code != SMC_CLC_DECL_REPLY) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0)
			return rc;
	}
	return smc_connect_fallback(smc);
}

/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
	if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
		sock_put(&smc->sk); /* passive closing */
	return reason_code;
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
			  u8 *ibport)
{
	int reason_code = 0;

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport);
	if (!(*ibdev))
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */

	return reason_code;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_ib_device *ibdev, u8 ibport)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, ibdev, ibport);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_ib_device *ibdev, u8 ibport)
{
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link *link;
	int reason_code = 0;

	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
					aclc->hdr.flag);
	if (local_contact < 0) {
		if (local_contact == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (local_contact == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		else
			reason_code = SMC_CLC_DECL_INTERR; /* other error */
		return smc_connect_abort(smc, reason_code, 0);
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, aclc);

	/* create send buffer and rmb */
	if (smc_buf_create(smc))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
					 local_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
						 local_contact);
	} else {
		if (!smc->conn.rmb_desc->reused &&
		    smc_reg_rmb(link, smc->conn.rmb_desc, true))
			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
						 local_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code, local_contact);

	smc_tx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(smc);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 local_contact);
	}
	mutex_unlock(&smc_create_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_ib_device *ibdev;
	int rc = 0;
	u8 ibport;

	sock_hold(&smc->sk); /* sock put in passive closing */

	if (smc->use_fallback)
		return smc_connect_fallback(smc);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* check if a RDMA device is available; if not, fall back */
	if (smc_check_rdma(smc, &ibdev, &ibport))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
	if (rc)
		return smc_connect_decline_fallback(smc, rc);

	/* connect using rdma */
	rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
	if (rc)
		return smc_connect_decline_fallback(smc, rc);

	return 0;
}
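/*
 * Descriptive note: the active (client) open thus layers four gates in
 * front of the RDMA path - fallback already requested, peer without
 * SYN-SMC, IPSec in use, no suitable RoCE port - and each of them degrades
 * the socket to plain TCP on the internal clcsock instead of failing the
 * connect() call.
 */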

static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc)
		goto out;

	rc = __smc_connect(smc);
	if (rc < 0)
		goto out;
	else
		rc = 0; /* success cases including fallback */

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	lock_sock(lsk);
	if (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		new_sk->sk_prot->unhash(new_sk);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			new_sk->sk_prot->unhash(new_sk);
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (!smc->use_fallback) {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	if (smc->use_fallback) {
		sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
	} else {
		if (sk->sk_state == SMC_CLOSED)
			smc_conn_free(&smc->conn);
	}
	release_sock(sk);
	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
}

static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_INTERR;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	if (link->llc_confirm_resp_rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* send ADD LINK request to client over the RoCE fabric */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   &link->smcibdev->gid[link->ibport - 1],
				   SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive ADD LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	/* RDMA setup failed, switch back to TCP */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(new_smc->conn.lgr);
	if (reason_code < 0) { /* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	if (reason_code && reason_code != SMC_CLC_DECL_REPLY) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_rdma_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_CNFERR;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_clc_msg_proposal *pclc,
				struct smc_ib_device *ibdev, u8 ibport,
				int *local_contact)
{
	/* allocate connection / link group */
	*local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc))
		return SMC_CLC_DECL_MEM;

	return 0;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	if (local_contact != SMC_FIRST_CONTACT) {
		if (!new_smc->conn.rmb_desc->reused) {
			if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
				return SMC_CLC_DECL_INTERR;
		}
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	return 0;
}

/* listen worker: finish RDMA setup */
static void smc_listen_rdma_finish(struct smc_sock *new_smc,
				   struct smc_clc_msg_accept_confirm *cclc,
				   int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code)
			goto decline;
	}
	return;

decline:
	mutex_unlock(&smc_create_lgr_pending);
	smc_listen_decline(new_smc, reason_code, local_contact);
}

/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_ib_device *ibdev;
	u8 buf[SMC_CLC_MAX_LEN];
	int local_contact = 0;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		new_smc->use_fallback = true;
		smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
				       SMC_CLC_PROPOSAL);
	if (reason_code) {
		smc_listen_decline(new_smc, reason_code, 0);
		return;
	}

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
		return;
	}

	mutex_lock(&smc_create_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* check if RDMA is available */
	if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
	    smc_listen_rdma_check(new_smc, pclc) ||
	    smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
				 &local_contact) ||
	    smc_listen_rdma_reg(new_smc, local_contact)) {
		/* SMC not supported, decline */
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
		return;
	}

	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc) {
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, rc, local_contact);
		return;
	}

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code) {
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, reason_code, local_contact);
		return;
	}

	/* finish worker */
	smc_listen_rdma_finish(new_smc, &cclc, local_contact);
	smc_conn_save_peer_info(new_smc, &cclc);
	mutex_unlock(&smc_create_lgr_pending);
	smc_listen_out_connected(new_smc);
}
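/*
 * Descriptive note: the passive side mirrors __smc_connect(): fallback and
 * SYN-SMC/IPSec gates first, then PROPOSAL in, ACCEPT out, CONFIRM in, and
 * finally the first-contact link confirmation. Failures funnel through
 * smc_listen_decline(), so the child socket still comes up as plain TCP
 * whenever a decline can be sent.
 */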

static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	sock_hold(sk); /* sock_hold in tcp_listen_worker */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}
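/*
 * Illustrative sketch (userspace, not part of this file's build): a server
 * needs no SMC-specific calls beyond socket() itself; bind(), listen() and
 * accept() behave exactly as for TCP.
 *
 *	int lfd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *	bind(lfd, (struct sockaddr *)&sin, sizeof(sin));
 *	listen(lfd, 128);
 *	int cfd = accept(lfd, NULL, NULL); // SMC or TCP-fallback child
 */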

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;

	if (msg->msg_flags & MSG_FASTOPEN) {
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

static __poll_t smc_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;
	struct smc_sock *smc;
	int rc;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	sock_hold(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
		/* delegate to CLC child sock */
		release_sock(sk);
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		lock_sock(sk);
		sk->sk_err = smc->clcsock->sk->sk_err;
		if (sk->sk_err) {
			mask |= EPOLLERR;
		} else {
			/* if non-blocking connect finished ... */
			if (sk->sk_state == SMC_INIT &&
			    mask & EPOLLOUT &&
			    smc->clcsock->sk->sk_state != TCP_CLOSE) {
				rc = __smc_connect(smc);
				if (rc < 0)
					mask |= EPOLLERR;
				/* success cases including fallback */
				mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	} else {
		if (sk->sk_state != SMC_CLOSED) {
			release_sock(sk);
			sock_poll_wait(file, sk_sleep(sk), wait);
			lock_sock(sk);
		}
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask = smc_accept_poll(sk);
		} else {
			if (atomic_read(&smc->conn.sndbuf_space) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
		}

	}
	release_sock(sk);
	sock_put(sk);

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_LISTEN) &&
	    (sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk->sk_error_report(sk);
	}
	if (rc)
		return rc;

	if (optlen < sizeof(int))
		return rc;
	get_user(val, (int __user *)optval);

	lock_sock(sk);
	switch (optname) {
	case TCP_ULP:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
		} else {
			if (!smc->use_fallback)
				rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
			if (val && !smc->use_fallback)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
			if (!val && !smc->use_fallback)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
	release_sock(sk);

	return rc;
}
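/*
 * Illustrative sketch (userspace): options arrive here through the normal
 * TCP option path, e.g.
 *
 *	int one = 1;
 *	setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
 *
 * is first mirrored onto the internal clcsock and, on a non-fallback
 * socket, additionally kicks conn.tx_work so queued data is pushed out.
 */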

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock)
			return -EBADF;
		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN)
			return -EINVAL;
		answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not send + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN)
			return -EINVAL;
		answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not send only) */
		if (smc->sk.sk_state == SMC_LISTEN)
			return -EINVAL;
		answ = smc_tx_prepared_sends(&smc->conn);
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return put_user(answ, (int __user *)arg);
}
1514
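/* sendpage delegates to kernel_sendpage() only while falling back to TCP;
 * a native SMC connection has no zero-copy page path, so sock_no_sendpage()
 * pushes the data through the regular sendmsg path instead.
 */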
static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE) {
		release_sock(sk);
		goto out;
	}
	release_sock(sk);
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	return rc;
}

/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Consumer cursor updates
 * are delayed until a respective page has been fully processed.
 * Note that subsequent recv() calls have to wait until all splice() processing
 * has completed.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);

	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}
out:
	release_sock(sk);

	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};

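/* Create an AF_SMC socket plus its internal TCP companion socket, which is
 * used for the CLC handshake and as the fallback data path. SMCPROTO_SMC
 * selects SMC over IPv4, SMCPROTO_SMC6 SMC over IPv6. Illustrative
 * userspace call (constants as defined for the SMC family):
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 */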
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

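/* Module init: bring up the pnet table, the LLC and CDC layers, register
 * the v4/v6 protos and the PF_SMC socket family, attach to the IB core,
 * and finally enable the tcp_have_smc static branch so that TCP starts
 * negotiating SMC capability. Errors unwind in reverse order of setup.
 */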
static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

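/* Module exit: tear down in reverse order of smc_init(); smc_core_exit()
 * terminates any remaining link groups before the IB client, the socket
 * family and the protos are unregistered.
 */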
static void __exit smc_exit(void)
{
	smc_core_exit();
	static_branch_disable(&tcp_have_smc);
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);