/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
						 * creation
						 */

static void smc_tcp_listen_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

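/* release an SMC socket: perform the active close unless the socket has
 * fallen back to TCP, release the internal CLC socket, and drop the final
 * socket reference
 */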
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}
	if (smc->use_fallback) {
		sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
		smc_conn_free(&smc->conn);
	release_sock(sk);

	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

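/* allocate a new SMC socket for the given protocol (SMCPROTO_SMC or
 * SMCPROTO_SMC6) and initialize its state, lists, locks, and work items
 */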
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}

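/* bind the SMC socket by binding the internal CLC (TCP) socket */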
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control of via setsockopt */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register a new rmb, optionally send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
		       bool conf_rkey)
{
	/* register memory region for new rmb */
	if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	if (!conf_rkey)
		return 0;
	/* exchange confirm_rkey msg with peer */
	if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	return 0;
}

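/* client: bring up the first link of a new link group by exchanging
 * CONFIRM LINK and ADD LINK LLC messages with the server over the
 * RoCE fabric
 */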
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	if (link->llc_confirm_rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	smc_wr_remember_qp_attr(link);

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_INTERR;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   &link->smcibdev->gid[link->ibport - 1],
				   SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

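/* save the connection parameters announced in the peer's CLC message */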
static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc)
{
	smc->use_fallback = true;
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) /* error, fallback is not possible */
		return reason_code;
	if (reason_code != SMC_CLC_DECL_REPLY) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0)
			return rc;
	}
	return smc_connect_fallback(smc);
}

/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
	if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
		sock_put(&smc->sk); /* passive closing */
	return reason_code;
}

/* check if there is an RDMA device available for this connection. */
/* called for connect and listen */
static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
			  u8 *ibport)
{
	int reason_code = 0;

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport);
	if (!(*ibdev))
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */

	return reason_code;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_ib_device *ibdev, u8 ibport)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, ibdev, ibport);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_ib_device *ibdev, u8 ibport)
{
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link *link;
	int reason_code = 0;

	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
					aclc->hdr.flag);
	if (local_contact < 0) {
		if (local_contact == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (local_contact == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		else
			reason_code = SMC_CLC_DECL_INTERR; /* other error */
		return smc_connect_abort(smc, reason_code, 0);
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, aclc);

	/* create send buffer and rmb */
	if (smc_buf_create(smc))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
					 local_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
						 local_contact);
	} else {
		if (!smc->conn.rmb_desc->reused &&
		    smc_reg_rmb(link, smc->conn.rmb_desc, true))
			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
						 local_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code, local_contact);

	smc_tx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(smc);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 local_contact);
	}
	mutex_unlock(&smc_create_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_ib_device *ibdev;
	int rc = 0;
	u8 ibport;

	sock_hold(&smc->sk); /* sock put in passive closing */

	if (smc->use_fallback)
		return smc_connect_fallback(smc);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* check if an RDMA device is available; if not, fall back */
	if (smc_check_rdma(smc, &ibdev, &ibport))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
	if (rc)
		return smc_connect_decline_fallback(smc, rc);

	/* connect using rdma */
	rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
	if (rc)
		return smc_connect_decline_fallback(smc, rc);

	return 0;
}

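/* connect the internal CLC (TCP) socket first, then attempt the SMC
 * handshake via __smc_connect()
 */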
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc)
		goto out;

	rc = __smc_connect(smc);
	if (rc < 0)
		goto out;
	else
		rc = 0; /* success cases including fallback */

out:
	release_sock(sk);
out_err:
	return rc;
}

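/* accept a connection on the internal CLC (TCP) listen socket and wrap
 * it in a newly allocated SMC socket
 */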
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	lock_sock(lsk);
	if (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		new_sk->sk_prot->unhash(new_sk);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			new_sk->sk_prot->unhash(new_sk);
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (!smc->use_fallback) {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	if (smc->use_fallback) {
		sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
	} else {
		if (sk->sk_state == SMC_CLOSED)
			smc_conn_free(&smc->conn);
	}
	release_sock(sk);
	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
}

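/* server: bring up the first link of a new link group by exchanging
 * CONFIRM LINK and ADD LINK LLC messages with the client over the
 * RoCE fabric
 */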
static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_INTERR;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	if (link->llc_confirm_resp_rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* send ADD LINK request to client over the RoCE fabric */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   &link->smcibdev->gid[link->ibport - 1],
				   SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive ADD LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	/* RDMA setup failed, switch back to TCP */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(new_smc->conn.lgr);
	if (reason_code < 0) { /* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	if (reason_code && reason_code != SMC_CLC_DECL_REPLY) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_rdma_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_CNFERR;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_clc_msg_proposal *pclc,
				struct smc_ib_device *ibdev, u8 ibport,
				int *local_contact)
{
	/* allocate connection / link group */
	*local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc))
		return SMC_CLC_DECL_MEM;

	return 0;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	if (local_contact != SMC_FIRST_CONTACT) {
		if (!new_smc->conn.rmb_desc->reused) {
			if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
				return SMC_CLC_DECL_INTERR;
		}
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	return 0;
}

/* listen worker: finish RDMA setup */
static void smc_listen_rdma_finish(struct smc_sock *new_smc,
				   struct smc_clc_msg_accept_confirm *cclc,
				   int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code)
			goto decline;
	}
	return;

decline:
	mutex_unlock(&smc_create_lgr_pending);
	smc_listen_decline(new_smc, reason_code, local_contact);
}

/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_ib_device *ibdev;
	u8 buf[SMC_CLC_MAX_LEN];
	int local_contact = 0;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		new_smc->use_fallback = true;
		smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
				       SMC_CLC_PROPOSAL);
	if (reason_code) {
		smc_listen_decline(new_smc, reason_code, 0);
		return;
	}

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
		return;
	}

	mutex_lock(&smc_create_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* check if RDMA is available */
	if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
	    smc_listen_rdma_check(new_smc, pclc) ||
	    smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
				 &local_contact) ||
	    smc_listen_rdma_reg(new_smc, local_contact)) {
		/* SMC not supported, decline */
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
		return;
	}

	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc) {
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, rc, local_contact);
		return;
	}

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code) {
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, reason_code, local_contact);
		return;
	}

	/* finish worker */
	smc_listen_rdma_finish(new_smc, &cclc, local_contact);
	smc_conn_save_peer_info(new_smc, &cclc);
	mutex_unlock(&smc_create_lgr_pending);
	smc_listen_out_connected(new_smc);
}

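/* worker accepting connections on the internal CLC (TCP) listen socket;
 * schedules an smc_listen_work instance for each accepted connection
 */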
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we cannot apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	sock_hold(sk); /* sock_hold in tcp_listen_worker */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}

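/* wait for a connection on the accept queue of the listening SMC socket,
 * honoring TCP_DEFER_ACCEPT by waiting for first data if requested
 */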
static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}

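/* send data either through the RDMA write path or, after fallback,
 * through the internal CLC (TCP) socket
 */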
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;

	if (msg->msg_flags & MSG_FASTOPEN) {
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

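/* poll: delegate to the CLC socket while in SMC_INIT state or after
 * fallback; otherwise derive the mask from the SMC connection state
 */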
static __poll_t smc_poll_mask(struct socket *sock, __poll_t events)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;
	struct smc_sock *smc;
	int rc;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	sock_hold(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
		/* delegate to CLC child sock */
		release_sock(sk);
		mask = smc->clcsock->ops->poll_mask(smc->clcsock, events);
		lock_sock(sk);
		sk->sk_err = smc->clcsock->sk->sk_err;
		if (sk->sk_err) {
			mask |= EPOLLERR;
		} else {
			/* if non-blocking connect finished ... */
			if (sk->sk_state == SMC_INIT &&
			    mask & EPOLLOUT &&
			    smc->clcsock->sk->sk_state != TCP_CLOSE) {
				rc = __smc_connect(smc);
				if (rc < 0)
					mask |= EPOLLERR;
				/* success cases including fallback */
				mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	} else {
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask = smc_accept_poll(sk);
		} else {
			if (atomic_read(&smc->conn.sndbuf_space) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
		}
		if (smc->conn.urg_state == SMC_URG_VALID)
			mask |= EPOLLPRI;

	}
	release_sock(sk);
	sock_put(sk);

	return mask;
}

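/* shut down the connection in one or both directions; the internal CLC
 * (TCP) socket is shut down as well
 */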
static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_LISTEN) &&
	    (sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk->sk_error_report(sk);
	}
	if (rc)
		return rc;

	if (optlen < sizeof(int))
		return -EINVAL;
	get_user(val, (int __user *)optval);

	lock_sock(sk);
	switch (optname) {
	case TCP_ULP:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
		} else {
			if (!smc->use_fallback)
				rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
			if (val && !smc->use_fallback)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
			if (!val && !smc->use_fallback)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
	release_sock(sk);

	return rc;
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

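/* ioctl: answer queue-size and urgent-data queries from the SMC connection
 * state; after fallback the request is passed through to the CLC socket
 */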
1471static int smc_ioctl(struct socket *sock, unsigned int cmd,
1472 unsigned long arg)
1473{
Stefan Rasplde8474e2018-05-23 16:38:11 +02001474 union smc_host_cursor cons, urg;
1475 struct smc_connection *conn;
Ursula Braunac713872017-01-09 16:55:13 +01001476 struct smc_sock *smc;
Ursula Braun9b67e262018-05-02 16:56:46 +02001477 int answ;
Ursula Braunac713872017-01-09 16:55:13 +01001478
1479 smc = smc_sk(sock->sk);
Stefan Rasplde8474e2018-05-23 16:38:11 +02001480 conn = &smc->conn;
Ursula Braun9b67e262018-05-02 16:56:46 +02001481 if (smc->use_fallback) {
1482 if (!smc->clcsock)
1483 return -EBADF;
Ursula Braunac713872017-01-09 16:55:13 +01001484 return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
Ursula Braun9b67e262018-05-02 16:56:46 +02001485 }
1486 switch (cmd) {
1487 case SIOCINQ: /* same as FIONREAD */
1488 if (smc->sk.sk_state == SMC_LISTEN)
1489 return -EINVAL;
Ursula Braun2351abe2018-05-23 16:38:09 +02001490 if (smc->sk.sk_state == SMC_INIT ||
1491 smc->sk.sk_state == SMC_CLOSED)
1492 answ = 0;
1493 else
1494 answ = atomic_read(&smc->conn.bytes_to_rcv);
Ursula Braun9b67e262018-05-02 16:56:46 +02001495 break;
1496 case SIOCOUTQ:
1497 /* output queue size (not send + not acked) */
1498 if (smc->sk.sk_state == SMC_LISTEN)
1499 return -EINVAL;
Ursula Braun2351abe2018-05-23 16:38:09 +02001500 if (smc->sk.sk_state == SMC_INIT ||
1501 smc->sk.sk_state == SMC_CLOSED)
1502 answ = 0;
1503 else
1504 answ = smc->conn.sndbuf_desc->len -
Ursula Braun9b67e262018-05-02 16:56:46 +02001505 atomic_read(&smc->conn.sndbuf_space);
1506 break;
1507 case SIOCOUTQNSD:
1508 /* output queue size (not send only) */
1509 if (smc->sk.sk_state == SMC_LISTEN)
1510 return -EINVAL;
Ursula Braun2351abe2018-05-23 16:38:09 +02001511 if (smc->sk.sk_state == SMC_INIT ||
1512 smc->sk.sk_state == SMC_CLOSED)
1513 answ = 0;
1514 else
1515 answ = smc_tx_prepared_sends(&smc->conn);
Ursula Braun9b67e262018-05-02 16:56:46 +02001516 break;
Stefan Rasplde8474e2018-05-23 16:38:11 +02001517 case SIOCATMARK:
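		/* answer whether the next byte to be read is the urgent
		 * byte, i.e. whether the consumer cursor trails the urgent
		 * cursor by exactly one
		 */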
		if (smc->sk.sk_state == SMC_LISTEN)
			return -EINVAL;
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			smc_curs_write(&cons,
			       smc_curs_read(&conn->local_tx_ctrl.cons, conn),
			       conn);
			smc_curs_write(&urg,
				       smc_curs_read(&conn->urg_curs, conn),
				       conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return put_user(answ, (int __user *)arg);
}

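/* SMC itself has no zero-copy sendpage support: on fallback the page is
 * handed to TCP via kernel_sendpage(), otherwise sock_no_sendpage() routes
 * it through the ordinary sendmsg path
 */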
static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE) {
		release_sock(sk);
		goto out;
	}
	release_sock(sk);
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	return rc;
}

/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Consumer cursor updates
 * are delayed until the respective page has been fully processed.
 * Note that subsequent recv() calls have to wait until all splice() processing
 * has completed.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);

	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}
out:
	release_sock(sk);

	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll_mask	= smc_poll_mask,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};

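/* illustrative user-space sketch (not part of this file): an SMC socket is
 * created like a TCP one, just with the AF_SMC family and an SMC protocol
 * constant, e.g.
 *
 *	int fd4 = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);	// IPv4 CLC
 *	int fd6 = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6);	// IPv6 CLC
 *
 * smc_create() below derives the address family of the internal CLC/fallback
 * TCP socket from that protocol value.
 */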
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;
	}
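	/* start from the internal TCP socket's buffer sizes, but never go
	 * below SMC's minimum (SMC_BUF_MIN_SIZE)
	 */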
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

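/* bring-up order matters: pnet table and LLC/CDC handlers first, then the
 * v4/v6 protos, the socket family and finally the IB client; the error
 * labels below unwind in reverse order
 */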
static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

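/* module unload: smc_core_exit() first releases the remaining link groups;
 * the rest unwinds what smc_init() set up, in reverse order
 */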
static void __exit smc_exit(void)
{
	smc_core_exit();
	static_branch_disable(&tcp_have_smc);
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);