blob: 8b059b2fc34dcbf5e46dbd65aa86fd54fe3898fa [file] [log] [blame]
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
 *  applies to SOCK_STREAM sockets only
 *  offers an alternative communication option for TCP-protocol sockets
 *  applicable with RoCE-cards only
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *              based on prototype from Frank Blaschka
 */
14
15#define KMSG_COMPONENT "smc"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#include <linux/module.h>
19#include <linux/socket.h>
20#include <net/sock.h>
21
22#include "smc.h"
Ursula Brauna4cf0442017-01-09 16:55:14 +010023#include "smc_ib.h"
Thomas Richter6812baa2017-01-09 16:55:15 +010024#include "smc_pnet.h"
Ursula Braunac713872017-01-09 16:55:13 +010025
26static void smc_set_keepalive(struct sock *sk, int val)
27{
28 struct smc_sock *smc = smc_sk(sk);
29
30 smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
31}
32
33static struct proto smc_proto = {
34 .name = "SMC",
35 .owner = THIS_MODULE,
36 .keepalive = smc_set_keepalive,
37 .obj_size = sizeof(struct smc_sock),
38 .slab_flags = SLAB_DESTROY_BY_RCU,
39};
40
41static int smc_release(struct socket *sock)
42{
43 struct sock *sk = sock->sk;
44 struct smc_sock *smc;
45
46 if (!sk)
47 goto out;
48
49 smc = smc_sk(sk);
50 lock_sock(sk);
51
52 sk->sk_state = SMC_CLOSED;
53 if (smc->clcsock) {
54 sock_release(smc->clcsock);
55 smc->clcsock = NULL;
56 }
57
58 /* detach socket */
59 sock_orphan(sk);
60 sock->sk = NULL;
61 release_sock(sk);
62
63 sock_put(sk);
64out:
65 return 0;
66}
67
68static void smc_destruct(struct sock *sk)
69{
70 if (sk->sk_state != SMC_CLOSED)
71 return;
72 if (!sock_flag(sk, SOCK_DEAD))
73 return;
74
75 sk_refcnt_debug_dec(sk);
76}
77
78static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
79{
80 struct smc_sock *smc;
81 struct sock *sk;
82
83 sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
84 if (!sk)
85 return NULL;
86
87 sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
88 sk->sk_state = SMC_INIT;
89 sk->sk_destruct = smc_destruct;
90 sk->sk_protocol = SMCPROTO_SMC;
91 sk_refcnt_debug_inc(sk);
92
93 smc = smc_sk(sk);
94
95 return sk;
96}
97
98static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
99 int addr_len)
100{
101 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
102 struct sock *sk = sock->sk;
103 struct smc_sock *smc;
104 int rc;
105
106 smc = smc_sk(sk);
107
108 /* replicate tests from inet_bind(), to be safe wrt. future changes */
109 rc = -EINVAL;
110 if (addr_len < sizeof(struct sockaddr_in))
111 goto out;
112
113 rc = -EAFNOSUPPORT;
114 /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
115 if ((addr->sin_family != AF_INET) &&
116 ((addr->sin_family != AF_UNSPEC) ||
117 (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
118 goto out;
119
120 lock_sock(sk);
121
122 /* Check if socket is already active */
123 rc = -EINVAL;
124 if (sk->sk_state != SMC_INIT)
125 goto out_rel;
126
127 smc->clcsock->sk->sk_reuse = sk->sk_reuse;
128 rc = kernel_bind(smc->clcsock, uaddr, addr_len);
129
130out_rel:
131 release_sock(sk);
132out:
133 return rc;
134}
135
136static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
137 unsigned long mask)
138{
139 /* options we don't get control via setsockopt for */
140 nsk->sk_type = osk->sk_type;
141 nsk->sk_sndbuf = osk->sk_sndbuf;
142 nsk->sk_rcvbuf = osk->sk_rcvbuf;
143 nsk->sk_sndtimeo = osk->sk_sndtimeo;
144 nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
145 nsk->sk_mark = osk->sk_mark;
146 nsk->sk_priority = osk->sk_priority;
147 nsk->sk_rcvlowat = osk->sk_rcvlowat;
148 nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
149 nsk->sk_err = osk->sk_err;
150
151 nsk->sk_flags &= ~mask;
152 nsk->sk_flags |= osk->sk_flags & mask;
153}
154
155#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
156 (1UL << SOCK_KEEPOPEN) | \
157 (1UL << SOCK_LINGER) | \
158 (1UL << SOCK_BROADCAST) | \
159 (1UL << SOCK_TIMESTAMP) | \
160 (1UL << SOCK_DBG) | \
161 (1UL << SOCK_RCVTSTAMP) | \
162 (1UL << SOCK_RCVTSTAMPNS) | \
163 (1UL << SOCK_LOCALROUTE) | \
164 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
165 (1UL << SOCK_RXQ_OVFL) | \
166 (1UL << SOCK_WIFI_STATUS) | \
167 (1UL << SOCK_NOFCS) | \
168 (1UL << SOCK_FILTER_LOCKED))
169/* copy only relevant settings and flags of SOL_SOCKET level from smc to
170 * clc socket (since smc is not called for these options from net/core)
171 */
172static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
173{
174 smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
175}
176
177#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
178 (1UL << SOCK_KEEPOPEN) | \
179 (1UL << SOCK_LINGER) | \
180 (1UL << SOCK_DBG))
181/* copy only settings and flags relevant for smc from clc to smc socket */
182static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
183{
184 smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
185}
186
187static int smc_connect(struct socket *sock, struct sockaddr *addr,
188 int alen, int flags)
189{
190 struct sock *sk = sock->sk;
191 struct smc_sock *smc;
192 int rc = -EINVAL;
193
194 smc = smc_sk(sk);
195
196 /* separate smc parameter checking to be safe */
197 if (alen < sizeof(addr->sa_family))
198 goto out_err;
199 if (addr->sa_family != AF_INET)
200 goto out_err;
201
202 lock_sock(sk);
203 switch (sk->sk_state) {
204 default:
205 goto out;
206 case SMC_ACTIVE:
207 rc = -EISCONN;
208 goto out;
209 case SMC_INIT:
210 rc = 0;
211 break;
212 }
213
214 smc_copy_sock_settings_to_clc(smc);
215 rc = kernel_connect(smc->clcsock, addr, alen, flags);
216 if (rc)
217 goto out;
218
219 sk->sk_state = SMC_ACTIVE;
220
221 /* always use TCP fallback as transport mechanism for now;
222 * This will change once RDMA transport is implemented
223 */
224 smc->use_fallback = true;
225
226out:
227 release_sock(sk);
228out_err:
229 return rc;
230}
231
232static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
233{
234 struct sock *sk = &lsmc->sk;
235 struct socket *new_clcsock;
236 struct sock *new_sk;
237 int rc;
238
239 new_sk = smc_sock_alloc(sock_net(sk), NULL);
240 if (!new_sk) {
241 rc = -ENOMEM;
242 lsmc->sk.sk_err = ENOMEM;
243 *new_smc = NULL;
244 goto out;
245 }
246 *new_smc = smc_sk(new_sk);
247
248 rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
249 if (rc) {
250 sock_put(new_sk);
251 *new_smc = NULL;
252 goto out;
253 }
254
255 (*new_smc)->clcsock = new_clcsock;
256out:
257 return rc;
258}
259
260static int smc_listen(struct socket *sock, int backlog)
261{
262 struct sock *sk = sock->sk;
263 struct smc_sock *smc;
264 int rc;
265
266 smc = smc_sk(sk);
267 lock_sock(sk);
268
269 rc = -EINVAL;
270 if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
271 goto out;
272
273 rc = 0;
274 if (sk->sk_state == SMC_LISTEN) {
275 sk->sk_max_ack_backlog = backlog;
276 goto out;
277 }
278 /* some socket options are handled in core, so we could not apply
279 * them to the clc socket -- copy smc socket options to clc socket
280 */
281 smc_copy_sock_settings_to_clc(smc);
282
283 rc = kernel_listen(smc->clcsock, backlog);
284 if (rc)
285 goto out;
286 sk->sk_max_ack_backlog = backlog;
287 sk->sk_ack_backlog = 0;
288 sk->sk_state = SMC_LISTEN;
289
290out:
291 release_sock(sk);
292 return rc;
293}
294
295static int smc_accept(struct socket *sock, struct socket *new_sock,
296 int flags)
297{
298 struct smc_sock *new_smc;
299 struct sock *sk = sock->sk;
300 struct smc_sock *lsmc;
301 int rc;
302
303 lsmc = smc_sk(sk);
304 lock_sock(sk);
305
306 if (lsmc->sk.sk_state != SMC_LISTEN) {
307 rc = -EINVAL;
308 goto out;
309 }
310
311 rc = smc_clcsock_accept(lsmc, &new_smc);
312 if (rc)
313 goto out;
314 sock_graft(&new_smc->sk, new_sock);
315 new_smc->sk.sk_state = SMC_ACTIVE;
316
317 smc_copy_sock_settings_to_smc(new_smc);
318
319 /* always use TCP fallback as transport mechanism for now;
320 * This will change once RDMA transport is implemented
321 */
322 new_smc->use_fallback = true;
323
324out:
325 release_sock(sk);
326 return rc;
327}
328
329static int smc_getname(struct socket *sock, struct sockaddr *addr,
330 int *len, int peer)
331{
332 struct smc_sock *smc;
333
334 if (peer && (sock->sk->sk_state != SMC_ACTIVE))
335 return -ENOTCONN;
336
337 smc = smc_sk(sock->sk);
338
339 return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
340}
341
342static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
343{
344 struct sock *sk = sock->sk;
345 struct smc_sock *smc;
346 int rc = -EPIPE;
347
348 smc = smc_sk(sk);
349 lock_sock(sk);
350 if (sk->sk_state != SMC_ACTIVE)
351 goto out;
352 if (smc->use_fallback)
353 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
354 else
355 rc = sock_no_sendmsg(sock, msg, len);
356out:
357 release_sock(sk);
358 return rc;
359}
360
361static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
362 int flags)
363{
364 struct sock *sk = sock->sk;
365 struct smc_sock *smc;
366 int rc = -ENOTCONN;
367
368 smc = smc_sk(sk);
369 lock_sock(sk);
370 if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
371 goto out;
372
373 if (smc->use_fallback)
374 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
375 else
376 rc = sock_no_recvmsg(sock, msg, len, flags);
377out:
378 release_sock(sk);
379 return rc;
380}
381
382static unsigned int smc_poll(struct file *file, struct socket *sock,
383 poll_table *wait)
384{
385 struct sock *sk = sock->sk;
386 unsigned int mask = 0;
387 struct smc_sock *smc;
388
389 smc = smc_sk(sock->sk);
390 if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) ||
391 smc->use_fallback) {
392 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
393 /* if non-blocking connect finished ... */
394 lock_sock(sk);
395 if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
396 sk->sk_state = SMC_ACTIVE;
397 /* always use TCP fallback as transport mechanism;
398 * This will change once RDMA transport is implemented
399 */
400 smc->use_fallback = true;
401 }
402 release_sock(sk);
403 } else {
404 mask = sock_no_poll(file, sock, wait);
405 }
406
407 return mask;
408}
409
410static int smc_shutdown(struct socket *sock, int how)
411{
412 struct sock *sk = sock->sk;
413 struct smc_sock *smc;
414 int rc = -EINVAL;
415
416 smc = smc_sk(sk);
417
418 if ((how < SHUT_RD) || (how > SHUT_RDWR))
419 goto out_err;
420
421 lock_sock(sk);
422
423 rc = -ENOTCONN;
424 if (sk->sk_state == SMC_CLOSED)
425 goto out;
426 if (smc->use_fallback) {
427 rc = kernel_sock_shutdown(smc->clcsock, how);
428 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
429 if (sk->sk_shutdown == SHUTDOWN_MASK)
430 sk->sk_state = SMC_CLOSED;
431 } else {
432 rc = sock_no_shutdown(sock, how);
433 }
434
435out:
436 release_sock(sk);
437
438out_err:
439 return rc;
440}
441
442static int smc_setsockopt(struct socket *sock, int level, int optname,
443 char __user *optval, unsigned int optlen)
444{
445 struct sock *sk = sock->sk;
446 struct smc_sock *smc;
447
448 smc = smc_sk(sk);
449
450 /* generic setsockopts reaching us here always apply to the
451 * CLC socket
452 */
453 return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
454 optval, optlen);
455}
456
457static int smc_getsockopt(struct socket *sock, int level, int optname,
458 char __user *optval, int __user *optlen)
459{
460 struct smc_sock *smc;
461
462 smc = smc_sk(sock->sk);
463 /* socket options apply to the CLC socket */
464 return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
465 optval, optlen);
466}
467
468static int smc_ioctl(struct socket *sock, unsigned int cmd,
469 unsigned long arg)
470{
471 struct smc_sock *smc;
472
473 smc = smc_sk(sock->sk);
474 if (smc->use_fallback)
475 return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
476 else
477 return sock_no_ioctl(sock, cmd, arg);
478}
479
480static ssize_t smc_sendpage(struct socket *sock, struct page *page,
481 int offset, size_t size, int flags)
482{
483 struct sock *sk = sock->sk;
484 struct smc_sock *smc;
485 int rc = -EPIPE;
486
487 smc = smc_sk(sk);
488 lock_sock(sk);
489 if (sk->sk_state != SMC_ACTIVE)
490 goto out;
491 if (smc->use_fallback)
492 rc = kernel_sendpage(smc->clcsock, page, offset,
493 size, flags);
494 else
495 rc = sock_no_sendpage(sock, page, offset, size, flags);
496
497out:
498 release_sock(sk);
499 return rc;
500}
501
502static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
503 struct pipe_inode_info *pipe, size_t len,
504 unsigned int flags)
505{
506 struct sock *sk = sock->sk;
507 struct smc_sock *smc;
508 int rc = -ENOTCONN;
509
510 smc = smc_sk(sk);
511 lock_sock(sk);
512 if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
513 goto out;
514 if (smc->use_fallback) {
515 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
516 pipe, len, flags);
517 } else {
518 rc = -EOPNOTSUPP;
519 }
520out:
521 release_sock(sk);
522 return rc;
523}
524
525/* must look like tcp */
526static const struct proto_ops smc_sock_ops = {
527 .family = PF_SMC,
528 .owner = THIS_MODULE,
529 .release = smc_release,
530 .bind = smc_bind,
531 .connect = smc_connect,
532 .socketpair = sock_no_socketpair,
533 .accept = smc_accept,
534 .getname = smc_getname,
535 .poll = smc_poll,
536 .ioctl = smc_ioctl,
537 .listen = smc_listen,
538 .shutdown = smc_shutdown,
539 .setsockopt = smc_setsockopt,
540 .getsockopt = smc_getsockopt,
541 .sendmsg = smc_sendmsg,
542 .recvmsg = smc_recvmsg,
543 .mmap = sock_no_mmap,
544 .sendpage = smc_sendpage,
545 .splice_read = smc_splice_read,
546};
547
548static int smc_create(struct net *net, struct socket *sock, int protocol,
549 int kern)
550{
551 struct smc_sock *smc;
552 struct sock *sk;
553 int rc;
554
555 rc = -ESOCKTNOSUPPORT;
556 if (sock->type != SOCK_STREAM)
557 goto out;
558
559 rc = -EPROTONOSUPPORT;
560 if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
561 goto out;
562
563 rc = -ENOBUFS;
564 sock->ops = &smc_sock_ops;
565 sk = smc_sock_alloc(net, sock);
566 if (!sk)
567 goto out;
568
569 /* create internal TCP socket for CLC handshake and fallback */
570 smc = smc_sk(sk);
571 rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
572 IPPROTO_TCP, &smc->clcsock);
573 if (rc)
574 sk_common_release(sk);
575
576out:
577 return rc;
578}
579
580static const struct net_proto_family smc_sock_family_ops = {
581 .family = PF_SMC,
582 .owner = THIS_MODULE,
583 .create = smc_create,
584};
585
586static int __init smc_init(void)
587{
588 int rc;
589
Thomas Richter6812baa2017-01-09 16:55:15 +0100590 rc = smc_pnet_init();
591 if (rc)
592 return rc;
593
Ursula Braunac713872017-01-09 16:55:13 +0100594 rc = proto_register(&smc_proto, 1);
595 if (rc) {
596 pr_err("%s: proto_register fails with %d\n", __func__, rc);
Thomas Richter6812baa2017-01-09 16:55:15 +0100597 goto out_pnet;
Ursula Braunac713872017-01-09 16:55:13 +0100598 }
599
600 rc = sock_register(&smc_sock_family_ops);
601 if (rc) {
602 pr_err("%s: sock_register fails with %d\n", __func__, rc);
603 goto out_proto;
604 }
605
Ursula Brauna4cf0442017-01-09 16:55:14 +0100606 rc = smc_ib_register_client();
607 if (rc) {
608 pr_err("%s: ib_register fails with %d\n", __func__, rc);
609 goto out_sock;
610 }
611
Ursula Braunac713872017-01-09 16:55:13 +0100612 return 0;
613
Ursula Brauna4cf0442017-01-09 16:55:14 +0100614out_sock:
615 sock_unregister(PF_SMC);
Ursula Braunac713872017-01-09 16:55:13 +0100616out_proto:
617 proto_unregister(&smc_proto);
Thomas Richter6812baa2017-01-09 16:55:15 +0100618out_pnet:
619 smc_pnet_exit();
Ursula Braunac713872017-01-09 16:55:13 +0100620 return rc;
621}
622
623static void __exit smc_exit(void)
624{
Ursula Brauna4cf0442017-01-09 16:55:14 +0100625 smc_ib_unregister_client();
Ursula Braunac713872017-01-09 16:55:13 +0100626 sock_unregister(PF_SMC);
627 proto_unregister(&smc_proto);
Thomas Richter6812baa2017-01-09 16:55:15 +0100628 smc_pnet_exit();
Ursula Braunac713872017-01-09 16:55:13 +0100629}
630
631module_init(smc_init);
632module_exit(smc_exit);
633
634MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
635MODULE_DESCRIPTION("smc socket address family");
636MODULE_LICENSE("GPL");
637MODULE_ALIAS_NETPROTO(PF_SMC);