/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */
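
/*
 * Usage sketch (userspace, not part of this file): an AF_SMC socket is
 * driven exactly like a TCP socket; only the address family in socket()
 * changes. AF_SMC may not be in libc headers yet, so the constant is
 * defined by hand below -- the value 43 matches this patch series and is
 * an assumption of the sketch, as are the port and peer address.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <arpa/inet.h>
 *
 *	#ifndef AF_SMC
 *	#define AF_SMC 43
 *	#endif
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, IPPROTO_TCP);
 *	struct sockaddr_in peer = {
 *		.sin_family = AF_INET,		// SMC keeps AF_INET addressing
 *		.sin_port = htons(12345),
 *		.sin_addr.s_addr = inet_addr("192.0.2.1"),
 *	};
 *	connect(fd, (struct sockaddr *)&peer, sizeof(peer));
 *	// at this stage of the implementation all traffic falls back to TCP
 */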

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <net/sock.h>

#include "smc.h"
#include "smc_ib.h"

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

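/* proto description backing the smc_sock slab cache; SLAB_DESTROY_BY_RCU
 * keeps the slab memory behind a freed sock type-stable until an RCU grace
 * period has elapsed, so lockless readers must revalidate any sock they
 * look up (TCP uses the same scheme for its socket hash lookups).
 */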
static struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.obj_size	= sizeof(struct smc_sock),
	.slab_flags	= SLAB_DESTROY_BY_RCU,
};

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	lock_sock(sk);

	sk->sk_state = SMC_CLOSED;
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk);
out:
	return 0;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
{
	struct smc_sock *smc;
	struct sock *sk;

	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = SMCPROTO_SMC;
	sk_refcnt_debug_inc(sk);

	smc = smc_sk(sk);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if ((addr->sin_family != AF_INET) &&
	    ((addr->sin_family != AF_UNSPEC) ||
	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control of via setsockopt */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

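/* connect an SMC socket by connecting the internal CLC/TCP socket;
 * at this stage of the implementation every connection ends up in
 * TCP fallback mode
 */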
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc)
		goto out;

	sk->sk_state = SMC_ACTIVE;

	/* always use TCP fallback as transport mechanism for now;
	 * This will change once RDMA transport is implemented
	 */
	smc->use_fallback = true;

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct sock *sk = &lsmc->sk;
	struct socket *new_clcsock;
	struct sock *new_sk;
	int rc;

	new_sk = smc_sock_alloc(sock_net(sk), NULL);
	if (!new_sk) {
		rc = -ENOMEM;
		lsmc->sk.sk_err = ENOMEM;
		*new_smc = NULL;
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	if (rc) {
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags)
{
	struct smc_sock *new_smc;
	struct sock *sk = sock->sk;
	struct smc_sock *lsmc;
	int rc;

	lsmc = smc_sk(sk);
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		goto out;
	}

	rc = smc_clcsock_accept(lsmc, &new_smc);
	if (rc)
		goto out;
	sock_graft(&new_smc->sk, new_sock);
	new_smc->sk.sk_state = SMC_ACTIVE;

	smc_copy_sock_settings_to_smc(new_smc);

	/* always use TCP fallback as transport mechanism for now;
	 * This will change once RDMA transport is implemented
	 */
	new_smc->use_fallback = true;

out:
	release_sock(sk);
	return rc;
}
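
/*
 * Server-side usage sketch (userspace, not part of this file): the listen
 * and accept path mirrors plain TCP; only the socket() family differs.
 * As in the sketch at the top of the file, the AF_SMC constant (43 in this
 * patch series) and the port number are assumptions.
 *
 *	int lfd = socket(AF_SMC, SOCK_STREAM, IPPROTO_TCP);
 *	struct sockaddr_in local = {
 *		.sin_family = AF_INET,
 *		.sin_port = htons(12345),
 *		.sin_addr.s_addr = htonl(INADDR_ANY),
 *	};
 *	bind(lfd, (struct sockaddr *)&local, sizeof(local));
 *	listen(lfd, 128);
 *	int cfd = accept(lfd, NULL, NULL);	// served by smc_accept() above
 */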

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int *len, int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = sock_no_sendmsg(sock, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;

	if (smc->use_fallback)
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	else
		rc = sock_no_recvmsg(sock, msg, len, flags);
out:
	release_sock(sk);
	return rc;
}

static unsigned int smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) ||
	    smc->use_fallback) {
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		/* if non-blocking connect finished ... */
		lock_sock(sk);
		if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
			sk->sk_state = SMC_ACTIVE;
			/* always use TCP fallback as transport mechanism;
			 * This will change once RDMA transport is implemented
			 */
			smc->use_fallback = true;
		}
		release_sock(sk);
	} else {
		mask = sock_no_poll(file, sock, wait);
	}

	return mask;
}
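
/*
 * Non-blocking connect sketch (userspace, same assumptions as the client
 * sketch at the top of the file, including the "peer" address): smc_poll()
 * above is what completes a non-blocking connect -- once the CLC/TCP
 * socket reports POLLOUT, the SMC socket moves from SMC_INIT to SMC_ACTIVE.
 *
 *	#include <fcntl.h>
 *	#include <poll.h>
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, IPPROTO_TCP);
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);
 *	connect(fd, (struct sockaddr *)&peer, sizeof(peer)); // -1/EINPROGRESS
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);	// POLLOUT: connect finished, socket active
 */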

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		goto out_err;

	lock_sock(sk);

	rc = -ENOTCONN;
	if (sk->sk_state == SMC_CLOSED)
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
	} else {
		rc = sock_no_shutdown(sock, how);
	}

out:
	release_sock(sk);

out_err:
	return rc;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback)
		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
	else
		return sock_no_ioctl(sock, cmd, arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	release_sock(sk);
	return rc;
}

static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;
	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		rc = -EOPNOTSUPP;
	}
out:
	release_sock(sk);
	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};

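/* create an SMC socket for socket(AF_SMC, SOCK_STREAM, IPPROTO_TCP/IPPROTO_IP)
 * together with the companion kernel TCP socket used for the CLC handshake
 * and the TCP fallback path
 */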
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
			      IPPROTO_TCP, &smc->clcsock);
	if (rc)
		sk_common_release(sk);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

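/* module init: register the proto, the PF_SMC socket family and the
 * RoCE/IB client, unwinding in reverse order if any step fails
 */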
static int __init smc_init(void)
{
	int rc;

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		goto out;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto:
	proto_unregister(&smc_proto);
out:
	return rc;
}

static void __exit smc_exit(void)
{
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto);
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
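
/* MODULE_ALIAS_NETPROTO(PF_SMC) lets the socket core autoload this module
 * ("net-pf-<PF_SMC>") the first time userspace asks for an AF_SMC socket.
 */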