/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
 *  applies to SOCK_STREAM sockets only
 *  offers an alternative communication option for TCP-protocol sockets
 *  applicable with RoCE-cards only
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *              based on prototype from Frank Blaschka
 */
| |
| #define KMSG_COMPONENT "smc" |
| #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
| |
| #include <linux/module.h> |
| #include <linux/socket.h> |
| #include <net/sock.h> |
| |
| #include "smc.h" |
| #include "smc_ib.h" |
| #include "smc_pnet.h" |
| |
| static void smc_set_keepalive(struct sock *sk, int val) |
| { |
| struct smc_sock *smc = smc_sk(sk); |
| |
| smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val); |
| } |
| |
| static struct proto smc_proto = { |
| .name = "SMC", |
| .owner = THIS_MODULE, |
| .keepalive = smc_set_keepalive, |
| .obj_size = sizeof(struct smc_sock), |
| .slab_flags = SLAB_DESTROY_BY_RCU, |
| }; |
| |
| static int smc_release(struct socket *sock) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| |
| if (!sk) |
| goto out; |
| |
| smc = smc_sk(sk); |
| lock_sock(sk); |
| |
| sk->sk_state = SMC_CLOSED; |
| if (smc->clcsock) { |
| sock_release(smc->clcsock); |
| smc->clcsock = NULL; |
| } |
| |
| /* detach socket */ |
| sock_orphan(sk); |
| sock->sk = NULL; |
| release_sock(sk); |
| |
| sock_put(sk); |
| out: |
| return 0; |
| } |
| |
| static void smc_destruct(struct sock *sk) |
| { |
| if (sk->sk_state != SMC_CLOSED) |
| return; |
| if (!sock_flag(sk, SOCK_DEAD)) |
| return; |
| |
| sk_refcnt_debug_dec(sk); |
| } |
| |
| static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) |
| { |
| struct smc_sock *smc; |
| struct sock *sk; |
| |
| sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0); |
| if (!sk) |
| return NULL; |
| |
| sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ |
| sk->sk_state = SMC_INIT; |
| sk->sk_destruct = smc_destruct; |
| sk->sk_protocol = SMCPROTO_SMC; |
| sk_refcnt_debug_inc(sk); |
| |
| smc = smc_sk(sk); |
| |
| return sk; |
| } |
| |
| static int smc_bind(struct socket *sock, struct sockaddr *uaddr, |
| int addr_len) |
| { |
| struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| int rc; |
| |
| smc = smc_sk(sk); |
| |
| /* replicate tests from inet_bind(), to be safe wrt. future changes */ |
| rc = -EINVAL; |
| if (addr_len < sizeof(struct sockaddr_in)) |
| goto out; |
| |
| rc = -EAFNOSUPPORT; |
| /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */ |
| if ((addr->sin_family != AF_INET) && |
| ((addr->sin_family != AF_UNSPEC) || |
| (addr->sin_addr.s_addr != htonl(INADDR_ANY)))) |
| goto out; |
| |
| lock_sock(sk); |
| |
| /* Check if socket is already active */ |
| rc = -EINVAL; |
| if (sk->sk_state != SMC_INIT) |
| goto out_rel; |
| |
| smc->clcsock->sk->sk_reuse = sk->sk_reuse; |
| rc = kernel_bind(smc->clcsock, uaddr, addr_len); |
| |
| out_rel: |
| release_sock(sk); |
| out: |
| return rc; |
| } |
| |
| static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, |
| unsigned long mask) |
| { |
| /* options we don't get control via setsockopt for */ |
| nsk->sk_type = osk->sk_type; |
| nsk->sk_sndbuf = osk->sk_sndbuf; |
| nsk->sk_rcvbuf = osk->sk_rcvbuf; |
| nsk->sk_sndtimeo = osk->sk_sndtimeo; |
| nsk->sk_rcvtimeo = osk->sk_rcvtimeo; |
| nsk->sk_mark = osk->sk_mark; |
| nsk->sk_priority = osk->sk_priority; |
| nsk->sk_rcvlowat = osk->sk_rcvlowat; |
| nsk->sk_bound_dev_if = osk->sk_bound_dev_if; |
| nsk->sk_err = osk->sk_err; |
| |
| nsk->sk_flags &= ~mask; |
| nsk->sk_flags |= osk->sk_flags & mask; |
| } |
| |
| #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \ |
| (1UL << SOCK_KEEPOPEN) | \ |
| (1UL << SOCK_LINGER) | \ |
| (1UL << SOCK_BROADCAST) | \ |
| (1UL << SOCK_TIMESTAMP) | \ |
| (1UL << SOCK_DBG) | \ |
| (1UL << SOCK_RCVTSTAMP) | \ |
| (1UL << SOCK_RCVTSTAMPNS) | \ |
| (1UL << SOCK_LOCALROUTE) | \ |
| (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \ |
| (1UL << SOCK_RXQ_OVFL) | \ |
| (1UL << SOCK_WIFI_STATUS) | \ |
| (1UL << SOCK_NOFCS) | \ |
| (1UL << SOCK_FILTER_LOCKED)) |
| /* copy only relevant settings and flags of SOL_SOCKET level from smc to |
| * clc socket (since smc is not called for these options from net/core) |
| */ |
| static void smc_copy_sock_settings_to_clc(struct smc_sock *smc) |
| { |
| smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC); |
| } |
| |
| #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \ |
| (1UL << SOCK_KEEPOPEN) | \ |
| (1UL << SOCK_LINGER) | \ |
| (1UL << SOCK_DBG)) |
| /* copy only settings and flags relevant for smc from clc to smc socket */ |
| static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) |
| { |
| smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); |
| } |
| |
| static int smc_connect(struct socket *sock, struct sockaddr *addr, |
| int alen, int flags) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| int rc = -EINVAL; |
| |
| smc = smc_sk(sk); |
| |
| /* separate smc parameter checking to be safe */ |
| if (alen < sizeof(addr->sa_family)) |
| goto out_err; |
| if (addr->sa_family != AF_INET) |
| goto out_err; |
| |
| lock_sock(sk); |
| switch (sk->sk_state) { |
| default: |
| goto out; |
| case SMC_ACTIVE: |
| rc = -EISCONN; |
| goto out; |
| case SMC_INIT: |
| rc = 0; |
| break; |
| } |
| |
| smc_copy_sock_settings_to_clc(smc); |
| rc = kernel_connect(smc->clcsock, addr, alen, flags); |
| if (rc) |
| goto out; |
| |
| sk->sk_state = SMC_ACTIVE; |
| |
| /* always use TCP fallback as transport mechanism for now; |
| * This will change once RDMA transport is implemented |
| */ |
| smc->use_fallback = true; |
| |
| out: |
| release_sock(sk); |
| out_err: |
| return rc; |
| } |
| |
| static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) |
| { |
| struct sock *sk = &lsmc->sk; |
| struct socket *new_clcsock; |
| struct sock *new_sk; |
| int rc; |
| |
| new_sk = smc_sock_alloc(sock_net(sk), NULL); |
| if (!new_sk) { |
| rc = -ENOMEM; |
| lsmc->sk.sk_err = ENOMEM; |
| *new_smc = NULL; |
| goto out; |
| } |
| *new_smc = smc_sk(new_sk); |
| |
| rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); |
| if (rc) { |
| sock_put(new_sk); |
| *new_smc = NULL; |
| goto out; |
| } |
| |
| (*new_smc)->clcsock = new_clcsock; |
| out: |
| return rc; |
| } |
| |
| static int smc_listen(struct socket *sock, int backlog) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| int rc; |
| |
| smc = smc_sk(sk); |
| lock_sock(sk); |
| |
| rc = -EINVAL; |
| if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN)) |
| goto out; |
| |
| rc = 0; |
| if (sk->sk_state == SMC_LISTEN) { |
| sk->sk_max_ack_backlog = backlog; |
| goto out; |
| } |
| /* some socket options are handled in core, so we could not apply |
| * them to the clc socket -- copy smc socket options to clc socket |
| */ |
| smc_copy_sock_settings_to_clc(smc); |
| |
| rc = kernel_listen(smc->clcsock, backlog); |
| if (rc) |
| goto out; |
| sk->sk_max_ack_backlog = backlog; |
| sk->sk_ack_backlog = 0; |
| sk->sk_state = SMC_LISTEN; |
| |
| out: |
| release_sock(sk); |
| return rc; |
| } |
| |
| static int smc_accept(struct socket *sock, struct socket *new_sock, |
| int flags) |
| { |
| struct smc_sock *new_smc; |
| struct sock *sk = sock->sk; |
| struct smc_sock *lsmc; |
| int rc; |
| |
| lsmc = smc_sk(sk); |
| lock_sock(sk); |
| |
| if (lsmc->sk.sk_state != SMC_LISTEN) { |
| rc = -EINVAL; |
| goto out; |
| } |
| |
| rc = smc_clcsock_accept(lsmc, &new_smc); |
| if (rc) |
| goto out; |
| sock_graft(&new_smc->sk, new_sock); |
| new_smc->sk.sk_state = SMC_ACTIVE; |
| |
| smc_copy_sock_settings_to_smc(new_smc); |
| |
| /* always use TCP fallback as transport mechanism for now; |
| * This will change once RDMA transport is implemented |
| */ |
| new_smc->use_fallback = true; |
| |
| out: |
| release_sock(sk); |
| return rc; |
| } |
| |
| static int smc_getname(struct socket *sock, struct sockaddr *addr, |
| int *len, int peer) |
| { |
| struct smc_sock *smc; |
| |
| if (peer && (sock->sk->sk_state != SMC_ACTIVE)) |
| return -ENOTCONN; |
| |
| smc = smc_sk(sock->sk); |
| |
| return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer); |
| } |
| |
| static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| int rc = -EPIPE; |
| |
| smc = smc_sk(sk); |
| lock_sock(sk); |
| if (sk->sk_state != SMC_ACTIVE) |
| goto out; |
| if (smc->use_fallback) |
| rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len); |
| else |
| rc = sock_no_sendmsg(sock, msg, len); |
| out: |
| release_sock(sk); |
| return rc; |
| } |
| |
| static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, |
| int flags) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| int rc = -ENOTCONN; |
| |
| smc = smc_sk(sk); |
| lock_sock(sk); |
| if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED)) |
| goto out; |
| |
| if (smc->use_fallback) |
| rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags); |
| else |
| rc = sock_no_recvmsg(sock, msg, len, flags); |
| out: |
| release_sock(sk); |
| return rc; |
| } |
| |
| static unsigned int smc_poll(struct file *file, struct socket *sock, |
| poll_table *wait) |
| { |
| struct sock *sk = sock->sk; |
| unsigned int mask = 0; |
| struct smc_sock *smc; |
| |
| smc = smc_sk(sock->sk); |
| if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || |
| smc->use_fallback) { |
| mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); |
| /* if non-blocking connect finished ... */ |
| lock_sock(sk); |
| if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) { |
| sk->sk_state = SMC_ACTIVE; |
| /* always use TCP fallback as transport mechanism; |
| * This will change once RDMA transport is implemented |
| */ |
| smc->use_fallback = true; |
| } |
| release_sock(sk); |
| } else { |
| mask = sock_no_poll(file, sock, wait); |
| } |
| |
| return mask; |
| } |
| |
| static int smc_shutdown(struct socket *sock, int how) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| int rc = -EINVAL; |
| |
| smc = smc_sk(sk); |
| |
| if ((how < SHUT_RD) || (how > SHUT_RDWR)) |
| goto out_err; |
| |
| lock_sock(sk); |
| |
| rc = -ENOTCONN; |
| if (sk->sk_state == SMC_CLOSED) |
| goto out; |
| if (smc->use_fallback) { |
| rc = kernel_sock_shutdown(smc->clcsock, how); |
| sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; |
| if (sk->sk_shutdown == SHUTDOWN_MASK) |
| sk->sk_state = SMC_CLOSED; |
| } else { |
| rc = sock_no_shutdown(sock, how); |
| } |
| |
| out: |
| release_sock(sk); |
| |
| out_err: |
| return rc; |
| } |
| |
| static int smc_setsockopt(struct socket *sock, int level, int optname, |
| char __user *optval, unsigned int optlen) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| |
| smc = smc_sk(sk); |
| |
| /* generic setsockopts reaching us here always apply to the |
| * CLC socket |
| */ |
| return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, |
| optval, optlen); |
| } |
| |
| static int smc_getsockopt(struct socket *sock, int level, int optname, |
| char __user *optval, int __user *optlen) |
| { |
| struct smc_sock *smc; |
| |
| smc = smc_sk(sock->sk); |
| /* socket options apply to the CLC socket */ |
| return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, |
| optval, optlen); |
| } |
| |
| static int smc_ioctl(struct socket *sock, unsigned int cmd, |
| unsigned long arg) |
| { |
| struct smc_sock *smc; |
| |
| smc = smc_sk(sock->sk); |
| if (smc->use_fallback) |
| return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); |
| else |
| return sock_no_ioctl(sock, cmd, arg); |
| } |
| |
| static ssize_t smc_sendpage(struct socket *sock, struct page *page, |
| int offset, size_t size, int flags) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| int rc = -EPIPE; |
| |
| smc = smc_sk(sk); |
| lock_sock(sk); |
| if (sk->sk_state != SMC_ACTIVE) |
| goto out; |
| if (smc->use_fallback) |
| rc = kernel_sendpage(smc->clcsock, page, offset, |
| size, flags); |
| else |
| rc = sock_no_sendpage(sock, page, offset, size, flags); |
| |
| out: |
| release_sock(sk); |
| return rc; |
| } |
| |
| static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, |
| struct pipe_inode_info *pipe, size_t len, |
| unsigned int flags) |
| { |
| struct sock *sk = sock->sk; |
| struct smc_sock *smc; |
| int rc = -ENOTCONN; |
| |
| smc = smc_sk(sk); |
| lock_sock(sk); |
| if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED)) |
| goto out; |
| if (smc->use_fallback) { |
| rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos, |
| pipe, len, flags); |
| } else { |
| rc = -EOPNOTSUPP; |
| } |
| out: |
| release_sock(sk); |
| return rc; |
| } |
| |
| /* must look like tcp */ |
| static const struct proto_ops smc_sock_ops = { |
| .family = PF_SMC, |
| .owner = THIS_MODULE, |
| .release = smc_release, |
| .bind = smc_bind, |
| .connect = smc_connect, |
| .socketpair = sock_no_socketpair, |
| .accept = smc_accept, |
| .getname = smc_getname, |
| .poll = smc_poll, |
| .ioctl = smc_ioctl, |
| .listen = smc_listen, |
| .shutdown = smc_shutdown, |
| .setsockopt = smc_setsockopt, |
| .getsockopt = smc_getsockopt, |
| .sendmsg = smc_sendmsg, |
| .recvmsg = smc_recvmsg, |
| .mmap = sock_no_mmap, |
| .sendpage = smc_sendpage, |
| .splice_read = smc_splice_read, |
| }; |
| |
| static int smc_create(struct net *net, struct socket *sock, int protocol, |
| int kern) |
| { |
| struct smc_sock *smc; |
| struct sock *sk; |
| int rc; |
| |
| rc = -ESOCKTNOSUPPORT; |
| if (sock->type != SOCK_STREAM) |
| goto out; |
| |
| rc = -EPROTONOSUPPORT; |
| if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP)) |
| goto out; |
| |
| rc = -ENOBUFS; |
| sock->ops = &smc_sock_ops; |
| sk = smc_sock_alloc(net, sock); |
| if (!sk) |
| goto out; |
| |
| /* create internal TCP socket for CLC handshake and fallback */ |
| smc = smc_sk(sk); |
| rc = sock_create_kern(net, PF_INET, SOCK_STREAM, |
| IPPROTO_TCP, &smc->clcsock); |
| if (rc) |
| sk_common_release(sk); |
| |
| out: |
| return rc; |
| } |
| |
| static const struct net_proto_family smc_sock_family_ops = { |
| .family = PF_SMC, |
| .owner = THIS_MODULE, |
| .create = smc_create, |
| }; |
| |
| static int __init smc_init(void) |
| { |
| int rc; |
| |
| rc = smc_pnet_init(); |
| if (rc) |
| return rc; |
| |
| rc = proto_register(&smc_proto, 1); |
| if (rc) { |
| pr_err("%s: proto_register fails with %d\n", __func__, rc); |
| goto out_pnet; |
| } |
| |
| rc = sock_register(&smc_sock_family_ops); |
| if (rc) { |
| pr_err("%s: sock_register fails with %d\n", __func__, rc); |
| goto out_proto; |
| } |
| |
| rc = smc_ib_register_client(); |
| if (rc) { |
| pr_err("%s: ib_register fails with %d\n", __func__, rc); |
| goto out_sock; |
| } |
| |
| return 0; |
| |
| out_sock: |
| sock_unregister(PF_SMC); |
| out_proto: |
| proto_unregister(&smc_proto); |
| out_pnet: |
| smc_pnet_exit(); |
| return rc; |
| } |
| |
| static void __exit smc_exit(void) |
| { |
| smc_ib_unregister_client(); |
| sock_unregister(PF_SMC); |
| proto_unregister(&smc_proto); |
| smc_pnet_exit(); |
| } |
| |
| module_init(smc_init); |
| module_exit(smc_exit); |
| |
| MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>"); |
| MODULE_DESCRIPTION("smc socket address family"); |
| MODULE_LICENSE("GPL"); |
| MODULE_ALIAS_NETPROTO(PF_SMC); |