mptcp: Add MPTCP socket stubs
Implements the infrastructure for MPTCP sockets.
MPTCP sockets open one in-kernel TCP socket per subflow. These subflow
sockets are only managed by the MPTCP socket that owns them and are not
visible from userspace. This commit allows a userspace program to open
an MPTCP socket with:
sock = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
The resulting socket is simply a wrapper around a single regular TCP
socket, without any of the MPTCP protocol implemented over the wire.
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Co-developed-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/Kconfig b/net/Kconfig
index 54916b7..b0937a7 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -91,6 +91,7 @@
source "net/ipv4/Kconfig"
source "net/ipv6/Kconfig"
source "net/netlabel/Kconfig"
+source "net/mptcp/Kconfig"
endif # if INET
diff --git a/net/Makefile b/net/Makefile
index 848303d..07ea481 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -87,3 +87,4 @@
obj-$(CONFIG_QRTR) += qrtr/
obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
+obj-$(CONFIG_MPTCP) += mptcp/
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6711a97..7dfb78c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,6 +271,7 @@
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/tcp.h>
+#include <net/mptcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
@@ -4021,4 +4022,5 @@ void __init tcp_init(void)
tcp_metrics_init();
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
tcp_tasklet_init();
+ mptcp_init();
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5b52601..60068ff 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2163,9 +2163,16 @@ int __init tcpv6_init(void)
ret = register_pernet_subsys(&tcpv6_net_ops);
if (ret)
goto out_tcpv6_protosw;
+
+ ret = mptcpv6_init();
+ if (ret)
+ goto out_tcpv6_pernet_subsys;
+
out:
return ret;
+out_tcpv6_pernet_subsys:
+ unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
new file mode 100644
index 0000000..c1a99f07a
--- /dev/null
+++ b/net/mptcp/Kconfig
@@ -0,0 +1,16 @@
+
+# Core Multipath TCP support. Declared as a bool, so it offers no modular
+# (=m) choice; it is only available when INET is enabled and pulls in
+# SKB_EXTENSIONS through select.
+config MPTCP
+ bool "MPTCP: Multipath TCP"
+ depends on INET
+ select SKB_EXTENSIONS
+ help
+ Multipath TCP (MPTCP) connections send and receive data over multiple
+ subflows in order to utilize multiple network paths. Each subflow
+ uses the TCP protocol, and TCP options carry header information for
+ MPTCP.
+
+# IPv6 support for MPTCP. On by default once MPTCP is enabled.
+# NOTE(review): this selects IPV6 instead of depending on it, so enabling the
+# option changes the IPV6 setting as a side effect - confirm that is wanted.
+config MPTCP_IPV6
+ bool "MPTCP: IPv6 support for Multipath TCP"
+ depends on MPTCP
+ select IPV6
+ default y
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
new file mode 100644
index 0000000..659129d
--- /dev/null
+++ b/net/mptcp/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MPTCP) += mptcp.o
+
+mptcp-y := protocol.o
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
new file mode 100644
index 0000000..5e24e7c
--- /dev/null
+++ b/net/mptcp/protocol.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2017 - 2019, Intel Corporation.
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <net/sock.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/mptcp.h>
+#include "protocol.h"
+
+/* Send data on an MPTCP socket by handing the message to its subflow socket.
+ *
+ * @sk:  MPTCP socket the caller is sending on
+ * @msg: message to transmit; only MSG_MORE, MSG_DONTWAIT and MSG_NOSIGNAL
+ *       may be set in msg->msg_flags
+ * @len: not used here; sock_sendmsg() is given @msg only
+ *
+ * Returns -EOPNOTSUPP when any other flag is set, -ENOTCONN when no subflow
+ * socket is attached, otherwise whatever sock_sendmsg() returns.
+ */
+static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *subflow = msk->subflow;
+
+	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
+		return -EOPNOTSUPP;
+
+	/* mptcp_close() already treats a missing subflow as possible; without
+	 * this check sock_sendmsg() would be handed a NULL socket.
+	 */
+	if (!subflow)
+		return -ENOTCONN;
+
+	return sock_sendmsg(subflow, msg);
+}
+
+/* Receive data on an MPTCP socket by reading from its subflow socket.
+ *
+ * @sk:       MPTCP socket the caller is reading from
+ * @msg:      destination message
+ * @len:      not used here
+ * @nonblock: not used here
+ * @flags:    receive flags, forwarded unchanged to sock_recvmsg()
+ * @addr_len: not used here
+ *
+ * Returns -EOPNOTSUPP when msg->msg_flags carries anything other than
+ * MSG_WAITALL or MSG_DONTWAIT, -ENOTCONN when no subflow socket is attached,
+ * otherwise whatever sock_recvmsg() returns.
+ */
+static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			 int nonblock, int flags, int *addr_len)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *subflow = msk->subflow;
+
+	/* NOTE(review): the filter reads msg->msg_flags while the flags that
+	 * are forwarded below come from @flags - confirm which one is meant
+	 * to be checked.
+	 */
+	if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
+		return -EOPNOTSUPP;
+
+	/* mptcp_close() already treats a missing subflow as possible; without
+	 * this check sock_recvmsg() would be handed a NULL socket.
+	 */
+	if (!subflow)
+		return -ENOTCONN;
+
+	return sock_recvmsg(subflow, msg, flags);
+}
+
+/* .init hook of mptcp_prot. There is nothing to set up yet, so it always
+ * reports success; in particular it does not touch the subflow pointer of
+ * the MPTCP socket.
+ */
+static int mptcp_init_sock(struct sock *sk)
+{
+ return 0;
+}
+
+/* Tear down an MPTCP socket: mark it TCP_CLOSE, release the subflow socket
+ * if one is attached, then orphan the MPTCP socket and drop one reference.
+ *
+ * @sk:      MPTCP socket being closed
+ * @timeout: not used here
+ */
+static void mptcp_close(struct sock *sk, long timeout)
+{
+	struct socket *ssock;
+
+	inet_sk_state_store(sk, TCP_CLOSE);
+
+	ssock = mptcp_sk(sk)->subflow;
+	if (ssock) {
+		pr_debug("subflow=%p", ssock->sk);
+		sock_release(ssock);
+	}
+
+	sock_orphan(sk);
+	sock_put(sk);
+}
+
+/* Connect an MPTCP socket by connecting its subflow socket.
+ *
+ * @sk:    MPTCP socket
+ * @saddr: destination address; its sa_family field is overwritten with
+ *         AF_INET before it is passed on
+ * @len:   length of @saddr, passed through to kernel_connect()
+ *
+ * Returns -EINVAL when no subflow socket is attached, otherwise the result
+ * of kernel_connect(). The MPTCP socket is moved to TCP_ESTABLISHED only
+ * when the subflow connect succeeded, so a failed attempt does not leave it
+ * looking connected.
+ */
+static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	int err;
+
+	/* mptcp_close() already treats a missing subflow as possible; bail
+	 * out here instead of dereferencing a NULL socket below.
+	 */
+	if (!msk->subflow)
+		return -EINVAL;
+
+	/* NOTE(review): this rewrites the caller's address family, also for
+	 * sockets created through the IPv6 protosw - confirm it is intended.
+	 */
+	saddr->sa_family = AF_INET;
+
+	pr_debug("msk=%p, subflow=%p", msk, msk->subflow->sk);
+
+	err = kernel_connect(msk->subflow, saddr, len, 0);
+	if (err)
+		return err;
+
+	sk->sk_state = TCP_ESTABLISHED;
+
+	return 0;
+}
+
+/* Protocol operations for IPv4 MPTCP sockets. mptcpv6_init() copies this
+ * table as the starting point for the IPv6 variant.
+ *
+ * NOTE(review): .accept and .shutdown point at the generic inet_csk/TCP
+ * handlers rather than MPTCP-specific ones - confirm they are safe to run
+ * on a struct mptcp_sock.
+ */
+static struct proto mptcp_prot = {
+	/* identity and allocation */
+	.name		= "MPTCP",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct mptcp_sock),
+	.no_autobind	= true,
+
+	/* socket lifecycle */
+	.init		= mptcp_init_sock,
+	.connect	= mptcp_connect,
+	.accept		= inet_csk_accept,
+	.shutdown	= tcp_shutdown,
+	.close		= mptcp_close,
+
+	/* data path */
+	.sendmsg	= mptcp_sendmsg,
+	.recvmsg	= mptcp_recvmsg,
+
+	/* port binding and hashing */
+	.get_port	= inet_csk_get_port,
+	.hash		= inet_hash,
+	.unhash		= inet_unhash,
+};
+
+/* inet protocol switch entry: a SOCK_STREAM socket opened with
+ * IPPROTO_MPTCP uses mptcp_prot together with the generic inet stream ops.
+ *
+ * INET_PROTOSW_ICSK is set to match the IPv6 entry in this file: struct
+ * mptcp_sock starts with an inet_connection_sock and mptcp_prot relies on
+ * inet_csk_* helpers.
+ */
+static struct inet_protosw mptcp_protosw = {
+	.type		= SOCK_STREAM,
+	.protocol	= IPPROTO_MPTCP,
+	.prot		= &mptcp_prot,
+	.ops		= &inet_stream_ops,
+	.flags		= INET_PROTOSW_ICSK,
+};
+
+/* Register the MPTCP proto and its inet protocol switch entry.
+ *
+ * A proto_register() failure is treated as fatal and ends in panic().
+ */
+void __init mptcp_init(void)
+{
+	int err = proto_register(&mptcp_prot, 1);
+
+	if (err)
+		panic("Failed to register MPTCP proto.\n");
+
+	inet_register_protosw(&mptcp_protosw);
+}
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+/* Filled in at runtime by mptcpv6_init() from mptcp_prot. */
+static struct proto mptcp_v6_prot;
+
+/* inet6 protocol switch entry: a SOCK_STREAM socket opened with
+ * IPPROTO_MPTCP uses mptcp_v6_prot together with the inet6 stream ops.
+ */
+static struct inet_protosw mptcp_v6_protosw = {
+	.protocol	= IPPROTO_MPTCP,
+	.type		= SOCK_STREAM,
+	.flags		= INET_PROTOSW_ICSK,
+	.ops		= &inet6_stream_ops,
+	.prot		= &mptcp_v6_prot,
+};
+
+/* Register the IPv6 flavour of the MPTCP protocol.
+ *
+ * mptcp_v6_prot is built as a copy of mptcp_prot with its own name, a
+ * cleared slab pointer and an object size enlarged by struct ipv6_pinfo.
+ *
+ * Returns 0 on success, or the error reported by proto_register() or
+ * inet6_register_protosw(). If the protosw registration fails, the proto
+ * registration is rolled back before returning.
+ */
+int mptcpv6_init(void)
+{
+	int err;
+
+	mptcp_v6_prot = mptcp_prot;
+
+	/* Bounded copy: never writes past the end of the name buffer. */
+	strscpy(mptcp_v6_prot.name, "MPTCPv6", sizeof(mptcp_v6_prot.name));
+
+	/* Do not carry over the slab pointer copied from mptcp_prot. */
+	mptcp_v6_prot.slab = NULL;
+	mptcp_v6_prot.obj_size = sizeof(struct mptcp_sock) +
+				 sizeof(struct ipv6_pinfo);
+
+	err = proto_register(&mptcp_v6_prot, 1);
+	if (err)
+		return err;
+
+	err = inet6_register_protosw(&mptcp_v6_protosw);
+	if (err)
+		proto_unregister(&mptcp_v6_prot);
+
+	return err;
+}
+#endif
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
new file mode 100644
index 0000000..ee04a01
--- /dev/null
+++ b/net/mptcp/protocol.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Multipath TCP
+ *
+ * Copyright (c) 2017 - 2019, Intel Corporation.
+ */
+
+#ifndef __MPTCP_PROTOCOL_H
+#define __MPTCP_PROTOCOL_H
+
+/* MPTCP connection sock.
+ *
+ * mptcp_sk() reinterprets a struct sock pointer as a pointer to this type,
+ * so the member order below must not change.
+ */
+struct mptcp_sock {
+ /* inet_connection_sock must be the first member */
+ struct inet_connection_sock sk;
+ struct socket *subflow; /* outgoing connect/listener/!mp_capable */
+};
+
+/* View a generic sock as the MPTCP socket it belongs to. This is a plain
+ * pointer cast (the const qualifier is dropped), so it is only meaningful
+ * for socks that were allocated as struct mptcp_sock.
+ */
+static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
+{
+	struct mptcp_sock *msk = (struct mptcp_sock *)sk;
+
+	return msk;
+}
+
+#endif /* __MPTCP_PROTOCOL_H */