// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/sched/signal.h>
#include <linux/atomic.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"

#define MPTCP_SAME_STATE TCP_MAX_STATES

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
struct mptcp6_sock {
        struct mptcp_sock msk;
        struct ipv6_pinfo np;
};
#endif

/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
 * completed yet or has failed, return the subflow socket.
 * Otherwise return NULL.
 */
static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
{
        if (!msk->subflow || READ_ONCE(msk->can_ack))
                return NULL;

        return msk->subflow;
}

static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
{
        return msk->first && !sk_is_mptcp(msk->first);
}

static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
        sock_owned_by_me((const struct sock *)msk);

        if (likely(!__mptcp_needs_tcp_fallback(msk)))
                return NULL;

        if (msk->subflow) {
                release_sock((struct sock *)msk);
                return msk->subflow;
        }

        return NULL;
}

static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
{
        return !msk->first;
}

static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
{
        struct mptcp_subflow_context *subflow;
        struct sock *sk = (struct sock *)msk;
        struct socket *ssock;
        int err;

        ssock = __mptcp_nmpc_socket(msk);
        if (ssock)
                goto set_state;

        if (!__mptcp_can_create_subflow(msk))
                return ERR_PTR(-EINVAL);

        err = mptcp_subflow_create_socket(sk, &ssock);
        if (err)
                return ERR_PTR(err);

        msk->first = ssock->sk;
        msk->subflow = ssock;
        subflow = mptcp_subflow_ctx(ssock->sk);
        list_add(&subflow->node, &msk->conn_list);
        subflow->request_mptcp = 1;

set_state:
        if (state != MPTCP_SAME_STATE)
                inet_sk_state_store(sk, state);
        return ssock;
}

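/* Pick the subflow to transmit on: for now this simply returns the
 * first subflow on the connection list, or NULL if there is none.
 */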
static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
{
        struct mptcp_subflow_context *subflow;

        sock_owned_by_me((const struct sock *)msk);

        mptcp_for_each_subflow(msk, subflow) {
                return mptcp_subflow_tcp_sock(subflow);
        }

        return NULL;
}

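/* Called by the subflow when new data is ready at the TCP level: record
 * the event on the MPTCP socket and wake up its readers.
 */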
void mptcp_data_ready(struct sock *sk)
{
        struct mptcp_sock *msk = mptcp_sk(sk);

        set_bit(MPTCP_DATA_READY, &msk->flags);
        sk->sk_data_ready(sk);
}

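/* Make sure a spare MPTCP skb extension is cached for the next transmit;
 * returns false if the allocation failed.
 */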
static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
{
        if (!msk->cached_ext)
                msk->cached_ext = __skb_ext_alloc();

        return !!msk->cached_ext;
}

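/* Return the first subflow with data available for reading, or NULL. */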
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
{
        struct mptcp_subflow_context *subflow;
        struct sock *sk = (struct sock *)msk;

        sock_owned_by_me(sk);

        mptcp_for_each_subflow(msk, subflow) {
                if (subflow->data_avail)
                        return mptcp_subflow_tcp_sock(subflow);
        }

        return NULL;
}

static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
                                             const struct sk_buff *skb,
                                             const struct mptcp_ext *mpext)
{
        if (!tcp_skb_can_collapse_to(skb))
                return false;

        /* can collapse only if MPTCP level sequence is in order */
        return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
}

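/* Transmit (part of) the pending data in @msg on the given subflow: the
 * data is copied into the subflow page fragment cache, pushed via
 * do_tcp_sendpages() and described by a DSS mapping carried in the MPTCP
 * skb extension, collapsing into the tail skb when the mapping allows it.
 * Returns the number of bytes queued, 0 if a TCP fallback is needed, or a
 * negative error code.
 */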
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
                              struct msghdr *msg, long *timeo, int *pmss_now,
                              int *ps_goal)
{
        int mss_now, avail_size, size_goal, ret;
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_ext *mpext = NULL;
        struct sk_buff *skb, *tail;
        bool can_collapse = false;
        struct page_frag *pfrag;
        size_t psize;

        /* use the mptcp page cache so that we can easily move the data
         * from one substream to another, but do per subflow memory accounting
         */
        pfrag = sk_page_frag(sk);
        while (!sk_page_frag_refill(ssk, pfrag) ||
               !mptcp_ext_cache_refill(msk)) {
                ret = sk_stream_wait_memory(ssk, timeo);
                if (ret)
                        return ret;
                if (unlikely(__mptcp_needs_tcp_fallback(msk)))
                        return 0;
        }

        /* compute copy limit */
        mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
        *pmss_now = mss_now;
        *ps_goal = size_goal;
        avail_size = size_goal;
        skb = tcp_write_queue_tail(ssk);
        if (skb) {
                mpext = skb_ext_find(skb, SKB_EXT_MPTCP);

                /* Limit the write to the size available in the
                 * current skb, if any, so that we create at most one new skb.
                 * Explicitly tell TCP internals to avoid collapsing on later
                 * queue management operations, to avoid breaking the ext <->
                 * SSN association set here.
                 */
                can_collapse = (size_goal - skb->len > 0) &&
                               mptcp_skb_can_collapse_to(msk, skb, mpext);
                if (!can_collapse)
                        TCP_SKB_CB(skb)->eor = 1;
                else
                        avail_size = size_goal - skb->len;
        }
        psize = min_t(size_t, pfrag->size - pfrag->offset, avail_size);

        /* Copy to page */
        pr_debug("left=%zu", msg_data_left(msg));
        psize = copy_page_from_iter(pfrag->page, pfrag->offset,
                                    min_t(size_t, msg_data_left(msg), psize),
                                    &msg->msg_iter);
        pr_debug("left=%zu", msg_data_left(msg));
        if (!psize)
                return -EINVAL;

        /* tell the TCP stack to delay the push so that we can safely
         * access the skb after the sendpages call
         */
        ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
                               msg->msg_flags | MSG_SENDPAGE_NOTLAST);
        if (ret <= 0)
                return ret;
        if (unlikely(ret < psize))
                iov_iter_revert(&msg->msg_iter, psize - ret);

        /* if the tail skb extension is still the cached one, collapsing
         * really happened. Note: we can't check for 'same skb' as the sk_buff
         * hdr on tail can be transmitted, freed and re-allocated by the
         * do_tcp_sendpages() call
         */
        tail = tcp_write_queue_tail(ssk);
        if (mpext && tail && mpext == skb_ext_find(tail, SKB_EXT_MPTCP)) {
                WARN_ON_ONCE(!can_collapse);
                mpext->data_len += ret;
                goto out;
        }

        skb = tcp_write_queue_tail(ssk);
        mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
        msk->cached_ext = NULL;

        memset(mpext, 0, sizeof(*mpext));
        mpext->data_seq = msk->write_seq;
        mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
        mpext->data_len = ret;
        mpext->use_map = 1;
        mpext->dsn64 = 1;

        pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d",
                 mpext->data_seq, mpext->subflow_seq, mpext->data_len,
                 mpext->dsn64);

out:
        pfrag->offset += ret;
        msk->write_seq += ret;
        mptcp_subflow_ctx(ssk)->rel_write_seq += ret;

        return ret;
}

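/* If the subflow ran out of write space, clear the MPTCP-level SEND_SPACE
 * flag and set SOCK_NOSPACE on the owning socket, so that the TCP stack
 * will notify us when space becomes available again.
 */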
static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
{
        struct socket *sock;

        if (likely(sk_stream_is_writeable(ssk)))
                return;

        sock = READ_ONCE(ssk->sk_socket);

        if (sock) {
                clear_bit(MPTCP_SEND_SPACE, &msk->flags);
                smp_mb__after_atomic();
                /* set NOSPACE only after clearing SEND_SPACE flag */
                set_bit(SOCK_NOSPACE, &sock->flags);
        }
}

static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
        int mss_now = 0, size_goal = 0, ret = 0;
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct socket *ssock;
        size_t copied = 0;
        struct sock *ssk;
        long timeo;

        if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
                return -EOPNOTSUPP;

        lock_sock(sk);
        ssock = __mptcp_tcp_fallback(msk);
        if (unlikely(ssock)) {
fallback:
                pr_debug("fallback passthrough");
                ret = sock_sendmsg(ssock, msg);
                return ret >= 0 ? ret + copied : (copied ? copied : ret);
        }

        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

        ssk = mptcp_subflow_get(msk);
        if (!ssk) {
                release_sock(sk);
                return -ENOTCONN;
        }

        pr_debug("conn_list->subflow=%p", ssk);

        lock_sock(ssk);
        while (msg_data_left(msg)) {
                ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
                                         &size_goal);
                if (ret < 0)
                        break;
                if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
                        release_sock(ssk);
                        ssock = __mptcp_tcp_fallback(msk);
                        goto fallback;
                }

                copied += ret;
        }

        if (copied) {
                ret = copied;
                tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
                         size_goal);
        }

        ssk_check_wmem(msk, ssk);
        release_sock(ssk);
        release_sock(sk);
        return ret;
}

int mptcp_read_actor(read_descriptor_t *desc, struct sk_buff *skb,
                     unsigned int offset, size_t len)
{
        struct mptcp_read_arg *arg = desc->arg.data;
        size_t copy_len;

        copy_len = min(desc->count, len);

        if (likely(arg->msg)) {
                int err;

                err = skb_copy_datagram_msg(skb, offset, arg->msg, copy_len);
                if (err) {
                        pr_debug("error path");
                        desc->error = err;
                        return err;
                }
        } else {
                pr_debug("Flushing skb payload");
        }

        desc->count -= copy_len;

        pr_debug("consumed %zu bytes, %zu left", copy_len, desc->count);
        return copy_len;
}

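/* Sleep until the MPTCP-level DATA_READY flag is set or the timeout
 * expires; the msk socket lock is released while waiting.
 */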
static void mptcp_wait_data(struct sock *sk, long *timeo)
{
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct mptcp_sock *msk = mptcp_sk(sk);

        add_wait_queue(sk_sleep(sk), &wait);
        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);

        sk_wait_event(sk, timeo,
                      test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait);

        sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        remove_wait_queue(sk_sleep(sk), &wait);
}

static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                         int nonblock, int flags, int *addr_len)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_subflow_context *subflow;
        bool more_data_avail = false;
        struct mptcp_read_arg arg;
        read_descriptor_t desc;
        bool wait_data = false;
        struct socket *ssock;
        struct tcp_sock *tp;
        bool done = false;
        struct sock *ssk;
        int copied = 0;
        int target;
        long timeo;

        if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
                return -EOPNOTSUPP;

        lock_sock(sk);
        ssock = __mptcp_tcp_fallback(msk);
        if (unlikely(ssock)) {
fallback:
                pr_debug("fallback-read subflow=%p",
                         mptcp_subflow_ctx(ssock->sk));
                copied = sock_recvmsg(ssock, msg, flags);
                return copied;
        }

        arg.msg = msg;
        desc.arg.data = &arg;
        desc.error = 0;

        timeo = sock_rcvtimeo(sk, nonblock);

        len = min_t(size_t, len, INT_MAX);
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

        while (!done) {
                u32 map_remaining;
                int bytes_read;

                ssk = mptcp_subflow_recv_lookup(msk);
                pr_debug("msk=%p ssk=%p", msk, ssk);
                if (!ssk)
                        goto wait_for_data;

                subflow = mptcp_subflow_ctx(ssk);
                tp = tcp_sk(ssk);

                lock_sock(ssk);
                do {
                        /* try to read as much data as available */
                        map_remaining = subflow->map_data_len -
                                        mptcp_subflow_get_map_offset(subflow);
                        desc.count = min_t(size_t, len - copied, map_remaining);
                        pr_debug("reading %zu bytes, copied %d", desc.count,
                                 copied);
                        bytes_read = tcp_read_sock(ssk, &desc,
                                                   mptcp_read_actor);
                        if (bytes_read < 0) {
                                if (!copied)
                                        copied = bytes_read;
                                done = true;
                                goto next;
                        }

                        pr_debug("msk ack_seq=%llx -> %llx", msk->ack_seq,
                                 msk->ack_seq + bytes_read);
                        msk->ack_seq += bytes_read;
                        copied += bytes_read;
                        if (copied >= len) {
                                done = true;
                                goto next;
                        }
                        if (tp->urg_data && tp->urg_seq == tp->copied_seq) {
                                pr_err("Urgent data present, cannot proceed");
                                done = true;
                                goto next;
                        }
next:
                        more_data_avail = mptcp_subflow_data_available(ssk);
                } while (more_data_avail && !done);
                release_sock(ssk);
                continue;

wait_for_data:
                more_data_avail = false;

                /* only the master socket status is relevant here. The exit
                 * conditions mirror closely tcp_recvmsg()
                 */
                if (copied >= target)
                        break;

                if (copied) {
                        if (sk->sk_err ||
                            sk->sk_state == TCP_CLOSE ||
                            (sk->sk_shutdown & RCV_SHUTDOWN) ||
                            !timeo ||
                            signal_pending(current))
                                break;
                } else {
                        if (sk->sk_err) {
                                copied = sock_error(sk);
                                break;
                        }

                        if (sk->sk_shutdown & RCV_SHUTDOWN)
                                break;

                        if (sk->sk_state == TCP_CLOSE) {
                                copied = -ENOTCONN;
                                break;
                        }

                        if (!timeo) {
                                copied = -EAGAIN;
                                break;
                        }

                        if (signal_pending(current)) {
                                copied = sock_intr_errno(timeo);
                                break;
                        }
                }

                pr_debug("block timeout %ld", timeo);
                wait_data = true;
                mptcp_wait_data(sk, &timeo);
                ssock = __mptcp_tcp_fallback(msk);
                if (unlikely(ssock))
                        goto fallback;
        }

        if (more_data_avail) {
                if (!test_bit(MPTCP_DATA_READY, &msk->flags))
                        set_bit(MPTCP_DATA_READY, &msk->flags);
        } else if (!wait_data) {
                clear_bit(MPTCP_DATA_READY, &msk->flags);

                /* .. race-breaker: ssk might get new data after last
                 * data_available() returns false.
                 */
                ssk = mptcp_subflow_recv_lookup(msk);
                if (unlikely(ssk))
                        set_bit(MPTCP_DATA_READY, &msk->flags);
        }

        release_sock(sk);
        return copied;
}

/* subflow sockets can be either outgoing (connect) or incoming
 * (accept).
 *
 * Outgoing subflows use in-kernel sockets.
 * Incoming subflows do not have their own 'struct socket' allocated,
 * so we need to use tcp_close() after detaching them from the mptcp
 * parent socket.
 */
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
                              struct mptcp_subflow_context *subflow,
                              long timeout)
{
        struct socket *sock = READ_ONCE(ssk->sk_socket);

        list_del(&subflow->node);

        if (sock && sock != sk->sk_socket) {
                /* outgoing subflow */
                sock_release(sock);
        } else {
                /* incoming subflow */
                tcp_close(ssk, timeout);
        }
}

static int __mptcp_init_sock(struct sock *sk)
{
        struct mptcp_sock *msk = mptcp_sk(sk);

        INIT_LIST_HEAD(&msk->conn_list);
        __set_bit(MPTCP_SEND_SPACE, &msk->flags);

        msk->first = NULL;

        return 0;
}

static int mptcp_init_sock(struct sock *sk)
{
        if (!mptcp_is_enabled(sock_net(sk)))
                return -ENOPROTOOPT;

        return __mptcp_init_sock(sk);
}

static void mptcp_subflow_shutdown(struct sock *ssk, int how)
{
        lock_sock(ssk);

        switch (ssk->sk_state) {
        case TCP_LISTEN:
                if (!(how & RCV_SHUTDOWN))
                        break;
                /* fall through */
        case TCP_SYN_SENT:
                tcp_disconnect(ssk, O_NONBLOCK);
                break;
        default:
                ssk->sk_shutdown |= how;
                tcp_shutdown(ssk, how);
                break;
        }

        /* Wake up anyone sleeping in poll. */
        ssk->sk_state_change(ssk);
        release_sock(ssk);
}

/* The msk lock is not held on entry; it is taken and released internally,
 * before the subflows are torn down.
 */
static void mptcp_close(struct sock *sk, long timeout)
{
        struct mptcp_subflow_context *subflow, *tmp;
        struct mptcp_sock *msk = mptcp_sk(sk);
        LIST_HEAD(conn_list);

        lock_sock(sk);

        mptcp_token_destroy(msk->token);
        inet_sk_state_store(sk, TCP_CLOSE);

        list_splice_init(&msk->conn_list, &conn_list);

        release_sock(sk);

        list_for_each_entry_safe(subflow, tmp, &conn_list, node) {
                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

                __mptcp_close_ssk(sk, ssk, subflow, timeout);
        }

        sk_common_release(sk);
}

static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
        struct ipv6_pinfo *msk6 = inet6_sk(msk);

        msk->sk_v6_daddr = ssk->sk_v6_daddr;
        msk->sk_v6_rcv_saddr = ssk->sk_v6_rcv_saddr;

        if (msk6 && ssk6) {
                msk6->saddr = ssk6->saddr;
                msk6->flow_label = ssk6->flow_label;
        }
#endif

        inet_sk(msk)->inet_num = inet_sk(ssk)->inet_num;
        inet_sk(msk)->inet_dport = inet_sk(ssk)->inet_dport;
        inet_sk(msk)->inet_sport = inet_sk(ssk)->inet_sport;
        inet_sk(msk)->inet_daddr = inet_sk(ssk)->inet_daddr;
        inet_sk(msk)->inet_saddr = inet_sk(ssk)->inet_saddr;
        inet_sk(msk)->inet_rcv_saddr = inet_sk(ssk)->inet_rcv_saddr;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct mptcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
#endif

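/* Clone the msk for a passively established connection. For IPv6 the
 * ipv6_pinfo area lives inside struct mptcp6_sock, so the pinet6 pointer
 * of the clone must be re-pointed into the freshly cloned allocation.
 */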
static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
{
        struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);

        if (!nsk)
                return NULL;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        if (nsk->sk_family == AF_INET6)
                inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
#endif

        return nsk;
}

static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
                                 bool kern)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct socket *listener;
        struct sock *newsk;

        listener = __mptcp_nmpc_socket(msk);
        if (WARN_ON_ONCE(!listener)) {
                *err = -EINVAL;
                return NULL;
        }

        pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
        newsk = inet_csk_accept(listener->sk, flags, err, kern);
        if (!newsk)
                return NULL;

        pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));

        if (sk_is_mptcp(newsk)) {
                struct mptcp_subflow_context *subflow;
                struct sock *new_mptcp_sock;
                struct sock *ssk = newsk;
                u64 ack_seq;

                subflow = mptcp_subflow_ctx(newsk);
                lock_sock(sk);

                local_bh_disable();
                new_mptcp_sock = mptcp_sk_clone_lock(sk);
                if (!new_mptcp_sock) {
                        *err = -ENOBUFS;
                        local_bh_enable();
                        release_sock(sk);
                        mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1);
                        tcp_close(newsk, 0);
                        return NULL;
                }

                __mptcp_init_sock(new_mptcp_sock);

                msk = mptcp_sk(new_mptcp_sock);
                msk->local_key = subflow->local_key;
                msk->token = subflow->token;
                msk->subflow = NULL;
                msk->first = newsk;

                mptcp_token_update_accept(newsk, new_mptcp_sock);

                msk->write_seq = subflow->idsn + 1;
                if (subflow->can_ack) {
                        msk->can_ack = true;
                        msk->remote_key = subflow->remote_key;
                        mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
                        ack_seq++;
                        msk->ack_seq = ack_seq;
                }
                newsk = new_mptcp_sock;
                mptcp_copy_inaddrs(newsk, ssk);
                list_add(&subflow->node, &msk->conn_list);

                /* will be fully established at mptcp_stream_accept()
                 * completion.
                 */
                inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV);
                bh_unlock_sock(new_mptcp_sock);
                local_bh_enable();
                release_sock(sk);

                /* the subflow can already receive packets, avoid racing with
                 * the receive path and process the pending ones
                 */
                lock_sock(ssk);
                subflow->rel_write_seq = 1;
                subflow->tcp_sock = ssk;
                subflow->conn = new_mptcp_sock;
                if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue)))
                        mptcp_subflow_data_available(ssk);
                release_sock(ssk);
        }

        return newsk;
}

static void mptcp_destroy(struct sock *sk)
{
        struct mptcp_sock *msk = mptcp_sk(sk);

        if (msk->cached_ext)
                __skb_ext_put(msk->cached_ext);
}

static int mptcp_setsockopt(struct sock *sk, int level, int optname,
                            char __user *optval, unsigned int optlen)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct socket *ssock;

        pr_debug("msk=%p", msk);

        /* @@ the meaning of setsockopt() when the socket is connected and
         * there are multiple subflows is not yet defined. It is up to the
         * MPTCP-level socket to configure the subflows until the subflow
         * is in TCP fallback, when TCP socket options are passed through
         * to the one remaining subflow.
         */
        lock_sock(sk);
        ssock = __mptcp_tcp_fallback(msk);
        if (ssock)
                return tcp_setsockopt(ssock->sk, level, optname, optval,
                                      optlen);

        release_sock(sk);

        return -EOPNOTSUPP;
}

static int mptcp_getsockopt(struct sock *sk, int level, int optname,
                            char __user *optval, int __user *option)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct socket *ssock;

        pr_debug("msk=%p", msk);

        /* @@ the meaning of getsockopt() when the socket is connected and
         * there are multiple subflows is not yet defined. It is up to the
         * MPTCP-level socket to configure the subflows until the subflow
         * is in TCP fallback, when socket options are passed through
         * to the one remaining subflow.
         */
        lock_sock(sk);
        ssock = __mptcp_tcp_fallback(msk);
        if (ssock)
                return tcp_getsockopt(ssock->sk, level, optname, optval,
                                      option);

        release_sock(sk);

        return -EOPNOTSUPP;
}

static int mptcp_get_port(struct sock *sk, unsigned short snum)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct socket *ssock;

        ssock = __mptcp_nmpc_socket(msk);
        pr_debug("msk=%p, subflow=%p", msk, ssock);
        if (WARN_ON_ONCE(!ssock))
                return -EINVAL;

        return inet_csk_get_port(ssock->sk, snum);
}

void mptcp_finish_connect(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow;
        struct mptcp_sock *msk;
        struct sock *sk;
        u64 ack_seq;

        subflow = mptcp_subflow_ctx(ssk);

        if (!subflow->mp_capable)
                return;

        sk = subflow->conn;
        msk = mptcp_sk(sk);

        pr_debug("msk=%p, token=%u", sk, subflow->token);

        mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
        ack_seq++;
        subflow->map_seq = ack_seq;
        subflow->map_subflow_seq = 1;
        subflow->rel_write_seq = 1;

        /* the socket is not connected yet, no msk/subflow ops can access or
         * race on the fields below
         */
        WRITE_ONCE(msk->remote_key, subflow->remote_key);
        WRITE_ONCE(msk->local_key, subflow->local_key);
        WRITE_ONCE(msk->token, subflow->token);
        WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
        WRITE_ONCE(msk->ack_seq, ack_seq);
        WRITE_ONCE(msk->can_ack, 1);
}

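/* Attach the given (subflow) sock to @parent, so that wait queue and uid
 * lookups resolve to the MPTCP-level socket.
 */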
static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
{
        write_lock_bh(&sk->sk_callback_lock);
        rcu_assign_pointer(sk->sk_wq, &parent->wq);
        sk_set_socket(sk, parent);
        sk->sk_uid = SOCK_INODE(parent)->i_uid;
        write_unlock_bh(&sk->sk_callback_lock);
}

static bool mptcp_memory_free(const struct sock *sk, int wake)
{
        struct mptcp_sock *msk = mptcp_sk(sk);

        return wake ? test_bit(MPTCP_SEND_SPACE, &msk->flags) : true;
}

static struct proto mptcp_prot = {
        .name           = "MPTCP",
        .owner          = THIS_MODULE,
        .init           = mptcp_init_sock,
        .close          = mptcp_close,
        .accept         = mptcp_accept,
        .setsockopt     = mptcp_setsockopt,
        .getsockopt     = mptcp_getsockopt,
        .shutdown       = tcp_shutdown,
        .destroy        = mptcp_destroy,
        .sendmsg        = mptcp_sendmsg,
        .recvmsg        = mptcp_recvmsg,
        .hash           = inet_hash,
        .unhash         = inet_unhash,
        .get_port       = mptcp_get_port,
        .stream_memory_free     = mptcp_memory_free,
        .obj_size       = sizeof(struct mptcp_sock),
        .no_autobind    = true,
};

static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct mptcp_sock *msk = mptcp_sk(sock->sk);
        struct socket *ssock;
        int err;

        lock_sock(sock->sk);
        ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
        if (IS_ERR(ssock)) {
                err = PTR_ERR(ssock);
                goto unlock;
        }

        err = ssock->ops->bind(ssock, uaddr, addr_len);
        if (!err)
                mptcp_copy_inaddrs(sock->sk, ssock->sk);

unlock:
        release_sock(sock->sk);
        return err;
}

static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                                int addr_len, int flags)
{
        struct mptcp_sock *msk = mptcp_sk(sock->sk);
        struct socket *ssock;
        int err;

        lock_sock(sock->sk);
        ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
        if (IS_ERR(ssock)) {
                err = PTR_ERR(ssock);
                goto unlock;
        }

#ifdef CONFIG_TCP_MD5SIG
        /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
         * TCP option space.
         */
        if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
                mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0;
#endif

        err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
        inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
        mptcp_copy_inaddrs(sock->sk, ssock->sk);

unlock:
        release_sock(sock->sk);
        return err;
}

static int mptcp_v4_getname(struct socket *sock, struct sockaddr *uaddr,
                            int peer)
{
        if (sock->sk->sk_prot == &tcp_prot) {
                /* we are being invoked from __sys_accept4, after
                 * mptcp_accept() has just accepted a non-mp-capable
                 * flow: sk is a tcp_sk, not an mptcp one.
                 *
                 * Hand the socket over to tcp so all further socket ops
                 * bypass mptcp.
                 */
                sock->ops = &inet_stream_ops;
        }

        return inet_getname(sock, uaddr, peer);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static int mptcp_v6_getname(struct socket *sock, struct sockaddr *uaddr,
                            int peer)
{
        if (sock->sk->sk_prot == &tcpv6_prot) {
                /* we are being invoked from __sys_accept4 after
                 * mptcp_accept() has accepted a non-mp-capable
                 * subflow: sk is a tcp_sk, not mptcp.
                 *
                 * Hand the socket over to tcp so all further
                 * socket ops bypass mptcp.
                 */
                sock->ops = &inet6_stream_ops;
        }

        return inet6_getname(sock, uaddr, peer);
}
#endif

static int mptcp_listen(struct socket *sock, int backlog)
{
        struct mptcp_sock *msk = mptcp_sk(sock->sk);
        struct socket *ssock;
        int err;

        pr_debug("msk=%p", msk);

        lock_sock(sock->sk);
        ssock = __mptcp_socket_create(msk, TCP_LISTEN);
        if (IS_ERR(ssock)) {
                err = PTR_ERR(ssock);
                goto unlock;
        }

        err = ssock->ops->listen(ssock, backlog);
        inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
        if (!err)
                mptcp_copy_inaddrs(sock->sk, ssock->sk);

unlock:
        release_sock(sock->sk);
        return err;
}

static bool is_tcp_proto(const struct proto *p)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        return p == &tcp_prot || p == &tcpv6_prot;
#else
        return p == &tcp_prot;
#endif
}

static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
                               int flags, bool kern)
{
        struct mptcp_sock *msk = mptcp_sk(sock->sk);
        struct socket *ssock;
        int err;

        pr_debug("msk=%p", msk);

        lock_sock(sock->sk);
        if (sock->sk->sk_state != TCP_LISTEN)
                goto unlock_fail;

        ssock = __mptcp_nmpc_socket(msk);
        if (!ssock)
                goto unlock_fail;

        sock_hold(ssock->sk);
        release_sock(sock->sk);

        err = ssock->ops->accept(sock, newsock, flags, kern);
        if (err == 0 && !is_tcp_proto(newsock->sk->sk_prot)) {
                struct mptcp_sock *msk = mptcp_sk(newsock->sk);
                struct mptcp_subflow_context *subflow;

                /* set ssk->sk_socket of accept()ed flows to mptcp socket.
                 * This is needed so NOSPACE flag can be set from tcp stack.
                 */
                list_for_each_entry(subflow, &msk->conn_list, node) {
                        struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

                        if (!ssk->sk_socket)
                                mptcp_sock_graft(ssk, newsock);
                }

                inet_sk_state_store(newsock->sk, TCP_ESTABLISHED);
        }

        sock_put(ssock->sk);
        return err;

unlock_fail:
        release_sock(sock->sk);
        return -EINVAL;
}

static __poll_t mptcp_poll(struct file *file, struct socket *sock,
                           struct poll_table_struct *wait)
{
        struct sock *sk = sock->sk;
        struct mptcp_sock *msk;
        struct socket *ssock;
        __poll_t mask = 0;

        msk = mptcp_sk(sk);
        lock_sock(sk);
        ssock = __mptcp_nmpc_socket(msk);
        if (ssock) {
                mask = ssock->ops->poll(file, ssock, wait);
                release_sock(sk);
                return mask;
        }

        release_sock(sk);
        sock_poll_wait(file, sock, wait);
        lock_sock(sk);
        ssock = __mptcp_tcp_fallback(msk);
        if (unlikely(ssock))
                return ssock->ops->poll(file, ssock, NULL);

        if (test_bit(MPTCP_DATA_READY, &msk->flags))
                mask = EPOLLIN | EPOLLRDNORM;
        if (sk_stream_is_writeable(sk) &&
            test_bit(MPTCP_SEND_SPACE, &msk->flags))
                mask |= EPOLLOUT | EPOLLWRNORM;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

        release_sock(sk);

        return mask;
}

static int mptcp_shutdown(struct socket *sock, int how)
{
        struct mptcp_sock *msk = mptcp_sk(sock->sk);
        struct mptcp_subflow_context *subflow;
        int ret = 0;

        pr_debug("msk=%p, how=%d", msk, how);

        lock_sock(sock->sk);

        if (how == SHUT_WR || how == SHUT_RDWR)
                inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);

        how++;

        if ((how & ~SHUTDOWN_MASK) || !how) {
                ret = -EINVAL;
                goto out_unlock;
        }

        if (sock->state == SS_CONNECTING) {
                if ((1 << sock->sk->sk_state) &
                    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
                        sock->state = SS_DISCONNECTING;
                else
                        sock->state = SS_CONNECTED;
        }

        mptcp_for_each_subflow(msk, subflow) {
                struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);

                mptcp_subflow_shutdown(tcp_sk, how);
        }

out_unlock:
        release_sock(sock->sk);

        return ret;
}

static const struct proto_ops mptcp_stream_ops = {
        .family            = PF_INET,
        .owner             = THIS_MODULE,
        .release           = inet_release,
        .bind              = mptcp_bind,
        .connect           = mptcp_stream_connect,
        .socketpair        = sock_no_socketpair,
        .accept            = mptcp_stream_accept,
        .getname           = mptcp_v4_getname,
        .poll              = mptcp_poll,
        .ioctl             = inet_ioctl,
        .gettstamp         = sock_gettstamp,
        .listen            = mptcp_listen,
        .shutdown          = mptcp_shutdown,
        .setsockopt        = sock_common_setsockopt,
        .getsockopt        = sock_common_getsockopt,
        .sendmsg           = inet_sendmsg,
        .recvmsg           = inet_recvmsg,
        .mmap              = sock_no_mmap,
        .sendpage          = inet_sendpage,
#ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_sock_common_setsockopt,
        .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

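/* mptcp_protosw below registers IPPROTO_MPTCP as a SOCK_STREAM protocol.
 * Once mptcp_proto_init() has run, an application can request an MPTCP
 * socket directly, roughly as follows (userspace sketch, assuming
 * IPPROTO_MPTCP is exposed through the uapi headers):
 *
 *      int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
 *
 *      connect(fd, (struct sockaddr *)&addr, addrlen);
 *      write(fd, buf, len);
 *
 * The resulting socket is backed by mptcp_prot and mptcp_stream_ops.
 */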
static struct inet_protosw mptcp_protosw = {
        .type           = SOCK_STREAM,
        .protocol       = IPPROTO_MPTCP,
        .prot           = &mptcp_prot,
        .ops            = &mptcp_stream_ops,
        .flags          = INET_PROTOSW_ICSK,
};

void mptcp_proto_init(void)
{
        mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;

        mptcp_subflow_init();

        if (proto_register(&mptcp_prot, 1) != 0)
                panic("Failed to register MPTCP proto.\n");

        inet_register_protosw(&mptcp_protosw);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static const struct proto_ops mptcp_v6_stream_ops = {
        .family            = PF_INET6,
        .owner             = THIS_MODULE,
        .release           = inet6_release,
        .bind              = mptcp_bind,
        .connect           = mptcp_stream_connect,
        .socketpair        = sock_no_socketpair,
        .accept            = mptcp_stream_accept,
        .getname           = mptcp_v6_getname,
        .poll              = mptcp_poll,
        .ioctl             = inet6_ioctl,
        .gettstamp         = sock_gettstamp,
        .listen            = mptcp_listen,
        .shutdown          = mptcp_shutdown,
        .setsockopt        = sock_common_setsockopt,
        .getsockopt        = sock_common_getsockopt,
        .sendmsg           = inet6_sendmsg,
        .recvmsg           = inet6_recvmsg,
        .mmap              = sock_no_mmap,
        .sendpage          = inet_sendpage,
#ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_sock_common_setsockopt,
        .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

static struct proto mptcp_v6_prot;

static void mptcp_v6_destroy(struct sock *sk)
{
        mptcp_destroy(sk);
        inet6_destroy_sock(sk);
}

static struct inet_protosw mptcp_v6_protosw = {
        .type           = SOCK_STREAM,
        .protocol       = IPPROTO_MPTCP,
        .prot           = &mptcp_v6_prot,
        .ops            = &mptcp_v6_stream_ops,
        .flags          = INET_PROTOSW_ICSK,
};

int mptcp_proto_v6_init(void)
{
        int err;

        mptcp_v6_prot = mptcp_prot;
        strcpy(mptcp_v6_prot.name, "MPTCPv6");
        mptcp_v6_prot.slab = NULL;
        mptcp_v6_prot.destroy = mptcp_v6_destroy;
        mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);

        err = proto_register(&mptcp_v6_prot, 1);
        if (err)
                return err;

        err = inet6_register_protosw(&mptcp_v6_protosw);
        if (err)
                proto_unregister(&mptcp_v6_prot);

        return err;
}
#endif