Peter Krystad2303f992020-01-21 16:56:17 -08001// SPDX-License-Identifier: GPL-2.0
2/* Multipath TCP
3 *
4 * Copyright (c) 2017 - 2019, Intel Corporation.
5 */
6
Peter Krystad79c09492020-01-21 16:56:20 -08007#define pr_fmt(fmt) "MPTCP: " fmt
8
Peter Krystad2303f992020-01-21 16:56:17 -08009#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/netdevice.h>
Peter Krystadf2962342020-03-27 14:48:39 -070012#include <crypto/algapi.h>
Peter Krystad2303f992020-01-21 16:56:17 -080013#include <net/sock.h>
14#include <net/inet_common.h>
15#include <net/inet_hashtables.h>
16#include <net/protocol.h>
17#include <net/tcp.h>
Peter Krystadcec37a62020-01-21 16:56:18 -080018#if IS_ENABLED(CONFIG_MPTCP_IPV6)
19#include <net/ip6_route.h>
20#endif
Peter Krystad2303f992020-01-21 16:56:17 -080021#include <net/mptcp.h>
22#include "protocol.h"
Florian Westphalfc518952020-03-27 14:48:50 -070023#include "mib.h"
24
25static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
26 enum linux_mptcp_mib_field field)
27{
28 MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
29}
Peter Krystad2303f992020-01-21 16:56:17 -080030
Peter Krystad79c09492020-01-21 16:56:20 -080031static int subflow_rebuild_header(struct sock *sk)
32{
33 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
Peter Krystadec3edaa2020-03-27 14:48:40 -070034 int local_id, err = 0;
Peter Krystad79c09492020-01-21 16:56:20 -080035
36 if (subflow->request_mptcp && !subflow->token) {
37 pr_debug("subflow=%p", sk);
38 err = mptcp_token_new_connect(sk);
Peter Krystadec3edaa2020-03-27 14:48:40 -070039 } else if (subflow->request_join && !subflow->local_nonce) {
40 struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;
41
42 pr_debug("subflow=%p", sk);
43
44 do {
45 get_random_bytes(&subflow->local_nonce, sizeof(u32));
46 } while (!subflow->local_nonce);
47
48 if (subflow->local_id)
49 goto out;
50
51 local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
52 if (local_id < 0)
53 return -EINVAL;
54
55 subflow->local_id = local_id;
Peter Krystad79c09492020-01-21 16:56:20 -080056 }
57
Peter Krystadec3edaa2020-03-27 14:48:40 -070058out:
Peter Krystad79c09492020-01-21 16:56:20 -080059 if (err)
60 return err;
61
62 return subflow->icsk_af_ops->rebuild_header(sk);
63}
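
/* Rebuild-header hook: on the first rebuild of an MP_CAPABLE connect the
 * connection token is allocated, while an MP_JOIN connect picks a non-zero
 * local nonce and, if not already set, asks the path manager for the local
 * address id before falling through to the plain TCP rebuild_header().
 */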
64
65static void subflow_req_destructor(struct request_sock *req)
66{
67 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
68
69 pr_debug("subflow_req=%p", subflow_req);
70
71 if (subflow_req->mp_capable)
72 mptcp_token_destroy_request(subflow_req->token);
73 tcp_request_sock_ops.destructor(req);
74}
75
Peter Krystadf2962342020-03-27 14:48:39 -070076static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
77 void *hmac)
78{
79 u8 msg[8];
80
81 put_unaligned_be32(nonce1, &msg[0]);
82 put_unaligned_be32(nonce2, &msg[4]);
83
84 mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
85}
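
/* The HMAC message is simply the two 32-bit nonces packed back to back in
 * network byte order:
 *
 *	msg[0..3] = be32(nonce1)
 *	msg[4..7] = be32(nonce2)
 *
 * key1/key2 are the two peers' 64-bit MPTCP keys; which one comes first
 * depends on the direction being authenticated (see the callers).
 */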
86
87/* validate received token and create truncated hmac and nonce for SYN-ACK */
88static bool subflow_token_join_request(struct request_sock *req,
89 const struct sk_buff *skb)
90{
91 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
92 u8 hmac[MPTCPOPT_HMAC_LEN];
93 struct mptcp_sock *msk;
94 int local_id;
95
96 msk = mptcp_token_get_sock(subflow_req->token);
97 if (!msk) {
Florian Westphalfc518952020-03-27 14:48:50 -070098 SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
Peter Krystadf2962342020-03-27 14:48:39 -070099 return false;
100 }
101
102 local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
103 if (local_id < 0) {
104 sock_put((struct sock *)msk);
105 return false;
106 }
107 subflow_req->local_id = local_id;
108
109 get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
110
111 subflow_generate_hmac(msk->local_key, msk->remote_key,
112 subflow_req->local_nonce,
113 subflow_req->remote_nonce, hmac);
114
115 subflow_req->thmac = get_unaligned_be64(hmac);
116
117 sock_put((struct sock *)msk);
118 return true;
119}
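
/* An MP_JOIN SYN is accepted only if the token maps to an existing MPTCP
 * socket and the path manager provides a local address id. The truncated
 * HMAC (thmac) echoed in the SYN-ACK is the leftmost 64 bits of the HMAC
 * computed over the local and remote nonces with the msk keys.
 */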
120
Peter Krystadcec37a62020-01-21 16:56:18 -0800121static void subflow_init_req(struct request_sock *req,
122 const struct sock *sk_listener,
123 struct sk_buff *skb)
124{
125 struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
126 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
127 struct tcp_options_received rx_opt;
128
129 pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
130
131 memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp));
132 mptcp_get_options(skb, &rx_opt);
133
134 subflow_req->mp_capable = 0;
Peter Krystadf2962342020-03-27 14:48:39 -0700135 subflow_req->mp_join = 0;
Peter Krystadcec37a62020-01-21 16:56:18 -0800136
137#ifdef CONFIG_TCP_MD5SIG
138 /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
139 * TCP option space.
140 */
141 if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
142 return;
143#endif
144
Florian Westphalfc518952020-03-27 14:48:50 -0700145 if (rx_opt.mptcp.mp_capable) {
146 SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
147
148 if (rx_opt.mptcp.mp_join)
149 return;
150 } else if (rx_opt.mptcp.mp_join) {
151 SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
152 }
Peter Krystadf2962342020-03-27 14:48:39 -0700153
Peter Krystadcec37a62020-01-21 16:56:18 -0800154 if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
Peter Krystad79c09492020-01-21 16:56:20 -0800155 int err;
156
157 err = mptcp_token_new_request(req);
158 if (err == 0)
159 subflow_req->mp_capable = 1;
160
Mat Martineau648ef4b2020-01-21 16:56:24 -0800161 subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
Peter Krystadf2962342020-03-27 14:48:39 -0700162 } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) {
Peter Krystadec3edaa2020-03-27 14:48:40 -0700163 subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
Peter Krystadf2962342020-03-27 14:48:39 -0700164 subflow_req->mp_join = 1;
165 subflow_req->backup = rx_opt.mptcp.backup;
166 subflow_req->remote_id = rx_opt.mptcp.join_id;
167 subflow_req->token = rx_opt.mptcp.token;
168 subflow_req->remote_nonce = rx_opt.mptcp.nonce;
169 pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
170 subflow_req->remote_nonce);
171 if (!subflow_token_join_request(req, skb)) {
172 subflow_req->mp_join = 0;
173 /* @@ need to trigger RST */
174 }
Peter Krystadcec37a62020-01-21 16:56:18 -0800175 }
176}
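
/* subflow_init_req() runs for every SYN landing on an MPTCP listener: the
 * MPTCP options are parsed once, then the request is set up either as
 * MP_CAPABLE (fresh token) or MP_JOIN (token/nonce/backup flag copied from
 * the peer). A SYN carrying both options, or arriving on an MD5SIG socket,
 * falls back to plain TCP.
 */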
177
178static void subflow_v4_init_req(struct request_sock *req,
179 const struct sock *sk_listener,
180 struct sk_buff *skb)
181{
182 tcp_rsk(req)->is_mptcp = 1;
183
184 tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
185
186 subflow_init_req(req, sk_listener, skb);
187}
188
189#if IS_ENABLED(CONFIG_MPTCP_IPV6)
190static void subflow_v6_init_req(struct request_sock *req,
191 const struct sock *sk_listener,
192 struct sk_buff *skb)
193{
194 tcp_rsk(req)->is_mptcp = 1;
195
196 tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);
197
198 subflow_init_req(req, sk_listener, skb);
199}
200#endif
201
Peter Krystadec3edaa2020-03-27 14:48:40 -0700202/* validate received truncated hmac and create hmac for third ACK */
203static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
204{
205 u8 hmac[MPTCPOPT_HMAC_LEN];
206 u64 thmac;
207
208 subflow_generate_hmac(subflow->remote_key, subflow->local_key,
209 subflow->remote_nonce, subflow->local_nonce,
210 hmac);
211
212 thmac = get_unaligned_be64(hmac);
213 pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
214 subflow, subflow->token,
215 (unsigned long long)thmac,
216 (unsigned long long)subflow->thmac);
217
218 return thmac == subflow->thmac;
219}
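
/* The initiator recomputes the HMAC with the peer's key and nonce first,
 * mirroring what the listener did in subflow_token_join_request(), and
 * compares its leftmost 64 bits against the thmac carried in the SYN-ACK.
 */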
220
Peter Krystadcec37a62020-01-21 16:56:18 -0800221static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
222{
223 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
Davide Carattic3c123d2020-03-19 22:45:37 +0100224 struct sock *parent = subflow->conn;
Paolo Abeni263e1202020-04-30 15:01:51 +0200225 struct tcp_sock *tp = tcp_sk(sk);
Peter Krystadcec37a62020-01-21 16:56:18 -0800226
227 subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
228
Paolo Abeni12008322020-04-24 13:15:21 +0200229 if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
Davide Carattic3c123d2020-03-19 22:45:37 +0100230 inet_sk_state_store(parent, TCP_ESTABLISHED);
231 parent->sk_state_change(parent);
232 }
233
Paolo Abeni263e1202020-04-30 15:01:51 +0200234 /* make sure we take no special action on any packet other than the SYN-ACK */
235 if (subflow->conn_finished)
236 return;
237
238 subflow->conn_finished = 1;
239
240 if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
241 subflow->mp_capable = 1;
242 subflow->can_ack = 1;
243 subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
244 pr_debug("subflow=%p, remote_key=%llu", subflow,
245 subflow->remote_key);
246 } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) {
247 subflow->mp_join = 1;
248 subflow->thmac = tp->rx_opt.mptcp.thmac;
249 subflow->remote_nonce = tp->rx_opt.mptcp.nonce;
250 pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
251 subflow->thmac, subflow->remote_nonce);
252 } else if (subflow->request_mptcp) {
253 tp->is_mptcp = 0;
254 }
255
256 if (!tp->is_mptcp)
Peter Krystadec3edaa2020-03-27 14:48:40 -0700257 return;
258
259 if (subflow->mp_capable) {
Peter Krystadcec37a62020-01-21 16:56:18 -0800260 pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
261 subflow->remote_key);
262 mptcp_finish_connect(sk);
Mat Martineau648ef4b2020-01-21 16:56:24 -0800263
264 if (skb) {
265 pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
266 subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
267 }
Peter Krystadec3edaa2020-03-27 14:48:40 -0700268 } else if (subflow->mp_join) {
269 pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
270 subflow, subflow->thmac,
271 subflow->remote_nonce);
272 if (!subflow_thmac_valid(subflow)) {
Florian Westphalfc518952020-03-27 14:48:50 -0700273 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
Peter Krystadec3edaa2020-03-27 14:48:40 -0700274 subflow->mp_join = 0;
275 goto do_reset;
276 }
277
278 subflow_generate_hmac(subflow->local_key, subflow->remote_key,
279 subflow->local_nonce,
280 subflow->remote_nonce,
281 subflow->hmac);
282
283 if (skb)
284 subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
285
286 if (!mptcp_finish_join(sk))
287 goto do_reset;
288
Florian Westphalfc518952020-03-27 14:48:50 -0700289 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
Peter Krystadec3edaa2020-03-27 14:48:40 -0700290 } else {
291do_reset:
292 tcp_send_active_reset(sk, GFP_ATOMIC);
293 tcp_done(sk);
Peter Krystadcec37a62020-01-21 16:56:18 -0800294 }
295}
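
/* Three outcomes are possible once the SYN-ACK has been parsed: an
 * MP_CAPABLE reply completes the MPTCP handshake via mptcp_finish_connect(),
 * an MP_JOIN reply is authenticated (thmac) and attached to the existing msk
 * via mptcp_finish_join(), and any failed join attempt resets the subflow;
 * a plain TCP reply to an MP_CAPABLE request simply falls back to TCP.
 */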
296
297static struct request_sock_ops subflow_request_sock_ops;
298static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
299
300static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
301{
302 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
303
304 pr_debug("subflow=%p", subflow);
305
306 /* Never answer to SYNs sent to broadcast or multicast */
307 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
308 goto drop;
309
310 return tcp_conn_request(&subflow_request_sock_ops,
311 &subflow_request_sock_ipv4_ops,
312 sk, skb);
313drop:
314 tcp_listendrop(sk);
315 return 0;
316}
317
318#if IS_ENABLED(CONFIG_MPTCP_IPV6)
319static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
320static struct inet_connection_sock_af_ops subflow_v6_specific;
321static struct inet_connection_sock_af_ops subflow_v6m_specific;
322
323static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
324{
325 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
326
327 pr_debug("subflow=%p", subflow);
328
329 if (skb->protocol == htons(ETH_P_IP))
330 return subflow_v4_conn_request(sk, skb);
331
332 if (!ipv6_unicast_destination(skb))
333 goto drop;
334
335 return tcp_conn_request(&subflow_request_sock_ops,
336 &subflow_request_sock_ipv6_ops, sk, skb);
337
338drop:
339 tcp_listendrop(sk);
340 return 0; /* don't send reset */
341}
342#endif
343
Peter Krystadf2962342020-03-27 14:48:39 -0700344/* validate hmac received in third ACK */
345static bool subflow_hmac_valid(const struct request_sock *req,
346 const struct tcp_options_received *rx_opt)
347{
348 const struct mptcp_subflow_request_sock *subflow_req;
349 u8 hmac[MPTCPOPT_HMAC_LEN];
350 struct mptcp_sock *msk;
351 bool ret;
352
353 subflow_req = mptcp_subflow_rsk(req);
354 msk = mptcp_token_get_sock(subflow_req->token);
355 if (!msk)
356 return false;
357
358 subflow_generate_hmac(msk->remote_key, msk->local_key,
359 subflow_req->remote_nonce,
360 subflow_req->local_nonce, hmac);
361
362 ret = true;
363 if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac)))
364 ret = false;
365
366 sock_put((struct sock *)msk);
367 return ret;
368}
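
/* The full-length HMAC carried in the third ACK is compared with
 * crypto_memneq(), a helper meant to be constant-time, so a mismatch does
 * not leak how many leading bytes were correct.
 */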
369
Florian Westphaldf1036d2020-04-17 09:28:22 +0200370static void mptcp_sock_destruct(struct sock *sk)
371{
372 /* if the new mptcp socket isn't accepted, it is freed
373 * from the tcp listener socket's request queue, linked
374 * from req->sk. The tcp socket is then released.
375 * This calls the ULP release function, which will
376 * also remove the mptcp socket, via
377 * sock_put(ctx->conn).
378 *
379 * The problem is that the mptcp socket will not be in
380 * the SYN_RECV state and doesn't have the SOCK_DEAD flag set.
381 * Both result in warnings from inet_sock_destruct.
382 */
383
384 if (sk->sk_state == TCP_SYN_RECV) {
385 sk->sk_state = TCP_CLOSE;
386 WARN_ON_ONCE(sk->sk_socket);
387 sock_orphan(sk);
388 }
389
390 inet_sock_destruct(sk);
391}
392
Florian Westphal9f5ca6a2020-04-17 09:28:23 +0200393static void mptcp_force_close(struct sock *sk)
394{
395 inet_sk_state_store(sk, TCP_CLOSE);
396 sk_common_release(sk);
397}
398
Paolo Abeni4c8941d2020-04-20 16:25:05 +0200399static void subflow_ulp_fallback(struct sock *sk,
400 struct mptcp_subflow_context *old_ctx)
401{
402 struct inet_connection_sock *icsk = inet_csk(sk);
403
404 mptcp_subflow_tcp_fallback(sk, old_ctx);
405 icsk->icsk_ulp_ops = NULL;
406 rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
407 tcp_sk(sk)->is_mptcp = 0;
408}
409
Peter Krystadcec37a62020-01-21 16:56:18 -0800410static struct sock *subflow_syn_recv_sock(const struct sock *sk,
411 struct sk_buff *skb,
412 struct request_sock *req,
413 struct dst_entry *dst,
414 struct request_sock *req_unhash,
415 bool *own_req)
416{
417 struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
Christoph Paaschcc7972e2020-01-21 16:56:31 -0800418 struct mptcp_subflow_request_sock *subflow_req;
419 struct tcp_options_received opt_rx;
Peter Krystadf2962342020-03-27 14:48:39 -0700420 bool fallback_is_fatal = false;
Paolo Abeni58b09912020-03-13 16:52:41 +0100421 struct sock *new_msk = NULL;
Paolo Abeni4c8941d2020-04-20 16:25:05 +0200422 bool fallback = false;
Peter Krystadcec37a62020-01-21 16:56:18 -0800423 struct sock *child;
424
425 pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
426
Paolo Abenifca5c822020-04-20 16:25:06 +0200427 opt_rx.mptcp.mp_capable = 0;
Florian Westphalae2dd712020-01-29 15:54:46 +0100428 if (tcp_rsk(req)->is_mptcp == 0)
429 goto create_child;
430
Christoph Paaschd22f4982020-01-21 16:56:32 -0800431 /* if the sk is MP_CAPABLE, we try to fetch the client key */
Christoph Paaschcc7972e2020-01-21 16:56:31 -0800432 subflow_req = mptcp_subflow_rsk(req);
433 if (subflow_req->mp_capable) {
Christoph Paaschd22f4982020-01-21 16:56:32 -0800434 if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
435 /* here we can receive and accept an in-window,
436 * out-of-order packet, which will not carry the MP_CAPABLE
437 * option even on MPTCP-enabled paths
438 */
Paolo Abeni58b09912020-03-13 16:52:41 +0100439 goto create_msk;
Christoph Paaschd22f4982020-01-21 16:56:32 -0800440 }
441
Christoph Paaschcc7972e2020-01-21 16:56:31 -0800442 mptcp_get_options(skb, &opt_rx);
Paolo Abenifca5c822020-04-20 16:25:06 +0200443 if (!opt_rx.mptcp.mp_capable) {
Paolo Abeni4c8941d2020-04-20 16:25:05 +0200444 fallback = true;
Paolo Abeni58b09912020-03-13 16:52:41 +0100445 goto create_child;
Christoph Paaschd22f4982020-01-21 16:56:32 -0800446 }
Paolo Abeni58b09912020-03-13 16:52:41 +0100447
448create_msk:
Paolo Abenifca5c822020-04-20 16:25:06 +0200449 new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req);
Paolo Abeni58b09912020-03-13 16:52:41 +0100450 if (!new_msk)
Paolo Abeni4c8941d2020-04-20 16:25:05 +0200451 fallback = true;
Peter Krystadf2962342020-03-27 14:48:39 -0700452 } else if (subflow_req->mp_join) {
453 fallback_is_fatal = true;
454 opt_rx.mptcp.mp_join = 0;
455 mptcp_get_options(skb, &opt_rx);
456 if (!opt_rx.mptcp.mp_join ||
Florian Westphalfc518952020-03-27 14:48:50 -0700457 !subflow_hmac_valid(req, &opt_rx)) {
458 SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
Peter Krystadf2962342020-03-27 14:48:39 -0700459 return NULL;
Florian Westphalfc518952020-03-27 14:48:50 -0700460 }
Christoph Paaschcc7972e2020-01-21 16:56:31 -0800461 }
Peter Krystadcec37a62020-01-21 16:56:18 -0800462
Christoph Paaschd22f4982020-01-21 16:56:32 -0800463create_child:
Peter Krystadcec37a62020-01-21 16:56:18 -0800464 child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
465 req_unhash, own_req);
466
467 if (child && *own_req) {
Peter Krystad79c09492020-01-21 16:56:20 -0800468 struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
469
Paolo Abeni4c8941d2020-04-20 16:25:05 +0200470 /* we need to fall back on ctx allocation failure and on the pre-reqs
471 * checks above. In the latter scenario we additionally need
472 * to reset the context to non-MPTCP status.
Peter Krystad79c09492020-01-21 16:56:20 -0800473 */
Paolo Abeni4c8941d2020-04-20 16:25:05 +0200474 if (!ctx || fallback) {
Peter Krystadf2962342020-03-27 14:48:39 -0700475 if (fallback_is_fatal)
476 goto close_child;
Paolo Abeni4c8941d2020-04-20 16:25:05 +0200477
478 if (ctx) {
479 subflow_ulp_fallback(child, ctx);
480 kfree_rcu(ctx, rcu);
481 }
Paolo Abeni58b09912020-03-13 16:52:41 +0100482 goto out;
Peter Krystadf2962342020-03-27 14:48:39 -0700483 }
Peter Krystad79c09492020-01-21 16:56:20 -0800484
485 if (ctx->mp_capable) {
Paolo Abeni58b09912020-03-13 16:52:41 +0100486 /* new mpc subflow takes ownership of the newly
487 * created mptcp socket
488 */
Florian Westphaldf1036d2020-04-17 09:28:22 +0200489 new_msk->sk_destruct = mptcp_sock_destruct;
Peter Krystad1b1c7a02020-03-27 14:48:38 -0700490 mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
Paolo Abeni58b09912020-03-13 16:52:41 +0100491 ctx->conn = new_msk;
492 new_msk = NULL;
Paolo Abenifca5c822020-04-20 16:25:06 +0200493
494 /* with out-of-order (OoO) packets we can reach here without an
495 * ingress MP_CAPABLE (mpc) option
496 */
497 ctx->remote_key = opt_rx.mptcp.sndr_key;
498 ctx->fully_established = opt_rx.mptcp.mp_capable;
499 ctx->can_ack = opt_rx.mptcp.mp_capable;
Peter Krystadf2962342020-03-27 14:48:39 -0700500 } else if (ctx->mp_join) {
501 struct mptcp_sock *owner;
502
503 owner = mptcp_token_get_sock(ctx->token);
504 if (!owner)
505 goto close_child;
506
507 ctx->conn = (struct sock *)owner;
508 if (!mptcp_finish_join(child))
509 goto close_child;
Florian Westphalfc518952020-03-27 14:48:50 -0700510
511 SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
Peter Krystadcec37a62020-01-21 16:56:18 -0800512 }
513 }
514
Paolo Abeni58b09912020-03-13 16:52:41 +0100515out:
516 /* dispose of the leftover mptcp master, if any */
517 if (unlikely(new_msk))
Florian Westphal9f5ca6a2020-04-17 09:28:23 +0200518 mptcp_force_close(new_msk);
Paolo Abeni4c8941d2020-04-20 16:25:05 +0200519
520 /* check for the expected invariant - should never trigger, just helps
521 * catch earlier subtle bugs
522 */
523 WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp &&
524 (!mptcp_subflow_ctx(child) ||
525 !mptcp_subflow_ctx(child)->conn));
Peter Krystadcec37a62020-01-21 16:56:18 -0800526 return child;
Peter Krystadf2962342020-03-27 14:48:39 -0700527
528close_child:
529 tcp_send_active_reset(child, GFP_ATOMIC);
530 inet_csk_prepare_forced_close(child);
531 tcp_done(child);
532 return NULL;
Peter Krystadcec37a62020-01-21 16:56:18 -0800533}
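
/* Ownership on the accept path: an MP_CAPABLE child hands the freshly
 * cloned msk (new_msk) to its subflow context, an MP_JOIN child attaches to
 * the msk looked up by token, and every fallback either degrades the child
 * to plain TCP via subflow_ulp_fallback() or, when fallback is fatal for a
 * join, resets and closes the child.
 */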
534
535static struct inet_connection_sock_af_ops subflow_specific;
536
Mat Martineau648ef4b2020-01-21 16:56:24 -0800537enum mapping_status {
538 MAPPING_OK,
539 MAPPING_INVALID,
540 MAPPING_EMPTY,
541 MAPPING_DATA_FIN
542};
543
544static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
545{
546 if ((u32)seq == (u32)old_seq)
547 return old_seq;
548
549 /* Assume map covers data not mapped yet. */
550 return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
551}
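
/* Illustrative example (hypothetical numbers): with old_seq = 0x1fffffff0,
 * old_data_len = 0x20 and a 32-bit seq of 0x00000010, the new map starts
 * right where the old one ends, so the expanded value carries the
 * incremented upper word: 0x200000010.
 */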
552
553static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
554{
555 WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
556 ssn, subflow->map_subflow_seq, subflow->map_data_len);
557}
558
559static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
560{
561 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
562 unsigned int skb_consumed;
563
564 skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
565 if (WARN_ON_ONCE(skb_consumed >= skb->len))
566 return true;
567
568 return skb->len - skb_consumed <= subflow->map_data_len -
569 mptcp_subflow_get_map_offset(subflow);
570}
571
572static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
573{
574 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
575 u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
576
577 if (unlikely(before(ssn, subflow->map_subflow_seq))) {
578 /* Mapping covers data later in the subflow stream,
579 * currently unsupported.
580 */
581 warn_bad_map(subflow, ssn);
582 return false;
583 }
584 if (unlikely(!before(ssn, subflow->map_subflow_seq +
585 subflow->map_data_len))) {
586 /* Mapping covers only past subflow data, invalid */
587 warn_bad_map(subflow, ssn + skb->len);
588 return false;
589 }
590 return true;
591}
592
593static enum mapping_status get_mapping_status(struct sock *ssk)
594{
595 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
596 struct mptcp_ext *mpext;
597 struct sk_buff *skb;
598 u16 data_len;
599 u64 map_seq;
600
601 skb = skb_peek(&ssk->sk_receive_queue);
602 if (!skb)
603 return MAPPING_EMPTY;
604
605 mpext = mptcp_get_ext(skb);
606 if (!mpext || !mpext->use_map) {
607 if (!subflow->map_valid && !skb->len) {
608 /* the TCP stack delivers 0-len FIN pkts to the receive
609 * queue; those are the only 0-len pkts ever expected here,
610 * and only for them can we admit a missing mapping
611 */
612 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
613 WARN_ONCE(1, "0len seq %d:%d flags %x",
614 TCP_SKB_CB(skb)->seq,
615 TCP_SKB_CB(skb)->end_seq,
616 TCP_SKB_CB(skb)->tcp_flags);
617 sk_eat_skb(ssk, skb);
618 return MAPPING_EMPTY;
619 }
620
621 if (!subflow->map_valid)
622 return MAPPING_INVALID;
623
624 goto validate_seq;
625 }
626
627 pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
628 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
629 mpext->data_len, mpext->data_fin);
630
631 data_len = mpext->data_len;
632 if (data_len == 0) {
633 pr_err("Infinite mapping not handled");
Florian Westphalfc518952020-03-27 14:48:50 -0700634 MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
Mat Martineau648ef4b2020-01-21 16:56:24 -0800635 return MAPPING_INVALID;
636 }
637
638 if (mpext->data_fin == 1) {
639 if (data_len == 1) {
640 pr_debug("DATA_FIN with no payload");
641 if (subflow->map_valid) {
642 /* A DATA_FIN might arrive in a DSS
643 * option before the previous mapping
644 * has been fully consumed. Continue
645 * handling the existing mapping.
646 */
647 skb_ext_del(skb, SKB_EXT_MPTCP);
648 return MAPPING_OK;
649 } else {
650 return MAPPING_DATA_FIN;
651 }
652 }
653
654 /* Adjust for DATA_FIN using 1 byte of sequence space */
655 data_len--;
656 }
657
658 if (!mpext->dsn64) {
659 map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
660 mpext->data_seq);
661 pr_debug("expanded seq=%llu", subflow->map_seq);
662 } else {
663 map_seq = mpext->data_seq;
664 }
665
666 if (subflow->map_valid) {
667 /* Allow replacing only with an identical map */
668 if (subflow->map_seq == map_seq &&
669 subflow->map_subflow_seq == mpext->subflow_seq &&
670 subflow->map_data_len == data_len) {
671 skb_ext_del(skb, SKB_EXT_MPTCP);
672 return MAPPING_OK;
673 }
674
675 /* If this skb's data is fully covered by the current mapping,
676 * the new map would need caching, which is not supported
677 */
Florian Westphalfc518952020-03-27 14:48:50 -0700678 if (skb_is_fully_mapped(ssk, skb)) {
679 MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
Mat Martineau648ef4b2020-01-21 16:56:24 -0800680 return MAPPING_INVALID;
Florian Westphalfc518952020-03-27 14:48:50 -0700681 }
Mat Martineau648ef4b2020-01-21 16:56:24 -0800682
683 /* will validate the next map after consuming the current one */
684 return MAPPING_OK;
685 }
686
687 subflow->map_seq = map_seq;
688 subflow->map_subflow_seq = mpext->subflow_seq;
689 subflow->map_data_len = data_len;
690 subflow->map_valid = 1;
Christoph Paaschd22f4982020-01-21 16:56:32 -0800691 subflow->mpc_map = mpext->mpc_map;
Mat Martineau648ef4b2020-01-21 16:56:24 -0800692 pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
693 subflow->map_seq, subflow->map_subflow_seq,
694 subflow->map_data_len);
695
696validate_seq:
697 /* we revalidate the existing mapping on each new skb, because we must
698 * ensure the current skb is completely covered by the available mapping
699 */
700 if (!validate_mapping(ssk, skb))
701 return MAPPING_INVALID;
702
703 skb_ext_del(skb, SKB_EXT_MPTCP);
704 return MAPPING_OK;
705}
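
/* Mapping rules enforced above: a zero data_len (infinite mapping) is
 * rejected, a DATA_FIN consumes one byte of data sequence space, 32-bit
 * data sequence numbers are expanded against the previous mapping, and a
 * new mapping may only replace an identical one - a different mapping is
 * rejected while the current one still covers the whole skb, since caching
 * of future mappings is not supported.
 */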
706
Florian Westphalbfae9da2020-02-26 10:14:50 +0100707static int subflow_read_actor(read_descriptor_t *desc,
708 struct sk_buff *skb,
709 unsigned int offset, size_t len)
710{
711 size_t copy_len = min(desc->count, len);
712
713 desc->count -= copy_len;
714
715 pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
716 return copy_len;
717}
718
Mat Martineau648ef4b2020-01-21 16:56:24 -0800719static bool subflow_check_data_avail(struct sock *ssk)
720{
721 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
722 enum mapping_status status;
723 struct mptcp_sock *msk;
724 struct sk_buff *skb;
725
726 pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
727 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
728 if (subflow->data_avail)
729 return true;
730
Mat Martineau648ef4b2020-01-21 16:56:24 -0800731 msk = mptcp_sk(subflow->conn);
732 for (;;) {
733 u32 map_remaining;
734 size_t delta;
735 u64 ack_seq;
736 u64 old_ack;
737
738 status = get_mapping_status(ssk);
739 pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
740 if (status == MAPPING_INVALID) {
741 ssk->sk_err = EBADMSG;
742 goto fatal;
743 }
744
745 if (status != MAPPING_OK)
746 return false;
747
748 skb = skb_peek(&ssk->sk_receive_queue);
749 if (WARN_ON_ONCE(!skb))
750 return false;
751
Christoph Paaschd22f4982020-01-21 16:56:32 -0800752 /* if msk lacks the remote key, this subflow must provide an
753 * MP_CAPABLE-based mapping
754 */
755 if (unlikely(!READ_ONCE(msk->can_ack))) {
756 if (!subflow->mpc_map) {
757 ssk->sk_err = EBADMSG;
758 goto fatal;
759 }
760 WRITE_ONCE(msk->remote_key, subflow->remote_key);
761 WRITE_ONCE(msk->ack_seq, subflow->map_seq);
762 WRITE_ONCE(msk->can_ack, true);
763 }
764
Mat Martineau648ef4b2020-01-21 16:56:24 -0800765 old_ack = READ_ONCE(msk->ack_seq);
766 ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
767 pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
768 ack_seq);
769 if (ack_seq == old_ack)
770 break;
771
772 /* only accept in-sequence mappings. Old values are spurious
773 * retransmissions; we can hit "future" values on an active backup
774 * subflow switch, and we rely on retransmissions to get
775 * in-sequence data.
776 * Concurrent subflow support will require subflow data
777 * reordering
778 */
779 map_remaining = subflow->map_data_len -
780 mptcp_subflow_get_map_offset(subflow);
781 if (before64(ack_seq, old_ack))
782 delta = min_t(size_t, old_ack - ack_seq, map_remaining);
783 else
784 delta = min_t(size_t, ack_seq - old_ack, map_remaining);
785
786 /* discard mapped data */
787 pr_debug("discarding %zu bytes, current map len=%d", delta,
788 map_remaining);
789 if (delta) {
Mat Martineau648ef4b2020-01-21 16:56:24 -0800790 read_descriptor_t desc = {
791 .count = delta,
Mat Martineau648ef4b2020-01-21 16:56:24 -0800792 };
793 int ret;
794
Florian Westphalbfae9da2020-02-26 10:14:50 +0100795 ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
Mat Martineau648ef4b2020-01-21 16:56:24 -0800796 if (ret < 0) {
797 ssk->sk_err = -ret;
798 goto fatal;
799 }
800 if (ret < delta)
801 return false;
802 if (delta == map_remaining)
803 subflow->map_valid = 0;
804 }
805 }
806 return true;
807
808fatal:
809 /* fatal protocol error, close the socket */
810 /* This barrier is coupled with smp_rmb() in tcp_poll() */
811 smp_wmb();
812 ssk->sk_error_report(ssk);
813 tcp_set_state(ssk, TCP_CLOSE);
814 tcp_send_active_reset(ssk, GFP_ATOMIC);
815 return false;
816}
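
/* Only an in-sequence mapping (DSN matching the msk-level ack_seq) breaks
 * the loop above; out-of-sequence mapped data, whether old or "future", is
 * dropped straight from the subflow receive queue via tcp_read_sock(), and
 * any protocol violation takes the 'fatal' path, which resets the subflow.
 */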
817
818bool mptcp_subflow_data_available(struct sock *sk)
819{
820 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
821 struct sk_buff *skb;
822
823 /* check if current mapping is still valid */
824 if (subflow->map_valid &&
825 mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
826 subflow->map_valid = 0;
827 subflow->data_avail = 0;
828
829 pr_debug("Done with mapping: seq=%u data_len=%u",
830 subflow->map_subflow_seq,
831 subflow->map_data_len);
832 }
833
834 if (!subflow_check_data_avail(sk)) {
835 subflow->data_avail = 0;
836 return false;
837 }
838
839 skb = skb_peek(&sk->sk_receive_queue);
840 subflow->data_avail = skb &&
841 before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
842 return subflow->data_avail;
843}
844
845static void subflow_data_ready(struct sock *sk)
846{
847 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
848 struct sock *parent = subflow->conn;
849
Peter Krystadf2962342020-03-27 14:48:39 -0700850 if (!subflow->mp_capable && !subflow->mp_join) {
Mat Martineau648ef4b2020-01-21 16:56:24 -0800851 subflow->tcp_data_ready(sk);
852
Paolo Abenidc093db2020-03-13 16:52:42 +0100853 parent->sk_data_ready(parent);
Mat Martineau648ef4b2020-01-21 16:56:24 -0800854 return;
855 }
856
Florian Westphal101f6f82020-02-26 10:14:46 +0100857 if (mptcp_subflow_data_available(sk))
Florian Westphal2e522132020-02-26 10:14:51 +0100858 mptcp_data_ready(parent, sk);
Mat Martineau648ef4b2020-01-21 16:56:24 -0800859}
860
861static void subflow_write_space(struct sock *sk)
862{
863 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
864 struct sock *parent = subflow->conn;
865
866 sk_stream_write_space(sk);
Paolo Abenidc093db2020-03-13 16:52:42 +0100867 if (sk_stream_is_writeable(sk)) {
Florian Westphal1891c4a2020-01-21 16:56:25 -0800868 set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
869 smp_mb__after_atomic();
870 /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
Mat Martineau648ef4b2020-01-21 16:56:24 -0800871 sk_stream_write_space(parent);
872 }
873}
874
Peter Krystadcec37a62020-01-21 16:56:18 -0800875static struct inet_connection_sock_af_ops *
876subflow_default_af_ops(struct sock *sk)
877{
878#if IS_ENABLED(CONFIG_MPTCP_IPV6)
879 if (sk->sk_family == AF_INET6)
880 return &subflow_v6_specific;
881#endif
882 return &subflow_specific;
883}
884
Peter Krystadcec37a62020-01-21 16:56:18 -0800885#if IS_ENABLED(CONFIG_MPTCP_IPV6)
Geert Uytterhoeven31484d52020-01-30 10:45:26 +0100886void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
887{
Peter Krystadcec37a62020-01-21 16:56:18 -0800888 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
889 struct inet_connection_sock *icsk = inet_csk(sk);
890 struct inet_connection_sock_af_ops *target;
891
892 target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
893
894 pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
Mat Martineauedc7e482020-01-24 16:04:03 -0800895 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);
Peter Krystadcec37a62020-01-21 16:56:18 -0800896
897 if (likely(icsk->icsk_af_ops == target))
898 return;
899
900 subflow->icsk_af_ops = icsk->icsk_af_ops;
901 icsk->icsk_af_ops = target;
Peter Krystadcec37a62020-01-21 16:56:18 -0800902}
Geert Uytterhoeven31484d52020-01-30 10:45:26 +0100903#endif
Peter Krystadcec37a62020-01-21 16:56:18 -0800904
Peter Krystadec3edaa2020-03-27 14:48:40 -0700905static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
906 struct sockaddr_storage *addr)
907{
908 memset(addr, 0, sizeof(*addr));
909 addr->ss_family = info->family;
910 if (addr->ss_family == AF_INET) {
911 struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
912
913 in_addr->sin_addr = info->addr;
914 in_addr->sin_port = info->port;
915 }
916#if IS_ENABLED(CONFIG_MPTCP_IPV6)
917 else if (addr->ss_family == AF_INET6) {
918 struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;
919
920 in6_addr->sin6_addr = info->addr6;
921 in6_addr->sin6_port = info->port;
922 }
923#endif
924}
925
926int __mptcp_subflow_connect(struct sock *sk, int ifindex,
927 const struct mptcp_addr_info *loc,
928 const struct mptcp_addr_info *remote)
929{
930 struct mptcp_sock *msk = mptcp_sk(sk);
931 struct mptcp_subflow_context *subflow;
932 struct sockaddr_storage addr;
933 struct socket *sf;
934 u32 remote_token;
935 int addrlen;
936 int err;
937
938 if (sk->sk_state != TCP_ESTABLISHED)
939 return -ENOTCONN;
940
941 err = mptcp_subflow_create_socket(sk, &sf);
942 if (err)
943 return err;
944
945 subflow = mptcp_subflow_ctx(sf->sk);
946 subflow->remote_key = msk->remote_key;
947 subflow->local_key = msk->local_key;
948 subflow->token = msk->token;
949 mptcp_info2sockaddr(loc, &addr);
950
951 addrlen = sizeof(struct sockaddr_in);
952#if IS_ENABLED(CONFIG_MPTCP_IPV6)
953 if (loc->family == AF_INET6)
954 addrlen = sizeof(struct sockaddr_in6);
955#endif
956 sf->sk->sk_bound_dev_if = ifindex;
957 err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
958 if (err)
959 goto failed;
960
961 mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
962 pr_debug("msk=%p remote_token=%u", msk, remote_token);
963 subflow->remote_token = remote_token;
964 subflow->local_id = loc->id;
965 subflow->request_join = 1;
966 subflow->request_bkup = 1;
967 mptcp_info2sockaddr(remote, &addr);
968
969 err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
970 if (err && err != -EINPROGRESS)
971 goto failed;
972
973 spin_lock_bh(&msk->join_list_lock);
974 list_add_tail(&subflow->node, &msk->join_list);
975 spin_unlock_bh(&msk->join_list_lock);
976
977 return err;
978
979failed:
980 sock_release(sf);
981 return err;
982}
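
/* Outgoing join flow: create a kernel TCP socket with the MPTCP ULP
 * attached, bind it to the requested local address and ifindex, copy the
 * msk keys and token into the subflow context, flag it as a (backup) join
 * request and start a non-blocking connect; the subflow sits on
 * msk->join_list until the handshake completes.
 */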
983
Peter Krystad2303f992020-01-21 16:56:17 -0800984int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
985{
986 struct mptcp_subflow_context *subflow;
987 struct net *net = sock_net(sk);
988 struct socket *sf;
989 int err;
990
Peter Krystadcec37a62020-01-21 16:56:18 -0800991 err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
992 &sf);
Peter Krystad2303f992020-01-21 16:56:17 -0800993 if (err)
994 return err;
995
996 lock_sock(sf->sk);
997
998 /* kernel sockets do not acquire a net ref by default, but the TCP
999 * timer needs one.
1000 */
1001 sf->sk->sk_net_refcnt = 1;
1002 get_net(net);
David S. Millerf6f7d8c2020-01-29 10:39:23 +01001003#ifdef CONFIG_PROC_FS
Peter Krystad2303f992020-01-21 16:56:17 -08001004 this_cpu_add(*net->core.sock_inuse, 1);
David S. Millerf6f7d8c2020-01-29 10:39:23 +01001005#endif
Peter Krystad2303f992020-01-21 16:56:17 -08001006 err = tcp_set_ulp(sf->sk, "mptcp");
1007 release_sock(sf->sk);
1008
1009 if (err)
1010 return err;
1011
1012 subflow = mptcp_subflow_ctx(sf->sk);
1013 pr_debug("subflow=%p", subflow);
1014
1015 *new_sock = sf;
Peter Krystad79c09492020-01-21 16:56:20 -08001016 sock_hold(sk);
Peter Krystad2303f992020-01-21 16:56:17 -08001017 subflow->conn = sk;
1018
1019 return 0;
1020}
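
/* Subflow sockets are kernel sockets: they take an explicit netns reference
 * (needed by the TCP timers), attach the "mptcp" ULP and pin the owning msk
 * through subflow->conn via the sock_hold() above.
 */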
1021
1022static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
1023 gfp_t priority)
1024{
1025 struct inet_connection_sock *icsk = inet_csk(sk);
1026 struct mptcp_subflow_context *ctx;
1027
1028 ctx = kzalloc(sizeof(*ctx), priority);
1029 if (!ctx)
1030 return NULL;
1031
1032 rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
Peter Krystadcec37a62020-01-21 16:56:18 -08001033 INIT_LIST_HEAD(&ctx->node);
Peter Krystad2303f992020-01-21 16:56:17 -08001034
1035 pr_debug("subflow=%p", ctx);
1036
1037 ctx->tcp_sock = sk;
1038
1039 return ctx;
1040}
1041
Mat Martineau648ef4b2020-01-21 16:56:24 -08001042static void __subflow_state_change(struct sock *sk)
1043{
1044 struct socket_wq *wq;
1045
1046 rcu_read_lock();
1047 wq = rcu_dereference(sk->sk_wq);
1048 if (skwq_has_sleeper(wq))
1049 wake_up_interruptible_all(&wq->wait);
1050 rcu_read_unlock();
1051}
1052
1053static bool subflow_is_done(const struct sock *sk)
1054{
1055 return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
1056}
1057
1058static void subflow_state_change(struct sock *sk)
1059{
1060 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
Paolo Abenidc093db2020-03-13 16:52:42 +01001061 struct sock *parent = subflow->conn;
Mat Martineau648ef4b2020-01-21 16:56:24 -08001062
1063 __subflow_state_change(sk);
1064
1065 /* as recvmsg() does not acquire the subflow socket for ssk selection,
1066 * a FIN packet carrying a DSS can go unnoticed if we don't trigger
1067 * the data-available machinery here.
1068 */
Paolo Abenidc093db2020-03-13 16:52:42 +01001069 if (subflow->mp_capable && mptcp_subflow_data_available(sk))
Florian Westphal2e522132020-02-26 10:14:51 +01001070 mptcp_data_ready(parent, sk);
Mat Martineau648ef4b2020-01-21 16:56:24 -08001071
Paolo Abenidc093db2020-03-13 16:52:42 +01001072 if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
Mat Martineau648ef4b2020-01-21 16:56:24 -08001073 !subflow->rx_eof && subflow_is_done(sk)) {
1074 subflow->rx_eof = 1;
Florian Westphal59832e22020-04-02 13:44:52 +02001075 mptcp_subflow_eof(parent);
Mat Martineau648ef4b2020-01-21 16:56:24 -08001076 }
1077}
1078
Peter Krystad2303f992020-01-21 16:56:17 -08001079static int subflow_ulp_init(struct sock *sk)
1080{
Peter Krystadcec37a62020-01-21 16:56:18 -08001081 struct inet_connection_sock *icsk = inet_csk(sk);
Peter Krystad2303f992020-01-21 16:56:17 -08001082 struct mptcp_subflow_context *ctx;
1083 struct tcp_sock *tp = tcp_sk(sk);
1084 int err = 0;
1085
1086 /* disallow attaching ULP to a socket unless it has been
1087 * created with sock_create_kern()
1088 */
1089 if (!sk->sk_kern_sock) {
1090 err = -EOPNOTSUPP;
1091 goto out;
1092 }
1093
1094 ctx = subflow_create_ctx(sk, GFP_KERNEL);
1095 if (!ctx) {
1096 err = -ENOMEM;
1097 goto out;
1098 }
1099
1100 pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
1101
1102 tp->is_mptcp = 1;
Peter Krystadcec37a62020-01-21 16:56:18 -08001103 ctx->icsk_af_ops = icsk->icsk_af_ops;
1104 icsk->icsk_af_ops = subflow_default_af_ops(sk);
Mat Martineau648ef4b2020-01-21 16:56:24 -08001105 ctx->tcp_data_ready = sk->sk_data_ready;
1106 ctx->tcp_state_change = sk->sk_state_change;
1107 ctx->tcp_write_space = sk->sk_write_space;
1108 sk->sk_data_ready = subflow_data_ready;
1109 sk->sk_write_space = subflow_write_space;
1110 sk->sk_state_change = subflow_state_change;
Peter Krystad2303f992020-01-21 16:56:17 -08001111out:
1112 return err;
1113}
1114
1115static void subflow_ulp_release(struct sock *sk)
1116{
1117 struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
1118
1119 if (!ctx)
1120 return;
1121
Peter Krystad79c09492020-01-21 16:56:20 -08001122 if (ctx->conn)
1123 sock_put(ctx->conn);
1124
Peter Krystad2303f992020-01-21 16:56:17 -08001125 kfree_rcu(ctx, rcu);
1126}
1127
Peter Krystadcec37a62020-01-21 16:56:18 -08001128static void subflow_ulp_clone(const struct request_sock *req,
1129 struct sock *newsk,
1130 const gfp_t priority)
1131{
1132 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
1133 struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
1134 struct mptcp_subflow_context *new_ctx;
1135
Peter Krystadf2962342020-03-27 14:48:39 -07001136 if (!tcp_rsk(req)->is_mptcp ||
1137 (!subflow_req->mp_capable && !subflow_req->mp_join)) {
Mat Martineau648ef4b2020-01-21 16:56:24 -08001138 subflow_ulp_fallback(newsk, old_ctx);
Peter Krystadcec37a62020-01-21 16:56:18 -08001139 return;
1140 }
1141
1142 new_ctx = subflow_create_ctx(newsk, priority);
Mat Martineauedc7e482020-01-24 16:04:03 -08001143 if (!new_ctx) {
Mat Martineau648ef4b2020-01-21 16:56:24 -08001144 subflow_ulp_fallback(newsk, old_ctx);
Peter Krystadcec37a62020-01-21 16:56:18 -08001145 return;
1146 }
1147
1148 new_ctx->conn_finished = 1;
1149 new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
Mat Martineau648ef4b2020-01-21 16:56:24 -08001150 new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
1151 new_ctx->tcp_state_change = old_ctx->tcp_state_change;
1152 new_ctx->tcp_write_space = old_ctx->tcp_write_space;
Paolo Abeni58b09912020-03-13 16:52:41 +01001153 new_ctx->rel_write_seq = 1;
1154 new_ctx->tcp_sock = newsk;
1155
Peter Krystadf2962342020-03-27 14:48:39 -07001156 if (subflow_req->mp_capable) {
1157 /* see the comments in subflow_syn_recv_sock(); the MPTCP connection
1158 * is fully established only after we receive the remote key
1159 */
1160 new_ctx->mp_capable = 1;
Peter Krystadf2962342020-03-27 14:48:39 -07001161 new_ctx->local_key = subflow_req->local_key;
1162 new_ctx->token = subflow_req->token;
1163 new_ctx->ssn_offset = subflow_req->ssn_offset;
1164 new_ctx->idsn = subflow_req->idsn;
1165 } else if (subflow_req->mp_join) {
Peter Krystadec3edaa2020-03-27 14:48:40 -07001166 new_ctx->ssn_offset = subflow_req->ssn_offset;
Peter Krystadf2962342020-03-27 14:48:39 -07001167 new_ctx->mp_join = 1;
1168 new_ctx->fully_established = 1;
1169 new_ctx->backup = subflow_req->backup;
1170 new_ctx->local_id = subflow_req->local_id;
1171 new_ctx->token = subflow_req->token;
1172 new_ctx->thmac = subflow_req->thmac;
1173 }
Peter Krystadcec37a62020-01-21 16:56:18 -08001174}
1175
Peter Krystad2303f992020-01-21 16:56:17 -08001176static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
1177 .name = "mptcp",
1178 .owner = THIS_MODULE,
1179 .init = subflow_ulp_init,
1180 .release = subflow_ulp_release,
Peter Krystadcec37a62020-01-21 16:56:18 -08001181 .clone = subflow_ulp_clone,
Peter Krystad2303f992020-01-21 16:56:17 -08001182};
1183
Peter Krystadcec37a62020-01-21 16:56:18 -08001184static int subflow_ops_init(struct request_sock_ops *subflow_ops)
1185{
1186 subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
1187 subflow_ops->slab_name = "request_sock_subflow";
1188
1189 subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
1190 subflow_ops->obj_size, 0,
1191 SLAB_ACCOUNT |
1192 SLAB_TYPESAFE_BY_RCU,
1193 NULL);
1194 if (!subflow_ops->slab)
1195 return -ENOMEM;
1196
Peter Krystad79c09492020-01-21 16:56:20 -08001197 subflow_ops->destructor = subflow_req_destructor;
1198
Peter Krystadcec37a62020-01-21 16:56:18 -08001199 return 0;
1200}
1201
Peter Krystad2303f992020-01-21 16:56:17 -08001202void mptcp_subflow_init(void)
1203{
Peter Krystadcec37a62020-01-21 16:56:18 -08001204 subflow_request_sock_ops = tcp_request_sock_ops;
1205 if (subflow_ops_init(&subflow_request_sock_ops) != 0)
1206 panic("MPTCP: failed to init subflow request sock ops\n");
1207
1208 subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
1209 subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
1210
1211 subflow_specific = ipv4_specific;
1212 subflow_specific.conn_request = subflow_v4_conn_request;
1213 subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
1214 subflow_specific.sk_rx_dst_set = subflow_finish_connect;
Peter Krystad79c09492020-01-21 16:56:20 -08001215 subflow_specific.rebuild_header = subflow_rebuild_header;
Peter Krystadcec37a62020-01-21 16:56:18 -08001216
1217#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1218 subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
1219 subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;
1220
1221 subflow_v6_specific = ipv6_specific;
1222 subflow_v6_specific.conn_request = subflow_v6_conn_request;
1223 subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
1224 subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
Peter Krystad79c09492020-01-21 16:56:20 -08001225 subflow_v6_specific.rebuild_header = subflow_rebuild_header;
Peter Krystadcec37a62020-01-21 16:56:18 -08001226
1227 subflow_v6m_specific = subflow_v6_specific;
1228 subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
1229 subflow_v6m_specific.send_check = ipv4_specific.send_check;
1230 subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
1231 subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
1232 subflow_v6m_specific.net_frag_header_len = 0;
1233#endif
1234
Davide Caratti5147dfb2020-03-27 14:48:49 -07001235 mptcp_diag_subflow_init(&subflow_ulp_ops);
1236
Peter Krystad2303f992020-01-21 16:56:17 -08001237 if (tcp_register_ulp(&subflow_ulp_ops) != 0)
1238 panic("MPTCP: failed to register subflows to ULP\n");
1239}
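
/* Initialization wires MPTCP into the regular TCP stack by cloning the TCP
 * request_sock and af_ops tables and overriding only the entry points that
 * need MPTCP awareness (conn_request, syn_recv_sock, sk_rx_dst_set and
 * rebuild_header); the v6-mapped table additionally reuses the IPv4
 * transmit helpers.
 */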