// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	mptcp_token_destroy_request(req);
	tcp_request_sock_ops.destructor(req);
}

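/* HMAC the two 32-bit nonces (nonce1 || nonce2) with the given key pair;
 * used to authenticate both directions of the MP_JOIN handshake.
 */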
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
	return mptcp_is_fully_established((void *)msk) &&
	       READ_ONCE(msk->pm.accept_subflow);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
	return msk;
}

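/* Initialize the MPTCP-specific part of the request socket from the incoming
 * SYN: generate a local key and token for MP_CAPABLE, or look up the msk by
 * token and prepare the truncated HMAC for MP_JOIN.
 */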
static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	mptcp_get_options(skb, &mp_opt);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return;
#endif

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err, retries = 4;

again:
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req, skb);
		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}
}

static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

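/* Hooked as icsk_af_ops->sk_rx_dst_set on active subflows: runs on SYN-ACK
 * reception and completes the MP_CAPABLE or MP_JOIN handshake, falling back
 * to plain TCP (MP_CAPABLE) or resetting the subflow (MP_JOIN) when the
 * expected option is missing or invalid.
 */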
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		if (!mp_opt.mp_capable) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(mptcp_sk(subflow->conn));
			goto fallback;
		}

		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
		mptcp_finish_connect(sk);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		if (!mp_opt.mp_join)
			goto do_reset;

		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
	return;

do_reset:
	tcp_send_active_reset(sk, GFP_ATOMIC);
	tcp_done(sk);
}

static struct request_sock_ops subflow_request_sock_ops;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if new mptcp socket isn't accepted, it is free'd
	 * from the tcp listener sockets request queue, linked
	 * from req->sk. The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will not be in
	 * SYN_RECV state and doesn't have SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */

	if (sk->sk_state == TCP_SYN_RECV) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_token_destroy(mptcp_sk(sk));
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	subflow->remote_key = mp_opt->sndr_key;
	subflow->fully_established = 1;
	subflow->can_ack = 1;
	WRITE_ONCE(msk->fully_established, true);
}

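/* Build the child socket for a passively established subflow. For MP_CAPABLE
 * a new msk is cloned from the listener and handed to the subflow, for
 * MP_JOIN the child is attached to the msk matching the token; on failure
 * the subflow falls back to plain TCP or, for MP_JOIN, is reset.
 */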
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* After child creation we must look for 'mp_capable' even when options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join ||
		    !mptcp_can_accept_new_subflow(subflow_req->msk) ||
		    !subflow_hmac_valid(req, &mp_opt)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fallback on ctx allocation failure and on pre-reqs
		 * checking above. In the latter scenario we additionally need
		 * to reset the context to non MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			subflow_drop_ctx(child);
			goto out;
		}

		if (ctx->mp_capable) {
			/* this can't race with mptcp_close(), as the msk is
			 * not yet exposed to user-space
			 */
			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);

			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			if (mp_opt.mp_capable)
				mptcp_subflow_fully_established(ctx, &mp_opt);
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner)
				goto dispose_child;

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY
};

static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
			     subflow->map_data_len))) {
		/* Mapping covers only past subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

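/* Parse the DSS mapping carried by the skb at the head of the subflow
 * receive queue and update the current mapping state accordingly.
 */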
static enum mapping_status get_mapping_status(struct sock *ssk,
					      struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0 len FIN pkt to the receive
			 * queue, that is the only 0len pkts ever expected here,
			 * and we can admit no mapping only for 0 len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			mptcp_update_rcv_data_fin(msk, mpext->data_seq);
			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				return MAPPING_DATA_FIN;
			}
		} else {
			mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len);
			pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len);
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		subflow->use_64bit_ack = 0;
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
		subflow->use_64bit_ack = 1;
	}

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb data are fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate valid mapping on new skb, because we must ensure
	 * the current skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

static int subflow_read_actor(read_descriptor_t *desc,
			      struct sk_buff *skb,
			      unsigned int offset, size_t len)
{
	size_t copy_len = min(desc->count, len);

	desc->count -= copy_len;

	pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
	return copy_len;
}

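/* Validate the mapping for the skb at the head of the receive queue and drop
 * out-of-sequence mapped data, looping until in-sequence data is available
 * at the MPTCP level or the subflow receive queue is empty.
 */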
static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u32 map_remaining;
		size_t delta;
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk, msk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			return true;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack)
			break;

		/* only accept in-sequence mapping. Old values are spurious
		 * retransmission; we can hit "future" values on active backup
		 * subflow switch, we rely on retransmissions to get
		 * in-sequence data.
		 * Concurrent subflows support will require subflow data
		 * reordering
		 */
		map_remaining = subflow->map_data_len -
				mptcp_subflow_get_map_offset(subflow);
		if (before64(ack_seq, old_ack))
			delta = min_t(size_t, old_ack - ack_seq, map_remaining);
		else
			delta = min_t(size_t, ack_seq - old_ack, map_remaining);

		/* discard mapped data */
		pr_debug("discarding %zu bytes, current map len=%d", delta,
			 map_remaining);
		if (delta) {
			read_descriptor_t desc = {
				.count = delta,
			};
			int ret;

			ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
			if (ret < 0) {
				ssk->sk_err = -ret;
				goto fatal;
			}
			if (ret < delta)
				return false;
			if (delta == map_remaining)
				subflow->map_valid = 0;
		}
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sk_buff *skb;

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	if (!subflow_check_data_avail(sk)) {
		subflow->data_avail = 0;
		return false;
	}

	skb = skb_peek(&sk->sk_receive_queue);
	subflow->data_avail = skb &&
		       before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
	return subflow->data_avail;
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored,
 * as far as the mptcp peer is concerned that data is still inflight.
 * DSS ACK is updated when skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = tcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	sk_stream_write_space(sk);
	if (sk_stream_is_writeable(sk)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
		sk_stream_write_space(parent);
	}
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

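/* Create, bind and connect an additional outgoing subflow for the given
 * MPTCP socket, then add it to the msk join_list.
 */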
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
			    const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	int local_id = loc->id;
	struct socket *sf;
	struct sock *ssk;
	u32 remote_token;
	int addrlen;
	int err;

	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	ssk = sf->sk;
	subflow = mptcp_subflow_ctx(ssk);
	do {
		get_random_bytes(&subflow->local_nonce, sizeof(u32));
	} while (!subflow->local_nonce);

	if (!local_id) {
		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
		if (err < 0)
			goto failed;

		local_id = err;
	}

	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	ssk->sk_bound_dev_if = ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token,
		 local_id);
	subflow->remote_token = remote_token;
	subflow->local_id = local_id;
	subflow->request_join = 1;
	subflow->request_bkup = 1;
	mptcp_info2sockaddr(remote, &addr);

	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);

	return err;

failed:
	sock_release(sf);
	return err;
}

int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

1108static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
1109 gfp_t priority)
1110{
1111 struct inet_connection_sock *icsk = inet_csk(sk);
1112 struct mptcp_subflow_context *ctx;
1113
1114 ctx = kzalloc(sizeof(*ctx), priority);
1115 if (!ctx)
1116 return NULL;
1117
1118 rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
Peter Krystadcec37a62020-01-21 16:56:18 -08001119 INIT_LIST_HEAD(&ctx->node);
Peter Krystad2303f992020-01-21 16:56:17 -08001120
1121 pr_debug("subflow=%p", ctx);
1122
1123 ctx->tcp_sock = sk;
1124
1125 return ctx;
1126}
1127
Mat Martineau648ef4b2020-01-21 16:56:24 -08001128static void __subflow_state_change(struct sock *sk)
1129{
1130 struct socket_wq *wq;
1131
1132 rcu_read_lock();
1133 wq = rcu_dereference(sk->sk_wq);
1134 if (skwq_has_sleeper(wq))
1135 wake_up_interruptible_all(&wq->wait);
1136 rcu_read_unlock();
1137}
1138
1139static bool subflow_is_done(const struct sock *sk)
1140{
1141 return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
1142}
1143
1144static void subflow_state_change(struct sock *sk)
1145{
1146 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
Paolo Abenidc093db2020-03-13 16:52:42 +01001147 struct sock *parent = subflow->conn;
Mat Martineau648ef4b2020-01-21 16:56:24 -08001148
1149 __subflow_state_change(sk);
1150
Davide Caratti8fd73802020-06-29 22:26:21 +02001151 if (subflow_simultaneous_connect(sk)) {
1152 mptcp_do_fallback(sk);
Florian Westphala6b118f2020-06-30 21:24:45 +02001153 mptcp_rcv_space_init(mptcp_sk(parent), sk);
Davide Caratti8fd73802020-06-29 22:26:21 +02001154 pr_fallback(mptcp_sk(parent));
1155 subflow->conn_finished = 1;
1156 if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
1157 inet_sk_state_store(parent, TCP_ESTABLISHED);
1158 parent->sk_state_change(parent);
1159 }
1160 }
1161
Mat Martineau648ef4b2020-01-21 16:56:24 -08001162 /* as recvmsg() does not acquire the subflow socket for ssk selection
1163 * a fin packet carrying a DSS can be unnoticed if we don't trigger
1164 * the data available machinery here.
1165 */
Davide Carattie1ff9e82020-06-29 22:26:20 +02001166 if (mptcp_subflow_data_available(sk))
Florian Westphal2e522132020-02-26 10:14:51 +01001167 mptcp_data_ready(parent, sk);
Mat Martineau648ef4b2020-01-21 16:56:24 -08001168
Mat Martineau067a0b32020-07-28 15:12:07 -07001169 if (__mptcp_check_fallback(mptcp_sk(parent)) &&
1170 !(parent->sk_shutdown & RCV_SHUTDOWN) &&
Mat Martineau648ef4b2020-01-21 16:56:24 -08001171 !subflow->rx_eof && subflow_is_done(sk)) {
1172 subflow->rx_eof = 1;
Florian Westphal59832e22020-04-02 13:44:52 +02001173 mptcp_subflow_eof(parent);
Mat Martineau648ef4b2020-01-21 16:56:24 -08001174 }
1175}
1176
static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
out:
	return err;
}

static void subflow_ulp_release(struct sock *sk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

	if (!ctx)
		return;

	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
		/* see comments in subflow_syn_recv_sock(), MPTCP connection
		 * is fully established only after we receive the remote key
		 */
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

void __init mptcp_subflow_init(void)
{
	subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}