// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

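/* hooked into icsk_af_ops->rebuild_header: before falling through to the
 * plain TCP rebuild_header, allocate the connection token for an outgoing
 * MP_CAPABLE subflow, or the local nonce and address id for an outgoing
 * MP_JOIN subflow.
 */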
static int subflow_rebuild_header(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	int local_id, err = 0;

	if (subflow->request_mptcp && !subflow->token) {
		pr_debug("subflow=%p", sk);
		err = mptcp_token_new_connect(sk);
	} else if (subflow->request_join && !subflow->local_nonce) {
		struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;

		pr_debug("subflow=%p", sk);

		do {
			get_random_bytes(&subflow->local_nonce, sizeof(u32));
		} while (!subflow->local_nonce);

		if (subflow->local_id)
			goto out;

		local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
		if (local_id < 0)
			return -EINVAL;

		subflow->local_id = local_id;
	}

out:
	if (err)
		return err;

	return subflow->icsk_af_ops->rebuild_header(sk);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	if (subflow_req->mp_capable)
		mptcp_token_destroy_request(subflow_req->token);
	tcp_request_sock_ops.destructor(req);
}

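/* compute the MP_JOIN HMAC over the two 32-bit nonces, keyed with the
 * (key1, key2) pair via mptcp_crypto_hmac_sha(); callers truncate the
 * result to 64 bits for the SYN-ACK thmac or copy MPTCPOPT_HMAC_LEN bytes
 * for the third ACK.
 */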
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
	return msk;
}

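/* common request socket init for v4/v6 listeners: parse the SYN options and
 * record either MP_CAPABLE state (new token) or MP_JOIN state (peer token,
 * nonce, backup flag and the msk looked up from the token) on the request.
 */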
static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	mptcp_get_options(skb, &mp_opt);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return;
#endif

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err;

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req, skb);
		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}
}

static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

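/* invoked as sk_rx_dst_set() on an active subflow when the SYN-ACK arrives:
 * move the parent socket to ESTABLISHED, then either latch the peer key
 * (MP_CAPABLE), validate the truncated HMAC and build our own HMAC for the
 * third ACK (MP_JOIN), or fall back to plain TCP; a failed join resets the
 * subflow.
 */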
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;
	struct tcp_sock *tp = tcp_sk(sk);

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->conn_finished = 1;

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp && mp_opt.mp_capable) {
		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
	} else if (subflow->request_join && mp_opt.mp_join) {
		subflow->mp_join = 1;
		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);
	} else if (subflow->request_mptcp) {
		tp->is_mptcp = 0;
	}

	if (!tp->is_mptcp)
		return;

	if (subflow->mp_capable) {
		pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
			 subflow->remote_key);
		mptcp_finish_connect(sk);

		if (skb) {
			pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
			subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
		}
	} else if (subflow->mp_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
			 subflow, subflow->thmac,
			 subflow->remote_nonce);
		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			subflow->mp_join = 0;
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);

		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (skb)
			subflow->ssn_offset = TCP_SKB_CB(skb)->seq;

		if (!mptcp_finish_join(sk))
			goto do_reset;

		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else {
do_reset:
		tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
	}
}

static struct request_sock_ops subflow_request_sock_ops;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if a new mptcp socket isn't accepted, it is freed
	 * from the tcp listener socket's request queue, linked
	 * from req->sk. The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will be in
	 * SYN_RECV state and won't have the SOCK_DEAD flag set.
	 * Both result in warnings from inet_sock_destruct.
	 */

	if (sk->sk_state == TCP_SYN_RECV) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_token_destroy(mptcp_sk(sk)->token);
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

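/* passive open: create the child socket for a completed handshake. For
 * MP_CAPABLE a new msk is cloned to own the subflow, for MP_JOIN the ACK
 * HMAC is verified and the child is attached to the existing msk; on
 * failure we either fall back to plain TCP or, when fallback is fatal
 * (MP_JOIN), reset the child.
 */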
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* After child creation we must look for 'mp_capable' even when options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join ||
		    !subflow_hmac_valid(req, &mp_opt)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fall back on ctx allocation failure and on pre-reqs
		 * checking above. In the latter scenario we additionally need
		 * to reset the context to non-MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			subflow_drop_ctx(child);
			goto out;
		}

		if (ctx->mp_capable) {
			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			ctx->remote_key = mp_opt.sndr_key;
			ctx->fully_established = mp_opt.mp_capable;
			ctx->can_ack = mp_opt.mp_capable;
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner)
				goto dispose_child;

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the leftover mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just to help
	 * catch earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	tcp_send_active_reset(child, GFP_ATOMIC);
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);

	/* The last child reference will be released by the caller */
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN
};

static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
				  subflow->map_data_len))) {
		/* Mapping covers only past subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

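/* parse the DSS mapping carried by the skb at the head of the subflow
 * receive queue and update/validate the current subflow mapping; only an
 * identical replacement of a still valid mapping is allowed.
 */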
static enum mapping_status get_mapping_status(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0-len FIN pkts to the receive
			 * queue; those are the only 0-len pkts ever expected
			 * here, and the absence of a mapping is acceptable
			 * only for 0-len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			pr_debug("DATA_FIN with no payload");
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				return MAPPING_DATA_FIN;
			}
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		subflow->use_64bit_ack = 0;
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
		subflow->use_64bit_ack = 1;
	}

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb's data is fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate the valid mapping on each new skb, because we must
	 * ensure the current skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

static int subflow_read_actor(read_descriptor_t *desc,
			      struct sk_buff *skb,
			      unsigned int offset, size_t len)
{
	size_t copy_len = min(desc->count, len);

	desc->count -= copy_len;

	pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
	return copy_len;
}

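/* walk the subflow receive queue: validate mappings, discard data already
 * acked at the MPTCP level and stop once in-sequence data is found (or the
 * queue is empty); protocol errors reset the subflow.
 */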
static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u32 map_remaining;
		size_t delta;
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack)
			break;

		/* only accept in-sequence mappings. Old values are spurious
		 * retransmissions; we can hit "future" values on active backup
		 * subflow switch, and we rely on retransmissions to get
		 * in-sequence data.
		 * Concurrent subflow support will require subflow data
		 * reordering
		 */
		map_remaining = subflow->map_data_len -
				mptcp_subflow_get_map_offset(subflow);
		if (before64(ack_seq, old_ack))
			delta = min_t(size_t, old_ack - ack_seq, map_remaining);
		else
			delta = min_t(size_t, ack_seq - old_ack, map_remaining);

		/* discard mapped data */
		pr_debug("discarding %zu bytes, current map len=%d", delta,
			 map_remaining);
		if (delta) {
			read_descriptor_t desc = {
				.count = delta,
			};
			int ret;

			ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
			if (ret < 0) {
				ssk->sk_err = -ret;
				goto fatal;
			}
			if (ret < delta)
				return false;
			if (delta == map_remaining)
				subflow->map_valid = 0;
		}
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	return false;
}

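/* drop an already fully consumed mapping and report whether in-sequence
 * MPTCP-level data is ready to be read from this subflow.
 */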
bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sk_buff *skb;

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	if (!subflow_check_data_avail(sk)) {
		subflow->data_avail = 0;
		return false;
	}

	skb = skb_peek(&sk->sk_receive_queue);
	subflow->data_avail = skb &&
		       before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
	return subflow->data_avail;
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored:
 * as far as the mptcp peer is concerned, that data is still inflight.
 * The DSS ACK is updated when the skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = tcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	if (!subflow->mp_capable && !subflow->mp_join) {
		subflow->tcp_data_ready(sk);

		parent->sk_data_ready(parent);
		return;
	}

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	sk_stream_write_space(sk);
	if (sk_stream_is_writeable(sk)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
		sk_stream_write_space(parent);
	}
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

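/* create and connect an additional outgoing (MP_JOIN) subflow for an
 * established MPTCP connection, bound to the given local address/ifindex
 * and connected (non-blocking) to the remote address; the new subflow is
 * queued on the msk join_list.
 */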
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
			    const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	struct socket *sf;
	u32 remote_token;
	int addrlen;
	int err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	subflow = mptcp_subflow_ctx(sf->sk);
	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	sf->sk->sk_bound_dev_if = ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u", msk, remote_token);
	subflow->remote_token = remote_token;
	subflow->local_id = loc->id;
	subflow->request_join = 1;
	subflow->request_bkup = 1;
	mptcp_info2sockaddr(remote, &addr);

	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);

	return err;

failed:
	sock_release(sf);
	return err;
}

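/* create a kernel TCP socket to be used as a subflow, attach the "mptcp"
 * ULP and make the procfs/diag accounting point at the owning MPTCP socket.
 */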
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							 gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	/* as recvmsg() does not acquire the subflow socket for ssk selection
	 * a fin packet carrying a DSS can be unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (subflow->mp_capable && mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
out:
	return err;
}

static void subflow_ulp_release(struct sock *sk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

	if (!ctx)
		return;

	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

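/* ULP clone hook for passively created subflows: either fall back to plain
 * TCP or set up a fresh context carrying over the MP_CAPABLE/MP_JOIN state
 * gathered on the request socket.
 */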
static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
		/* see comments in subflow_syn_recv_sock(), MPTCP connection
		 * is fully established only after we receive the remote key
		 */
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name = "mptcp",
	.owner = THIS_MODULE,
	.init = subflow_ulp_init,
	.release = subflow_ulp_release,
	.clone = subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

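/* register the subflow request sock ops, the per-family af_ops overrides
 * and the "mptcp" ULP used by all subflows.
 */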
void mptcp_subflow_init(void)
{
	subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;
	subflow_specific.rebuild_header = subflow_rebuild_header;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
	subflow_v6_specific.rebuild_header = subflow_rebuild_header;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}