// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
                                  enum linux_mptcp_mib_field field)
{
        MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static int subflow_rebuild_header(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        int local_id, err = 0;

        if (subflow->request_mptcp && !subflow->token) {
                pr_debug("subflow=%p", sk);
                err = mptcp_token_new_connect(sk);
        } else if (subflow->request_join && !subflow->local_nonce) {
                struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;

                pr_debug("subflow=%p", sk);

                do {
                        get_random_bytes(&subflow->local_nonce, sizeof(u32));
                } while (!subflow->local_nonce);

                if (subflow->local_id)
                        goto out;

                local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
                if (local_id < 0)
                        return -EINVAL;

                subflow->local_id = local_id;
        }

out:
        if (err)
                return err;

        return subflow->icsk_af_ops->rebuild_header(sk);
}

static void subflow_req_destructor(struct request_sock *req)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

        pr_debug("subflow_req=%p", subflow_req);

        if (subflow_req->mp_capable)
                mptcp_token_destroy_request(subflow_req->token);
        tcp_request_sock_ops.destructor(req);
}

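/* Keyed hash used by the MP_JOIN handshake: HMAC-SHA256 keyed with both
 * peers' keys over the two exchanged nonces. Callers truncate the digest as
 * needed: 64 bits for the SYN/ACK thmac, MPTCPOPT_HMAC_LEN bytes for the
 * third ACK.
 */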
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
                                  void *hmac)
{
        u8 msg[8];

        put_unaligned_be32(nonce1, &msg[0]);
        put_unaligned_be32(nonce2, &msg[4]);

        mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static bool subflow_token_join_request(struct request_sock *req,
                                       const struct sk_buff *skb)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        u8 hmac[SHA256_DIGEST_SIZE];
        struct mptcp_sock *msk;
        int local_id;

        msk = mptcp_token_get_sock(subflow_req->token);
        if (!msk) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
                return false;
        }

        local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
        if (local_id < 0) {
                sock_put((struct sock *)msk);
                return false;
        }
        subflow_req->local_id = local_id;

        get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

        subflow_generate_hmac(msk->local_key, msk->remote_key,
                              subflow_req->local_nonce,
                              subflow_req->remote_nonce, hmac);

        subflow_req->thmac = get_unaligned_be64(hmac);

        sock_put((struct sock *)msk);
        return true;
}

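/* Shared request_sock init for v4/v6 listeners: parse the MPTCP options
 * carried by the SYN and record either MP_CAPABLE or MP_JOIN state in the
 * subflow request socket.
 */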
static void subflow_init_req(struct request_sock *req,
                             const struct sock *sk_listener,
                             struct sk_buff *skb)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_options_received mp_opt;

        pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

        mptcp_get_options(skb, &mp_opt);

        subflow_req->mp_capable = 0;
        subflow_req->mp_join = 0;

#ifdef CONFIG_TCP_MD5SIG
        /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
         * TCP option space.
         */
        if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
                return;
#endif

        if (mp_opt.mp_capable) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

                if (mp_opt.mp_join)
                        return;
        } else if (mp_opt.mp_join) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
        }

        if (mp_opt.mp_capable && listener->request_mptcp) {
                int err;

                err = mptcp_token_new_request(req);
                if (err == 0)
                        subflow_req->mp_capable = 1;

                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
        } else if (mp_opt.mp_join && listener->request_mptcp) {
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
                subflow_req->mp_join = 1;
                subflow_req->backup = mp_opt.backup;
                subflow_req->remote_id = mp_opt.join_id;
                subflow_req->token = mp_opt.token;
                subflow_req->remote_nonce = mp_opt.nonce;
                pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
                         subflow_req->remote_nonce);
                if (!subflow_token_join_request(req, skb)) {
                        subflow_req->mp_join = 0;
                        // @@ need to trigger RST
                }
        }
}

static void subflow_v4_init_req(struct request_sock *req,
                                const struct sock *sk_listener,
                                struct sk_buff *skb)
{
        tcp_rsk(req)->is_mptcp = 1;

        tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

        subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
                                const struct sock *sk_listener,
                                struct sk_buff *skb)
{
        tcp_rsk(req)->is_mptcp = 1;

        tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

        subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
        u8 hmac[SHA256_DIGEST_SIZE];
        u64 thmac;

        subflow_generate_hmac(subflow->remote_key, subflow->local_key,
                              subflow->remote_nonce, subflow->local_nonce,
                              hmac);

        thmac = get_unaligned_be64(hmac);
        pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
                 subflow, subflow->token,
                 (unsigned long long)thmac,
                 (unsigned long long)subflow->thmac);

        return thmac == subflow->thmac;
}

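/* Installed in place of sk_rx_dst_set() on actively opened subflows: on the
 * first packet after connect() it finalizes the MP_CAPABLE or MP_JOIN
 * handshake based on the SYN/ACK options, or falls back to plain TCP when
 * the peer did not echo the requested option.
 */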
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct mptcp_options_received mp_opt;
        struct sock *parent = subflow->conn;
        struct tcp_sock *tp = tcp_sk(sk);

        subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

        if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
                inet_sk_state_store(parent, TCP_ESTABLISHED);
                parent->sk_state_change(parent);
        }

        /* be sure no special action on any packet other than syn-ack */
        if (subflow->conn_finished)
                return;

        subflow->conn_finished = 1;

        mptcp_get_options(skb, &mp_opt);
        if (subflow->request_mptcp && mp_opt.mp_capable) {
                subflow->mp_capable = 1;
                subflow->can_ack = 1;
                subflow->remote_key = mp_opt.sndr_key;
                pr_debug("subflow=%p, remote_key=%llu", subflow,
                         subflow->remote_key);
        } else if (subflow->request_join && mp_opt.mp_join) {
                subflow->mp_join = 1;
                subflow->thmac = mp_opt.thmac;
                subflow->remote_nonce = mp_opt.nonce;
                pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
                         subflow->thmac, subflow->remote_nonce);
        } else if (subflow->request_mptcp) {
                tp->is_mptcp = 0;
        }

        if (!tp->is_mptcp)
                return;

        if (subflow->mp_capable) {
                pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
                         subflow->remote_key);
                mptcp_finish_connect(sk);

                if (skb) {
                        pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
                        subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
                }
        } else if (subflow->mp_join) {
                u8 hmac[SHA256_DIGEST_SIZE];

                pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
                         subflow, subflow->thmac,
                         subflow->remote_nonce);
                if (!subflow_thmac_valid(subflow)) {
                        MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
                        subflow->mp_join = 0;
                        goto do_reset;
                }

                subflow_generate_hmac(subflow->local_key, subflow->remote_key,
                                      subflow->local_nonce,
                                      subflow->remote_nonce,
                                      hmac);

                memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

                if (skb)
                        subflow->ssn_offset = TCP_SKB_CB(skb)->seq;

                if (!mptcp_finish_join(sk))
                        goto do_reset;

                MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
        } else {
do_reset:
                tcp_send_active_reset(sk, GFP_ATOMIC);
                tcp_done(sk);
        }
}

static struct request_sock_ops subflow_request_sock_ops;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow=%p", subflow);

        /* Never answer to SYNs sent to broadcast or multicast */
        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                goto drop;

        return tcp_conn_request(&subflow_request_sock_ops,
                                &subflow_request_sock_ipv4_ops,
                                sk, skb);
drop:
        tcp_listendrop(sk);
        return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

        pr_debug("subflow=%p", subflow);

        if (skb->protocol == htons(ETH_P_IP))
                return subflow_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        return tcp_conn_request(&subflow_request_sock_ops,
                                &subflow_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
                               const struct mptcp_options_received *mp_opt)
{
        const struct mptcp_subflow_request_sock *subflow_req;
        u8 hmac[SHA256_DIGEST_SIZE];
        struct mptcp_sock *msk;
        bool ret;

        subflow_req = mptcp_subflow_rsk(req);
        msk = mptcp_token_get_sock(subflow_req->token);
        if (!msk)
                return false;

        subflow_generate_hmac(msk->remote_key, msk->local_key,
                              subflow_req->remote_nonce,
                              subflow_req->local_nonce, hmac);

        ret = true;
        if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN))
                ret = false;

        sock_put((struct sock *)msk);
        return ret;
}

static void mptcp_sock_destruct(struct sock *sk)
{
        /* if the new mptcp socket isn't accepted, it is freed
         * from the tcp listener socket's request queue, linked
         * from req->sk. The tcp socket is released.
         * This calls the ULP release function which will
         * also remove the mptcp socket, via
         * sock_put(ctx->conn).
         *
         * Problem is that the mptcp socket will not be in
         * SYN_RECV state and doesn't have the SOCK_DEAD flag.
         * Both result in warnings from inet_sock_destruct.
         */

        if (sk->sk_state == TCP_SYN_RECV) {
                sk->sk_state = TCP_CLOSE;
                WARN_ON_ONCE(sk->sk_socket);
                sock_orphan(sk);
        }

        inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
        inet_sk_state_store(sk, TCP_CLOSE);
        sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
                                 struct mptcp_subflow_context *old_ctx)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        mptcp_subflow_tcp_fallback(sk, old_ctx);
        icsk->icsk_ulp_ops = NULL;
        rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
        tcp_sk(sk)->is_mptcp = 0;
}

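/* Build the child socket for a passively opened subflow. For MP_CAPABLE
 * requests a new MPTCP master socket is cloned up front; for MP_JOIN the
 * third-ACK HMAC is verified and the child is attached to the MPTCP socket
 * owning the token. On failure we either fall back to plain TCP
 * (MP_CAPABLE) or reset the child (MP_JOIN, where fallback is fatal).
 */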
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct request_sock *req,
                                          struct dst_entry *dst,
                                          struct request_sock *req_unhash,
                                          bool *own_req)
{
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
        struct mptcp_subflow_request_sock *subflow_req;
        struct mptcp_options_received mp_opt;
        bool fallback_is_fatal = false;
        struct sock *new_msk = NULL;
        bool fallback = false;
        struct sock *child;

        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

        /* we need a valid 'mp_capable' value later even when the options are
         * not parsed
         */
        mp_opt.mp_capable = 0;
        if (tcp_rsk(req)->is_mptcp == 0)
                goto create_child;

        /* if the sk is MP_CAPABLE, we try to fetch the client key */
        subflow_req = mptcp_subflow_rsk(req);
        if (subflow_req->mp_capable) {
                if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
                        /* here we can receive and accept an in-window,
                         * out-of-order pkt, which will not carry the MP_CAPABLE
                         * opt even on mptcp enabled paths
                         */
                        goto create_msk;
                }

                mptcp_get_options(skb, &mp_opt);
                if (!mp_opt.mp_capable) {
                        fallback = true;
                        goto create_child;
                }

create_msk:
                new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
                if (!new_msk)
                        fallback = true;
        } else if (subflow_req->mp_join) {
                fallback_is_fatal = true;
                mptcp_get_options(skb, &mp_opt);
                if (!mp_opt.mp_join ||
                    !subflow_hmac_valid(req, &mp_opt)) {
                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
                        return NULL;
                }
        }

create_child:
        child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
                                                     req_unhash, own_req);

        if (child && *own_req) {
                struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

                /* we need to fallback on ctx allocation failure and on pre-reqs
                 * checking above. In the latter scenario we additionally need
                 * to reset the context to non MPTCP status.
                 */
                if (!ctx || fallback) {
                        if (fallback_is_fatal)
                                goto close_child;

                        if (ctx) {
                                subflow_ulp_fallback(child, ctx);
                                kfree_rcu(ctx, rcu);
                        }
                        goto out;
                }

                if (ctx->mp_capable) {
                        /* new mpc subflow takes ownership of the newly
                         * created mptcp socket
                         */
                        new_msk->sk_destruct = mptcp_sock_destruct;
                        mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
                        ctx->conn = new_msk;
                        new_msk = NULL;

                        /* with OoO packets we can reach here without ingress
                         * mpc option
                         */
                        ctx->remote_key = mp_opt.sndr_key;
                        ctx->fully_established = mp_opt.mp_capable;
                        ctx->can_ack = mp_opt.mp_capable;
                } else if (ctx->mp_join) {
                        struct mptcp_sock *owner;

                        owner = mptcp_token_get_sock(ctx->token);
                        if (!owner)
                                goto close_child;

                        ctx->conn = (struct sock *)owner;
                        if (!mptcp_finish_join(child))
                                goto close_child;

                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
                }
        }

out:
        /* dispose of the leftover mptcp master, if any */
        if (unlikely(new_msk))
                mptcp_force_close(new_msk);

        /* check for expected invariant - should never trigger, just help
         * catching earlier subtle bugs
         */
        WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
                     (!mptcp_subflow_ctx(child) ||
                      !mptcp_subflow_ctx(child)->conn));
        return child;

close_child:
        tcp_send_active_reset(child, GFP_ATOMIC);
        inet_csk_prepare_forced_close(child);
        tcp_done(child);
        return NULL;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
        MAPPING_OK,
        MAPPING_INVALID,
        MAPPING_EMPTY,
        MAPPING_DATA_FIN
};

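/* Expand a 32-bit data sequence number from the DSS option to 64 bits,
 * taking the upper bits from the previously seen mapping and assuming the
 * new mapping starts at or after the end of the old one.
 */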
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
        if ((u32)seq == (u32)old_seq)
                return old_seq;

        /* Assume map covers data not mapped yet. */
        return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
        WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
                  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        unsigned int skb_consumed;

        skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
        if (WARN_ON_ONCE(skb_consumed >= skb->len))
                return true;

        return skb->len - skb_consumed <= subflow->map_data_len -
                                          mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

        if (unlikely(before(ssn, subflow->map_subflow_seq))) {
                /* Mapping covers data later in the subflow stream,
                 * currently unsupported.
                 */
                warn_bad_map(subflow, ssn);
                return false;
        }
        if (unlikely(!before(ssn, subflow->map_subflow_seq +
                             subflow->map_data_len))) {
                /* Mapping covers only data already consumed on the
                 * subflow, invalid
                 */
                warn_bad_map(subflow, ssn + skb->len);
                return false;
        }
        return true;
}

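/* Look at the skb at the head of the subflow receive queue and validate or
 * record the DSS mapping it carries, if any.
 */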
static enum mapping_status get_mapping_status(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct mptcp_ext *mpext;
        struct sk_buff *skb;
        u16 data_len;
        u64 map_seq;

        skb = skb_peek(&ssk->sk_receive_queue);
        if (!skb)
                return MAPPING_EMPTY;

        mpext = mptcp_get_ext(skb);
        if (!mpext || !mpext->use_map) {
                if (!subflow->map_valid && !skb->len) {
                        /* the TCP stack delivers 0-len FIN packets to the
                         * receive queue; those are the only 0-len packets
                         * ever expected here, and the only ones allowed to
                         * carry no mapping
                         */
                        if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
                                WARN_ONCE(1, "0len seq %d:%d flags %x",
                                          TCP_SKB_CB(skb)->seq,
                                          TCP_SKB_CB(skb)->end_seq,
                                          TCP_SKB_CB(skb)->tcp_flags);
                        sk_eat_skb(ssk, skb);
                        return MAPPING_EMPTY;
                }

                if (!subflow->map_valid)
                        return MAPPING_INVALID;

                goto validate_seq;
        }

        pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
                 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
                 mpext->data_len, mpext->data_fin);

        data_len = mpext->data_len;
        if (data_len == 0) {
                pr_err("Infinite mapping not handled");
                MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
                return MAPPING_INVALID;
        }

        if (mpext->data_fin == 1) {
                if (data_len == 1) {
                        pr_debug("DATA_FIN with no payload");
                        if (subflow->map_valid) {
                                /* A DATA_FIN might arrive in a DSS
                                 * option before the previous mapping
                                 * has been fully consumed. Continue
                                 * handling the existing mapping.
                                 */
                                skb_ext_del(skb, SKB_EXT_MPTCP);
                                return MAPPING_OK;
                        } else {
                                return MAPPING_DATA_FIN;
                        }
                }

                /* Adjust for DATA_FIN using 1 byte of sequence space */
                data_len--;
        }

        if (!mpext->dsn64) {
                map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
                                     mpext->data_seq);
                pr_debug("expanded seq=%llu", subflow->map_seq);
        } else {
                map_seq = mpext->data_seq;
        }

        if (subflow->map_valid) {
                /* Allow replacing only with an identical map */
                if (subflow->map_seq == map_seq &&
                    subflow->map_subflow_seq == mpext->subflow_seq &&
                    subflow->map_data_len == data_len) {
                        skb_ext_del(skb, SKB_EXT_MPTCP);
                        return MAPPING_OK;
                }

                /* If the data in this skb is fully covered by the current
                 * mapping, the new map would need caching, which is not
                 * supported
                 */
                if (skb_is_fully_mapped(ssk, skb)) {
                        MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
                        return MAPPING_INVALID;
                }

                /* will validate the next map after consuming the current one */
                return MAPPING_OK;
        }

        subflow->map_seq = map_seq;
        subflow->map_subflow_seq = mpext->subflow_seq;
        subflow->map_data_len = data_len;
        subflow->map_valid = 1;
        subflow->mpc_map = mpext->mpc_map;
        pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
                 subflow->map_seq, subflow->map_subflow_seq,
                 subflow->map_data_len);

validate_seq:
        /* we revalidate the valid mapping on a new skb, because we must ensure
         * the current skb is completely covered by the available mapping
         */
        if (!validate_mapping(ssk, skb))
                return MAPPING_INVALID;

        skb_ext_del(skb, SKB_EXT_MPTCP);
        return MAPPING_OK;
}

static int subflow_read_actor(read_descriptor_t *desc,
                              struct sk_buff *skb,
                              unsigned int offset, size_t len)
{
        size_t copy_len = min(desc->count, len);

        desc->count -= copy_len;

        pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
        return copy_len;
}

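/* Check whether in-sequence MPTCP-level data is available on this subflow,
 * discarding any part of the current mapping that the MPTCP socket has
 * already received on another subflow.
 */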
static bool subflow_check_data_avail(struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        enum mapping_status status;
        struct mptcp_sock *msk;
        struct sk_buff *skb;

        pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
                 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
        if (subflow->data_avail)
                return true;

        msk = mptcp_sk(subflow->conn);
        for (;;) {
                u32 map_remaining;
                size_t delta;
                u64 ack_seq;
                u64 old_ack;

                status = get_mapping_status(ssk);
                pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
                if (status == MAPPING_INVALID) {
                        ssk->sk_err = EBADMSG;
                        goto fatal;
                }

                if (status != MAPPING_OK)
                        return false;

                skb = skb_peek(&ssk->sk_receive_queue);
                if (WARN_ON_ONCE(!skb))
                        return false;

                /* if msk lacks the remote key, this subflow must provide an
                 * MP_CAPABLE-based mapping
                 */
                if (unlikely(!READ_ONCE(msk->can_ack))) {
                        if (!subflow->mpc_map) {
                                ssk->sk_err = EBADMSG;
                                goto fatal;
                        }
                        WRITE_ONCE(msk->remote_key, subflow->remote_key);
                        WRITE_ONCE(msk->ack_seq, subflow->map_seq);
                        WRITE_ONCE(msk->can_ack, true);
                }

                old_ack = READ_ONCE(msk->ack_seq);
                ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
                pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
                         ack_seq);
                if (ack_seq == old_ack)
                        break;

                /* only accept in-sequence mappings. Old values are spurious
                 * retransmissions; we can hit "future" values on active backup
                 * subflow switch, and we rely on retransmissions to get
                 * in-sequence data.
                 * Concurrent subflow support will require subflow data
                 * reordering
                 */
                map_remaining = subflow->map_data_len -
                                mptcp_subflow_get_map_offset(subflow);
                if (before64(ack_seq, old_ack))
                        delta = min_t(size_t, old_ack - ack_seq, map_remaining);
                else
                        delta = min_t(size_t, ack_seq - old_ack, map_remaining);

                /* discard mapped data */
                pr_debug("discarding %zu bytes, current map len=%d", delta,
                         map_remaining);
                if (delta) {
                        read_descriptor_t desc = {
                                .count = delta,
                        };
                        int ret;

                        ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
                        if (ret < 0) {
                                ssk->sk_err = -ret;
                                goto fatal;
                        }
                        if (ret < delta)
                                return false;
                        if (delta == map_remaining)
                                subflow->map_valid = 0;
                }
        }
        return true;

fatal:
        /* fatal protocol error, close the socket */
        /* This barrier is coupled with smp_rmb() in tcp_poll() */
        smp_wmb();
        ssk->sk_error_report(ssk);
        tcp_set_state(ssk, TCP_CLOSE);
        tcp_send_active_reset(ssk, GFP_ATOMIC);
        return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sk_buff *skb;

        /* check if current mapping is still valid */
        if (subflow->map_valid &&
            mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
                subflow->map_valid = 0;
                subflow->data_avail = 0;

                pr_debug("Done with mapping: seq=%u data_len=%u",
                         subflow->map_subflow_seq,
                         subflow->map_data_len);
        }

        if (!subflow_check_data_avail(sk)) {
                subflow->data_avail = 0;
                return false;
        }

        skb = skb_peek(&sk->sk_receive_queue);
        subflow->data_avail = skb &&
                before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
        return subflow->data_avail;
}

static void subflow_data_ready(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        if (!subflow->mp_capable && !subflow->mp_join) {
                subflow->tcp_data_ready(sk);

                parent->sk_data_ready(parent);
                return;
        }

        if (mptcp_subflow_data_available(sk))
                mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        sk_stream_write_space(sk);
        if (sk_stream_is_writeable(sk)) {
                set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
                smp_mb__after_atomic();
                /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
                sk_stream_write_space(parent);
        }
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        if (sk->sk_family == AF_INET6)
                return &subflow_v6_specific;
#endif
        return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_connection_sock_af_ops *target;

        target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

        pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
                 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

        if (likely(icsk->icsk_af_ops == target))
                return;

        subflow->icsk_af_ops = icsk->icsk_af_ops;
        icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
                                struct sockaddr_storage *addr)
{
        memset(addr, 0, sizeof(*addr));
        addr->ss_family = info->family;
        if (addr->ss_family == AF_INET) {
                struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

                in_addr->sin_addr = info->addr;
                in_addr->sin_port = info->port;
        }
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        else if (addr->ss_family == AF_INET6) {
                struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

                in6_addr->sin6_addr = info->addr6;
                in6_addr->sin6_port = info->port;
        }
#endif
}

int __mptcp_subflow_connect(struct sock *sk, int ifindex,
                            const struct mptcp_addr_info *loc,
                            const struct mptcp_addr_info *remote)
{
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_subflow_context *subflow;
        struct sockaddr_storage addr;
        struct socket *sf;
        u32 remote_token;
        int addrlen;
        int err;

        if (sk->sk_state != TCP_ESTABLISHED)
                return -ENOTCONN;

        err = mptcp_subflow_create_socket(sk, &sf);
        if (err)
                return err;

        subflow = mptcp_subflow_ctx(sf->sk);
        subflow->remote_key = msk->remote_key;
        subflow->local_key = msk->local_key;
        subflow->token = msk->token;
        mptcp_info2sockaddr(loc, &addr);

        addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        if (loc->family == AF_INET6)
                addrlen = sizeof(struct sockaddr_in6);
#endif
        sf->sk->sk_bound_dev_if = ifindex;
        err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
        if (err)
                goto failed;

        mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
        pr_debug("msk=%p remote_token=%u", msk, remote_token);
        subflow->remote_token = remote_token;
        subflow->local_id = loc->id;
        subflow->request_join = 1;
        subflow->request_bkup = 1;
        mptcp_info2sockaddr(remote, &addr);

        err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
        if (err && err != -EINPROGRESS)
                goto failed;

        spin_lock_bh(&msk->join_list_lock);
        list_add_tail(&subflow->node, &msk->join_list);
        spin_unlock_bh(&msk->join_list_lock);

        return err;

failed:
        sock_release(sf);
        return err;
}

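/* Create a kernel TCP socket attached to the "mptcp" ULP, to be used as a
 * subflow of the given MPTCP socket.
 */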
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
        struct mptcp_subflow_context *subflow;
        struct net *net = sock_net(sk);
        struct socket *sf;
        int err;

        err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
                               &sf);
        if (err)
                return err;

        lock_sock(sf->sk);

        /* kernel sockets do not by default acquire net ref, but TCP timer
         * needs it.
         */
        sf->sk->sk_net_refcnt = 1;
        get_net(net);
#ifdef CONFIG_PROC_FS
        this_cpu_add(*net->core.sock_inuse, 1);
#endif
        err = tcp_set_ulp(sf->sk, "mptcp");
        release_sock(sf->sk);

        if (err)
                return err;

        /* the newly created socket really belongs to the owning MPTCP master
         * socket, even if for additional subflows the allocation is performed
         * by a kernel workqueue. Adjust inode references, so that the
         * procfs/diag interfaces really show this one belonging to the correct
         * user.
         */
        SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
        SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
        SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

        subflow = mptcp_subflow_ctx(sf->sk);
        pr_debug("subflow=%p", subflow);

        *new_sock = sf;
        sock_hold(sk);
        subflow->conn = sk;

        return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
                                                        gfp_t priority)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct mptcp_subflow_context *ctx;

        ctx = kzalloc(sizeof(*ctx), priority);
        if (!ctx)
                return NULL;

        rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
        INIT_LIST_HEAD(&ctx->node);

        pr_debug("subflow=%p", ctx);

        ctx->tcp_sock = sk;

        return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        wq = rcu_dereference(sk->sk_wq);
        if (skwq_has_sleeper(wq))
                wake_up_interruptible_all(&wq->wait);
        rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
        return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;

        __subflow_state_change(sk);

        /* as recvmsg() does not acquire the subflow socket for ssk selection,
         * a fin packet carrying a DSS can go unnoticed if we don't trigger
         * the data available machinery here.
         */
        if (subflow->mp_capable && mptcp_subflow_data_available(sk))
                mptcp_data_ready(parent, sk);

        if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
            !subflow->rx_eof && subflow_is_done(sk)) {
                subflow->rx_eof = 1;
                mptcp_subflow_eof(parent);
        }
}

static int subflow_ulp_init(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct mptcp_subflow_context *ctx;
        struct tcp_sock *tp = tcp_sk(sk);
        int err = 0;

        /* disallow attaching ULP to a socket unless it has been
         * created with sock_create_kern()
         */
        if (!sk->sk_kern_sock) {
                err = -EOPNOTSUPP;
                goto out;
        }

        ctx = subflow_create_ctx(sk, GFP_KERNEL);
        if (!ctx) {
                err = -ENOMEM;
                goto out;
        }

        pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

        tp->is_mptcp = 1;
        ctx->icsk_af_ops = icsk->icsk_af_ops;
        icsk->icsk_af_ops = subflow_default_af_ops(sk);
        ctx->tcp_data_ready = sk->sk_data_ready;
        ctx->tcp_state_change = sk->sk_state_change;
        ctx->tcp_write_space = sk->sk_write_space;
        sk->sk_data_ready = subflow_data_ready;
        sk->sk_write_space = subflow_write_space;
        sk->sk_state_change = subflow_state_change;
out:
        return err;
}

static void subflow_ulp_release(struct sock *sk)
{
        struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

        if (!ctx)
                return;

        if (ctx->conn)
                sock_put(ctx->conn);

        kfree_rcu(ctx, rcu);
}

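/* ULP clone hook: transfer the MPTCP state recorded in the request socket
 * to the newly accepted subflow context, or fall back to plain TCP when the
 * request did not negotiate MPTCP.
 */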
static void subflow_ulp_clone(const struct request_sock *req,
                              struct sock *newsk,
                              const gfp_t priority)
{
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
        struct mptcp_subflow_context *new_ctx;

        if (!tcp_rsk(req)->is_mptcp ||
            (!subflow_req->mp_capable && !subflow_req->mp_join)) {
                subflow_ulp_fallback(newsk, old_ctx);
                return;
        }

        new_ctx = subflow_create_ctx(newsk, priority);
        if (!new_ctx) {
                subflow_ulp_fallback(newsk, old_ctx);
                return;
        }

        new_ctx->conn_finished = 1;
        new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
        new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
        new_ctx->tcp_state_change = old_ctx->tcp_state_change;
        new_ctx->tcp_write_space = old_ctx->tcp_write_space;
        new_ctx->rel_write_seq = 1;
        new_ctx->tcp_sock = newsk;

        if (subflow_req->mp_capable) {
                /* see comments in subflow_syn_recv_sock(), MPTCP connection
                 * is fully established only after we receive the remote key
                 */
                new_ctx->mp_capable = 1;
                new_ctx->local_key = subflow_req->local_key;
                new_ctx->token = subflow_req->token;
                new_ctx->ssn_offset = subflow_req->ssn_offset;
                new_ctx->idsn = subflow_req->idsn;
        } else if (subflow_req->mp_join) {
                new_ctx->ssn_offset = subflow_req->ssn_offset;
                new_ctx->mp_join = 1;
                new_ctx->fully_established = 1;
                new_ctx->backup = subflow_req->backup;
                new_ctx->local_id = subflow_req->local_id;
                new_ctx->token = subflow_req->token;
                new_ctx->thmac = subflow_req->thmac;
        }
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
        .name = "mptcp",
        .owner = THIS_MODULE,
        .init = subflow_ulp_init,
        .release = subflow_ulp_release,
        .clone = subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
        subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
        subflow_ops->slab_name = "request_sock_subflow";

        subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
                                              subflow_ops->obj_size, 0,
                                              SLAB_ACCOUNT |
                                              SLAB_TYPESAFE_BY_RCU,
                                              NULL);
        if (!subflow_ops->slab)
                return -ENOMEM;

        subflow_ops->destructor = subflow_req_destructor;

        return 0;
}

void mptcp_subflow_init(void)
{
        subflow_request_sock_ops = tcp_request_sock_ops;
        if (subflow_ops_init(&subflow_request_sock_ops) != 0)
                panic("MPTCP: failed to init subflow request sock ops\n");

        subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
        subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

        subflow_specific = ipv4_specific;
        subflow_specific.conn_request = subflow_v4_conn_request;
        subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_specific.sk_rx_dst_set = subflow_finish_connect;
        subflow_specific.rebuild_header = subflow_rebuild_header;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
        subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

        subflow_v6_specific = ipv6_specific;
        subflow_v6_specific.conn_request = subflow_v6_conn_request;
        subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
        subflow_v6_specific.rebuild_header = subflow_rebuild_header;

        subflow_v6m_specific = subflow_v6_specific;
        subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
        subflow_v6m_specific.send_check = ipv4_specific.send_check;
        subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
        subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
        subflow_v6m_specific.net_frag_header_len = 0;
#endif

        mptcp_diag_subflow_init(&subflow_ulp_ops);

        if (tcp_register_ulp(&subflow_ulp_ops) != 0)
                panic("MPTCP: failed to register subflows to ULP\n");
}