// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

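/* Release the resources tied to an MPTCP request socket: drop the
 * reference on the owning msk (if any), release the token and chain
 * up to the plain TCP request destructor.
 */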
static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	mptcp_token_destroy_request(req);
	tcp_request_sock_ops.destructor(req);
}

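/* Compute the HMAC used by the MP_JOIN handshake: the two nonces are
 * packed big-endian into an 8 byte message and hashed with the given
 * key pair via mptcp_crypto_hmac_sha().
 */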
static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
	return mptcp_is_fully_established((void *)msk) &&
	       READ_ONCE(msk->pm.accept_subflow);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
	return msk;
}

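/* Common initialization shared by the regular and the syncookie request
 * paths: clear the MPTCP specific fields and refuse MPTCP when MD5
 * signatures are in use, as both options may not fit in the TCP option
 * space.
 */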
static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return -EINVAL;
#endif

	return 0;
}

/* Init mptcp request socket.
 *
 * Returns an error code if a JOIN has failed and a TCP reset
 * should be sent.
 */
static int subflow_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int ret;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	ret = __subflow_init_req(req, sk_listener);
	if (ret)
		return 0;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return 0;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err, retries = 4;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		if (unlikely(req->syncookie)) {
			mptcp_crypto_key_sha(subflow_req->local_key,
					     &subflow_req->token,
					     &subflow_req->idsn);
			if (mptcp_token_exists(subflow_req->token)) {
				if (retries-- > 0)
					goto again;
			} else {
				subflow_req->mp_capable = 1;
			}
			return 0;
		}

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;

	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req, skb);

		/* Can't fall back to TCP in this case. */
		if (!subflow_req->msk)
			return -EPERM;

		if (unlikely(req->syncookie)) {
			if (mptcp_can_accept_new_subflow(subflow_req->msk))
				subflow_init_req_cookie_join_save(subflow_req, skb);
		}

		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}

	return 0;
}

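/* Initialize an MPTCP request socket re-created from a TCP syncookie:
 * the MP_CAPABLE/MP_JOIN state is rebuilt from the options carried by
 * the cookie ACK, since no request socket existed at SYN time.
 */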
int mptcp_subflow_init_cookie_req(struct request_sock *req,
				  const struct sock *sk_listener,
				  struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int err;

	err = __subflow_init_req(req, sk_listener);
	if (err)
		return err;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable && mp_opt.mp_join)
		return -EINVAL;

	if (mp_opt.mp_capable && listener->request_mptcp) {
		if (mp_opt.sndr_key == 0)
			return -EINVAL;

		subflow_req->local_key = mp_opt.rcvr_key;
		err = mptcp_token_new_request(req);
		if (err)
			return err;

		subflow_req->mp_capable = 1;
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
			return -EINVAL;

		if (mptcp_can_accept_new_subflow(subflow_req->msk))
			subflow_req->mp_join = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);

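/* Route the incoming SYN as plain TCP would, then perform the MPTCP
 * specific request initialization; on failure release the route and,
 * outside of the syncookie path, answer with a reset.
 */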
static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
					      struct sk_buff *skb,
					      struct flowi *fl,
					      struct request_sock *req)
{
	struct dst_entry *dst;
	int err;

	tcp_rsk(req)->is_mptcp = 1;

	dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
	if (!dst)
		return NULL;

	err = subflow_init_req(req, sk, skb);
	if (err == 0)
		return dst;

	dst_release(dst);
	if (!req->syncookie)
		tcp_request_sock_ops.send_reset(sk, skb);
	return NULL;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
					      struct sk_buff *skb,
					      struct flowi *fl,
					      struct request_sock *req)
{
	struct dst_entry *dst;
	int err;

	tcp_rsk(req)->is_mptcp = 1;

	dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
	if (!dst)
		return NULL;

	err = subflow_init_req(req, sk, skb);
	if (err == 0)
		return dst;

	dst_release(dst);
	if (!req->syncookie)
		tcp6_request_sock_ops.send_reset(sk, skb);
	return NULL;
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

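/* Abort the subflow with a TCP reset and, if not already pending, schedule
 * the msk worker to clean up the closed subflow, holding an extra msk
 * reference on its behalf.
 */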
void mptcp_subflow_reset(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;

	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	tcp_done(ssk);
	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
	    schedule_work(&mptcp_sk(sk)->work))
		sock_hold(sk);
}

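/* Handle the SYN-ACK of an actively opened subflow: complete the
 * MP_CAPABLE handshake or validate the MP_JOIN truncated HMAC, falling
 * back to plain TCP or resetting the subflow when MPTCP cannot be used.
 */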
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		if (!mp_opt.mp_capable) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(mptcp_sk(subflow->conn));
			goto fallback;
		}

		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
		mptcp_finish_connect(sk);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		if (!mp_opt.mp_join)
			goto do_reset;

		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
	return;

do_reset:
	mptcp_subflow_reset(sk);
}

struct request_sock_ops mptcp_subflow_request_sock_ops;
EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if new mptcp socket isn't accepted, it is free'd
	 * from the tcp listener sockets request queue, linked
	 * from req->sk. The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will be in
	 * ESTABLISHED state and will not have the SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */

	if (sk->sk_state == TCP_ESTABLISHED) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_destroy_common(mptcp_sk(sk));
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

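/* Record the peer key and mark both the subflow and the owning msk
 * connection as fully established.
 */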
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	subflow->remote_key = mp_opt->sndr_key;
	subflow->fully_established = 1;
	subflow->can_ack = 1;
	WRITE_ONCE(msk->fully_established, true);
}

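/* Build the child socket for a passively established subflow: clone a
 * new msk for MP_CAPABLE requests, validate the MP_JOIN third ACK, and
 * fall back to plain TCP (or reset, when fallback is fatal) if the
 * MPTCP context cannot be set up.
 */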
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* After child creation we must look for 'mp_capable' even when options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
		    !mptcp_can_accept_new_subflow(subflow_req->msk)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fallback on ctx allocation failure and on pre-reqs
		 * checking above. In the latter scenario we additionally need
		 * to reset the context to non MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			subflow_drop_ctx(child);
			goto out;
		}

		if (ctx->mp_capable) {
			/* this can't race with mptcp_close(), as the msk is
			 * not yet exposed to user-space
			 */
			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);

			/* record the newly created socket as the first msk
			 * subflow, but don't link it yet into conn_list
			 */
			WRITE_ONCE(mptcp_sk(new_msk)->first, child);

			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			if (mp_opt.mp_capable)
				mptcp_subflow_fully_established(ctx, &mp_opt);
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner)
				goto dispose_child;

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY
};

static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
				  subflow->map_data_len))) {
		/* Mapping covers only already consumed subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

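/* Fetch and validate the DSS mapping carried by the skb at the head of the
 * subflow receive queue, updating the current map and propagating DATA_FIN
 * to the msk as needed.
 */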
static enum mapping_status get_mapping_status(struct sock *ssk,
					      struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0 len FIN pkts to the receive
			 * queue, those are the only 0len pkts ever expected here,
			 * and we can admit no mapping only for 0 len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
								 mpext->dsn64);
			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				if (updated && schedule_work(&msk->work))
					sock_hold((struct sock *)msk);

				return MAPPING_DATA_FIN;
			}
		} else {
			u64 data_fin_seq = mpext->data_seq + data_len - 1;

			/* If mpext->data_seq is a 32-bit value, data_fin_seq
			 * must also be limited to 32 bits.
			 */
			if (!mpext->dsn64)
				data_fin_seq &= GENMASK_ULL(31, 0);

			mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
			pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
				 data_fin_seq, mpext->dsn64);
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
	}
	WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb data are fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate valid mapping on new skb, because we must ensure
	 * the current skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

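/* Drop up to 'limit' bytes from the skb at the head of the subflow receive
 * queue; used for data the msk already received (MPTCP-level duplicates),
 * advancing copied_seq and invalidating the map once fully consumed.
 */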
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
				       u64 limit)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
	u32 incr;

	incr = limit >= skb->len ? skb->len + fin : limit;

	pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
		 subflow->map_subflow_seq);
	MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
	tcp_sk(ssk)->copied_seq += incr;
	if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
		sk_eat_skb(ssk, skb);
	if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
		subflow->map_valid = 0;
}

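/* Check whether the head of the subflow receive queue carries data the msk
 * can use: validate the current mapping, handle fallback subflows and
 * discard MPTCP-level duplicate data.
 */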
static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (!skb_peek(&ssk->sk_receive_queue))
		subflow->data_avail = 0;
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk, msk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			return true;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack) {
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			break;
		} else if (after64(ack_seq, old_ack)) {
			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
			break;
		}

		/* only accept in-sequence mappings. Old values are spurious
		 * retransmissions
		 */
		mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	subflow->data_avail = 0;
	return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	return subflow_check_data_avail(sk);
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored,
 * as far as the mptcp peer is concerned that data is still inflight.
 * DSS ACK is updated when skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = __mptcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *ssk)
{
	/* we take action in __mptcp_clean_una() */
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

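/* Actively open an additional subflow from the given local address towards
 * 'remote': create a kernel TCP socket tied to the msk and start the
 * MP_JOIN handshake with a non-blocking connect.
 */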
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	int remote_id = remote->id;
	int local_id = loc->id;
	struct socket *sf;
	struct sock *ssk;
	u32 remote_token;
	int addrlen;
	int err;

	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	ssk = sf->sk;
	subflow = mptcp_subflow_ctx(ssk);
	do {
		get_random_bytes(&subflow->local_nonce, sizeof(u32));
	} while (!subflow->local_nonce);

	if (!local_id) {
		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
		if (err < 0)
			goto failed;

		local_id = err;
	}

	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	ssk->sk_bound_dev_if = loc->ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
		 remote_token, local_id, remote_id);
	subflow->remote_token = remote_token;
	subflow->local_id = local_id;
	subflow->remote_id = remote_id;
	subflow->request_join = 1;
	subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	mptcp_info2sockaddr(remote, &addr);

	mptcp_add_pending_subflow(msk, subflow);
	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed_unlink;

	return err;

failed_unlink:
	spin_lock_bh(&msk->join_list_lock);
	list_del(&subflow->node);
	spin_unlock_bh(&msk->join_list_lock);

failed:
	subflow->disposable = 1;
	sock_release(sf);
	return err;
}

static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
{
#ifdef CONFIG_SOCK_CGROUP_DATA
	struct sock_cgroup_data *parent_skcd = &parent->sk_cgrp_data,
				*child_skcd = &child->sk_cgrp_data;

	/* only the additional subflows created by kworkers have to be modified */
	if (cgroup_id(sock_cgroup_ptr(parent_skcd)) !=
	    cgroup_id(sock_cgroup_ptr(child_skcd))) {
#ifdef CONFIG_MEMCG
		struct mem_cgroup *memcg = parent->sk_memcg;

		mem_cgroup_sk_free(child);
		if (memcg && css_tryget(&memcg->css))
			child->sk_memcg = memcg;
#endif /* CONFIG_MEMCG */

		cgroup_sk_free(child_skcd);
		*child_skcd = *parent_skcd;
		cgroup_sk_clone(child_skcd);
	}
#endif /* CONFIG_SOCK_CGROUP_DATA */
}

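/* Create a kernel TCP socket to be used as a subflow, attach the "mptcp"
 * ULP and tie it to the owning msk, inheriting the parent cgroup and socket
 * inode ownership so that procfs/diag report the correct owner.
 */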
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	/* un-accepted server sockets can reach here - on bad configuration
	 * bail early to avoid greater trouble later
	 */
	if (unlikely(!sk->sk_socket))
		return -EINVAL;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* the newly created socket has to be in the same cgroup as its parent */
	mptcp_attach_cgroup(sk, sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	if (subflow_simultaneous_connect(sk)) {
		mptcp_do_fallback(sk);
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		pr_fallback(mptcp_sk(parent));
		subflow->conn_finished = 1;
		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
			inet_sk_state_store(parent, TCP_ESTABLISHED);
			parent->sk_state_change(parent);
		}
	}

	/* as recvmsg() does not acquire the subflow socket for ssk selection
	 * a fin packet carrying a DSS can be unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

Peter Krystad2303f992020-01-21 16:56:17 -08001321static int subflow_ulp_init(struct sock *sk)
1322{
Peter Krystadcec37a62020-01-21 16:56:18 -08001323 struct inet_connection_sock *icsk = inet_csk(sk);
Peter Krystad2303f992020-01-21 16:56:17 -08001324 struct mptcp_subflow_context *ctx;
1325 struct tcp_sock *tp = tcp_sk(sk);
1326 int err = 0;
1327
1328 /* disallow attaching ULP to a socket unless it has been
1329 * created with sock_create_kern()
1330 */
1331 if (!sk->sk_kern_sock) {
1332 err = -EOPNOTSUPP;
1333 goto out;
1334 }
1335
1336 ctx = subflow_create_ctx(sk, GFP_KERNEL);
1337 if (!ctx) {
1338 err = -ENOMEM;
1339 goto out;
1340 }
1341
1342 pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
1343
1344 tp->is_mptcp = 1;
Peter Krystadcec37a62020-01-21 16:56:18 -08001345 ctx->icsk_af_ops = icsk->icsk_af_ops;
1346 icsk->icsk_af_ops = subflow_default_af_ops(sk);
Mat Martineau648ef4b2020-01-21 16:56:24 -08001347 ctx->tcp_data_ready = sk->sk_data_ready;
1348 ctx->tcp_state_change = sk->sk_state_change;
1349 ctx->tcp_write_space = sk->sk_write_space;
1350 sk->sk_data_ready = subflow_data_ready;
1351 sk->sk_write_space = subflow_write_space;
1352 sk->sk_state_change = subflow_state_change;
Peter Krystad2303f992020-01-21 16:56:17 -08001353out:
1354 return err;
1355}
1356
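/* ULP release hook, invoked while the TCP subflow socket is being
 * destroyed: drop the reference on the owning MPTCP socket and free
 * the context via RCU unless __mptcp_close_ssk() still needs it.
 */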
Paolo Abenie16163b2020-11-16 10:48:09 +01001357static void subflow_ulp_release(struct sock *ssk)
Peter Krystad2303f992020-01-21 16:56:17 -08001358{
Paolo Abenie16163b2020-11-16 10:48:09 +01001359 struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
1360 bool release = true;
1361 struct sock *sk;
Peter Krystad2303f992020-01-21 16:56:17 -08001362
1363 if (!ctx)
1364 return;
1365
Paolo Abenie16163b2020-11-16 10:48:09 +01001366 sk = ctx->conn;
1367 if (sk) {
 1368 /* if the msk has been orphaned and the subflow is
Paolo Abeni0597d0f2020-12-09 12:03:30 +01001369 * still unaccepted, keep the ctx alive; it will be
 1370 * freed later by __mptcp_close_ssk()
Paolo Abenie16163b2020-11-16 10:48:09 +01001371 */
Paolo Abeni0597d0f2020-12-09 12:03:30 +01001372 release = ctx->disposable || list_empty(&ctx->node);
Paolo Abenie16163b2020-11-16 10:48:09 +01001373 sock_put(sk);
1374 }
Peter Krystad79c09492020-01-21 16:56:20 -08001375
Paolo Abenie16163b2020-11-16 10:48:09 +01001376 if (release)
1377 kfree_rcu(ctx, rcu);
Peter Krystad2303f992020-01-21 16:56:17 -08001378}
1379
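/* Attach a subflow context to the newly accepted child socket, copying
 * the MP_CAPABLE or MP_JOIN state negotiated on the request sock, or
 * fall back to plain TCP when MPTCP was not negotiated.
 */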
Peter Krystadcec37a62020-01-21 16:56:18 -08001380static void subflow_ulp_clone(const struct request_sock *req,
1381 struct sock *newsk,
1382 const gfp_t priority)
1383{
1384 struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
1385 struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
1386 struct mptcp_subflow_context *new_ctx;
1387
Peter Krystadf2962342020-03-27 14:48:39 -07001388 if (!tcp_rsk(req)->is_mptcp ||
1389 (!subflow_req->mp_capable && !subflow_req->mp_join)) {
Mat Martineau648ef4b2020-01-21 16:56:24 -08001390 subflow_ulp_fallback(newsk, old_ctx);
Peter Krystadcec37a62020-01-21 16:56:18 -08001391 return;
1392 }
1393
1394 new_ctx = subflow_create_ctx(newsk, priority);
Mat Martineauedc7e482020-01-24 16:04:03 -08001395 if (!new_ctx) {
Mat Martineau648ef4b2020-01-21 16:56:24 -08001396 subflow_ulp_fallback(newsk, old_ctx);
Peter Krystadcec37a62020-01-21 16:56:18 -08001397 return;
1398 }
1399
1400 new_ctx->conn_finished = 1;
1401 new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
Mat Martineau648ef4b2020-01-21 16:56:24 -08001402 new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
1403 new_ctx->tcp_state_change = old_ctx->tcp_state_change;
1404 new_ctx->tcp_write_space = old_ctx->tcp_write_space;
Paolo Abeni58b09912020-03-13 16:52:41 +01001405 new_ctx->rel_write_seq = 1;
1406 new_ctx->tcp_sock = newsk;
1407
Peter Krystadf2962342020-03-27 14:48:39 -07001408 if (subflow_req->mp_capable) {
1409 /* see comments in subflow_syn_recv_sock(), MPTCP connection
1410 * is fully established only after we receive the remote key
1411 */
1412 new_ctx->mp_capable = 1;
Peter Krystadf2962342020-03-27 14:48:39 -07001413 new_ctx->local_key = subflow_req->local_key;
1414 new_ctx->token = subflow_req->token;
1415 new_ctx->ssn_offset = subflow_req->ssn_offset;
1416 new_ctx->idsn = subflow_req->idsn;
1417 } else if (subflow_req->mp_join) {
Peter Krystadec3edaa2020-03-27 14:48:40 -07001418 new_ctx->ssn_offset = subflow_req->ssn_offset;
Peter Krystadf2962342020-03-27 14:48:39 -07001419 new_ctx->mp_join = 1;
1420 new_ctx->fully_established = 1;
1421 new_ctx->backup = subflow_req->backup;
1422 new_ctx->local_id = subflow_req->local_id;
Geliang Tang2ff0e562020-09-08 10:49:39 +08001423 new_ctx->remote_id = subflow_req->remote_id;
Peter Krystadf2962342020-03-27 14:48:39 -07001424 new_ctx->token = subflow_req->token;
1425 new_ctx->thmac = subflow_req->thmac;
1426 }
Peter Krystadcec37a62020-01-21 16:56:18 -08001427}
1428
Peter Krystad2303f992020-01-21 16:56:17 -08001429static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
1430 .name = "mptcp",
1431 .owner = THIS_MODULE,
1432 .init = subflow_ulp_init,
1433 .release = subflow_ulp_release,
Peter Krystadcec37a62020-01-21 16:56:18 -08001434 .clone = subflow_ulp_clone,
Peter Krystad2303f992020-01-21 16:56:17 -08001435};
1436
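/* MPTCP subflow request socks are larger than plain TCP ones, so they
 * need a dedicated slab cache.
 */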
Peter Krystadcec37a62020-01-21 16:56:18 -08001437static int subflow_ops_init(struct request_sock_ops *subflow_ops)
1438{
1439 subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
1440 subflow_ops->slab_name = "request_sock_subflow";
1441
1442 subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
1443 subflow_ops->obj_size, 0,
1444 SLAB_ACCOUNT |
1445 SLAB_TYPESAFE_BY_RCU,
1446 NULL);
1447 if (!subflow_ops->slab)
1448 return -ENOMEM;
1449
Peter Krystad79c09492020-01-21 16:56:20 -08001450 subflow_ops->destructor = subflow_req_destructor;
1451
Peter Krystadcec37a62020-01-21 16:56:18 -08001452 return 0;
1453}
1454
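/* Boot-time initialization: set up the request sock ops, the per-family
 * af_ops tables used by subflows, and register the "mptcp" ULP.
 */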
Paolo Abenid39dcec2020-06-26 19:29:59 +02001455void __init mptcp_subflow_init(void)
Peter Krystad2303f992020-01-21 16:56:17 -08001456{
Florian Westphal08b8d082020-07-30 21:25:53 +02001457 mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
1458 if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
Peter Krystadcec37a62020-01-21 16:56:18 -08001459 panic("MPTCP: failed to init subflow request sock ops\n");
1460
1461 subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
Florian Westphal7ea851d2020-11-30 16:36:30 +01001462 subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req;
Peter Krystadcec37a62020-01-21 16:56:18 -08001463
1464 subflow_specific = ipv4_specific;
1465 subflow_specific.conn_request = subflow_v4_conn_request;
1466 subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
1467 subflow_specific.sk_rx_dst_set = subflow_finish_connect;
1468
1469#if IS_ENABLED(CONFIG_MPTCP_IPV6)
1470 subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
Florian Westphal7ea851d2020-11-30 16:36:30 +01001471 subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
Peter Krystadcec37a62020-01-21 16:56:18 -08001472
1473 subflow_v6_specific = ipv6_specific;
1474 subflow_v6_specific.conn_request = subflow_v6_conn_request;
1475 subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
1476 subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
1477
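	/* the "v6m" ops table serves subflows carried over IPv4-mapped
	 * IPv6 addresses: IPv6 socket ops on top, but the IPv4 transmit
	 * and checksum paths underneath.
	 */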
1478 subflow_v6m_specific = subflow_v6_specific;
1479 subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
1480 subflow_v6m_specific.send_check = ipv4_specific.send_check;
1481 subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
1482 subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
1483 subflow_v6m_specific.net_frag_header_len = 0;
1484#endif
1485
Davide Caratti5147dfb2020-03-27 14:48:49 -07001486 mptcp_diag_subflow_init(&subflow_ulp_ops);
1487
Peter Krystad2303f992020-01-21 16:56:17 -08001488 if (tcp_register_ulp(&subflow_ulp_ops) != 0)
1489 panic("MPTCP: failed to register subflows to ULP\n");
1490}