blob: 1c7326e04f9bee463500e17ea7c6eb840a31d89f [file] [log] [blame]
Thomas Gleixner2874c5f2019-05-27 08:55:01 +02001// SPDX-License-Identifier: GPL-2.0-or-later
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
6 *
7 * Implementation of the Transmission Control Protocol(TCP).
8 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * IPv4 specific functions
10 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
18
19/*
20 * Changes:
21 * David S. Miller : New socket lookup architecture.
22 * This code is dedicated to John Dyson.
23 * David S. Miller : Change semantics of established hash,
24 * half is devoted to TIME_WAIT sockets
25 * and the rest go in the other half.
26 * Andi Kleen : Add support for syncookies and fixed
27 * some bugs: ip options weren't passed to
28 * the TCP layer, missed a check for an
29 * ACK bit.
30 * Andi Kleen : Implemented fast path mtu discovery.
31 * Fixed many serious bugs in the
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -070032 * request_sock handling and moved
Linus Torvalds1da177e2005-04-16 15:20:36 -070033 * most of it into the af independent code.
34 * Added tail drop and some other bugfixes.
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -080035 * Added new listen semantics.
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * Mike McLagan : Routing by source
37 * Juan Jose Ciarlante: ip_dynaddr bits
38 * Andi Kleen: various fixes.
39 * Vitaly E. Lavrov : Transparent proxy revived after year
40 * coma.
41 * Andi Kleen : Fix new listen.
42 * Andi Kleen : Fix accept error reporting.
43 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
44 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
45 * a single port at the same time.
46 */
47
Joe Perchesafd465032012-03-12 07:03:32 +000048#define pr_fmt(fmt) "TCP: " fmt
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
Herbert Xueb4dea52008-12-29 23:04:08 -080050#include <linux/bottom_half.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <linux/types.h>
52#include <linux/fcntl.h>
53#include <linux/module.h>
54#include <linux/random.h>
55#include <linux/cache.h>
56#include <linux/jhash.h>
57#include <linux/init.h>
58#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090059#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070060
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020061#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070063#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030065#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070066#include <net/ipv6.h>
67#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080068#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <net/xfrm.h>
David S. Miller6e5714e2011-08-03 20:50:44 -070070#include <net/secure_seq.h>
Eliezer Tamir076bb0c2013-07-10 17:13:17 +030071#include <net/busy_poll.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072
73#include <linux/inet.h>
74#include <linux/ipv6.h>
75#include <linux/stddef.h>
76#include <linux/proc_fs.h>
77#include <linux/seq_file.h>
Ivan Delalande67973182017-06-15 18:07:06 -070078#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070079
Herbert Xucf80e0e2016-01-24 21:20:23 +080080#include <crypto/hash.h>
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080081#include <linux/scatterlist.h>
82
Song Liuc24b14c2017-10-23 09:20:24 -070083#include <trace/events/tcp.h>
84
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080085#ifdef CONFIG_TCP_MD5SIG
Eric Dumazeta915da9b2012-01-31 05:18:33 +000086static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
Eric Dumazet318cf7a2011-10-24 02:46:04 -040087 __be32 daddr, __be32 saddr, const struct tcphdr *th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080088#endif
89
Eric Dumazet5caea4e2008-11-20 00:40:07 -080090struct inet_hashinfo tcp_hashinfo;
Eric Dumazet4bc2f182010-07-09 21:22:10 +000091EXPORT_SYMBOL(tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -070092
Eric Dumazet84b114b2017-05-05 06:56:54 -070093static u32 tcp_v4_init_seq(const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -070094{
Eric Dumazet84b114b2017-05-05 06:56:54 -070095 return secure_tcp_seq(ip_hdr(skb)->daddr,
96 ip_hdr(skb)->saddr,
97 tcp_hdr(skb)->dest,
98 tcp_hdr(skb)->source);
99}
100
Eric Dumazet5d2ed052017-06-07 10:34:39 -0700101static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
Eric Dumazet84b114b2017-05-05 06:56:54 -0700102{
Eric Dumazet5d2ed052017-06-07 10:34:39 -0700103 return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104}
105
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800106int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
107{
Maciej Żenczykowski79e9fed2018-06-03 10:41:17 -0700108 const struct inet_timewait_sock *tw = inet_twsk(sktw);
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800109 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
110 struct tcp_sock *tp = tcp_sk(sk);
Maciej Żenczykowski79e9fed2018-06-03 10:41:17 -0700111 int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
112
113 if (reuse == 2) {
114 /* Still does not detect *everything* that goes through
115 * lo, since we require a loopback src or dst address
116 * or direct binding to 'lo' interface.
117 */
118 bool loopback = false;
119 if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
120 loopback = true;
121#if IS_ENABLED(CONFIG_IPV6)
122 if (tw->tw_family == AF_INET6) {
123 if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
Eric Dumazetbe2644a2019-10-01 10:49:06 -0700124 ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) ||
Maciej Żenczykowski79e9fed2018-06-03 10:41:17 -0700125 ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
Eric Dumazetbe2644a2019-10-01 10:49:06 -0700126 ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr))
Maciej Żenczykowski79e9fed2018-06-03 10:41:17 -0700127 loopback = true;
128 } else
129#endif
130 {
131 if (ipv4_is_loopback(tw->tw_daddr) ||
132 ipv4_is_loopback(tw->tw_rcv_saddr))
133 loopback = true;
134 }
135 if (!loopback)
136 reuse = 0;
137 }
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800138
139 /* With PAWS, it is safe from the viewpoint
140 of data integrity. Even without PAWS it is safe provided sequence
141 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
142
143 Actually, the idea is close to VJ's one, only timestamp cache is
144 held not per host, but per port pair and TW bucket is used as state
145 holder.
146
147 If TW bucket has been already destroyed we fall back to VJ's scheme
148 and use initial timestamp retrieved from peer table.
149 */
150 if (tcptw->tw_ts_recent_stamp &&
Arnd Bergmanncca9bab2018-07-11 12:16:12 +0200151 (!twp || (reuse && time_after32(ktime_get_seconds(),
152 tcptw->tw_ts_recent_stamp)))) {
Stefan Baranoff21684dc2018-07-10 17:25:20 -0400153 /* In case of repair and re-using TIME-WAIT sockets we still
154 * want to be sure that it is safe as above but honor the
155 * sequence numbers and time stamps set as part of the repair
156 * process.
157 *
158 * Without this check re-using a TIME-WAIT socket with TCP
159 * repair would accumulate a -1 on the repair assigned
160 * sequence number. The first time it is reused the sequence
161 * is -1, the second time -2, etc. This fixes that issue
162 * without appearing to create any others.
163 */
164 if (likely(!tp->repair)) {
Eric Dumazet0f317462019-10-10 20:17:41 -0700165 u32 seq = tcptw->tw_snd_nxt + 65535 + 2;
166
167 if (!seq)
168 seq = 1;
169 WRITE_ONCE(tp->write_seq, seq);
Stefan Baranoff21684dc2018-07-10 17:25:20 -0400170 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
171 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
172 }
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800173 sock_hold(sktw);
174 return 1;
175 }
176
177 return 0;
178}
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800179EXPORT_SYMBOL_GPL(tcp_twsk_unique);
180
Andrey Ignatovd74bad42018-03-30 15:08:05 -0700181static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
182 int addr_len)
183{
184 /* This check is replicated from tcp_v4_connect() and intended to
185 * prevent BPF program called below from accessing bytes that are out
186 * of the bound specified by user in addr_len.
187 */
188 if (addr_len < sizeof(struct sockaddr_in))
189 return -EINVAL;
190
191 sock_owned_by_me(sk);
192
193 return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
194}
195
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196/* This will initiate an outgoing connection. */
197int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
198{
David S. Miller2d7192d2011-04-26 13:28:44 -0700199 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 struct inet_sock *inet = inet_sk(sk);
201 struct tcp_sock *tp = tcp_sk(sk);
David S. Millerdca8b082011-02-24 13:38:12 -0800202 __be16 orig_sport, orig_dport;
Al Virobada8ad2006-09-26 21:27:15 -0700203 __be32 daddr, nexthop;
David S. Millerda905bd2011-05-06 16:11:19 -0700204 struct flowi4 *fl4;
David S. Miller2d7192d2011-04-26 13:28:44 -0700205 struct rtable *rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 int err;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000207 struct ip_options_rcu *inet_opt;
Haishuang Yan1946e672016-12-28 17:52:32 +0800208 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209
210 if (addr_len < sizeof(struct sockaddr_in))
211 return -EINVAL;
212
213 if (usin->sin_family != AF_INET)
214 return -EAFNOSUPPORT;
215
216 nexthop = daddr = usin->sin_addr.s_addr;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000217 inet_opt = rcu_dereference_protected(inet->inet_opt,
Hannes Frederic Sowa1e1d04e2016-04-05 17:10:15 +0200218 lockdep_sock_is_held(sk));
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000219 if (inet_opt && inet_opt->opt.srr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 if (!daddr)
221 return -EINVAL;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000222 nexthop = inet_opt->opt.faddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 }
224
David S. Millerdca8b082011-02-24 13:38:12 -0800225 orig_sport = inet->inet_sport;
226 orig_dport = usin->sin_port;
David S. Millerda905bd2011-05-06 16:11:19 -0700227 fl4 = &inet->cork.fl.u.ip4;
228 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
David S. Millerb23dd4f2011-03-02 14:31:35 -0800229 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
230 IPPROTO_TCP,
Steffen Klassert0e0d44a2013-08-28 08:04:14 +0200231 orig_sport, orig_dport, sk);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800232 if (IS_ERR(rt)) {
233 err = PTR_ERR(rt);
234 if (err == -ENETUNREACH)
Eric Dumazetf1d8cba2013-11-28 09:51:22 -0800235 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800236 return err;
Wei Dong584bdf82007-05-31 22:49:28 -0700237 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238
239 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
240 ip_rt_put(rt);
241 return -ENETUNREACH;
242 }
243
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000244 if (!inet_opt || !inet_opt->opt.srr)
David S. Millerda905bd2011-05-06 16:11:19 -0700245 daddr = fl4->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000247 if (!inet->inet_saddr)
David S. Millerda905bd2011-05-06 16:11:19 -0700248 inet->inet_saddr = fl4->saddr;
Eric Dumazetd1e559d2015-03-18 14:05:35 -0700249 sk_rcv_saddr_set(sk, inet->inet_saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000251 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 /* Reset inherited state */
253 tp->rx_opt.ts_recent = 0;
254 tp->rx_opt.ts_recent_stamp = 0;
Pavel Emelyanovee995282012-04-19 03:40:39 +0000255 if (likely(!tp->repair))
Eric Dumazet0f317462019-10-10 20:17:41 -0700256 WRITE_ONCE(tp->write_seq, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 }
258
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000259 inet->inet_dport = usin->sin_port;
Eric Dumazetd1e559d2015-03-18 14:05:35 -0700260 sk_daddr_set(sk, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800262 inet_csk(sk)->icsk_ext_hdr_len = 0;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000263 if (inet_opt)
264 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265
William Allen Simpsonbee7ca92009-11-10 09:51:18 +0000266 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267
268 /* Socket identity is still unknown (sport may be zero).
269 * However we set state to SYN-SENT and not releasing socket
270 * lock select source port, enter ourselves into the hash tables and
271 * complete initialization after this.
272 */
273 tcp_set_state(sk, TCP_SYN_SENT);
Haishuang Yan1946e672016-12-28 17:52:32 +0800274 err = inet_hash_connect(tcp_death_row, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 if (err)
276 goto failure;
277
Tom Herbert877d1f62015-07-28 16:02:05 -0700278 sk_set_txhash(sk);
Sathya Perla9e7ceb02014-10-22 21:42:01 +0530279
David S. Millerda905bd2011-05-06 16:11:19 -0700280 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
David S. Millerb23dd4f2011-03-02 14:31:35 -0800281 inet->inet_sport, inet->inet_dport, sk);
282 if (IS_ERR(rt)) {
283 err = PTR_ERR(rt);
284 rt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 goto failure;
David S. Millerb23dd4f2011-03-02 14:31:35 -0800286 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 /* OK, now commit destination to socket. */
Herbert Xubcd76112006-06-30 13:36:35 -0700288 sk->sk_gso_type = SKB_GSO_TCPV4;
Changli Gaod8d1f302010-06-10 23:31:35 -0700289 sk_setup_caps(sk, &rt->dst);
Wei Wang19f6d3f2017-01-23 10:59:22 -0800290 rt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291
Alexey Kodanev00355fa2017-02-22 13:23:55 +0300292 if (likely(!tp->repair)) {
Alexey Kodanev00355fa2017-02-22 13:23:55 +0300293 if (!tp->write_seq)
Eric Dumazet0f317462019-10-10 20:17:41 -0700294 WRITE_ONCE(tp->write_seq,
295 secure_tcp_seq(inet->inet_saddr,
296 inet->inet_daddr,
297 inet->inet_sport,
298 usin->sin_port));
Eric Dumazet5d2ed052017-06-07 10:34:39 -0700299 tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
300 inet->inet_saddr,
Eric Dumazet84b114b2017-05-05 06:56:54 -0700301 inet->inet_daddr);
Alexey Kodanev00355fa2017-02-22 13:23:55 +0300302 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303
Eric Dumazeta904a062019-11-01 10:32:19 -0700304 inet->inet_id = prandom_u32();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305
Wei Wang19f6d3f2017-01-23 10:59:22 -0800306 if (tcp_fastopen_defer_connect(sk, &err))
307 return err;
308 if (err)
309 goto failure;
310
Andrey Vagin2b916472012-11-22 01:13:58 +0000311 err = tcp_connect(sk);
Pavel Emelyanovee995282012-04-19 03:40:39 +0000312
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 if (err)
314 goto failure;
315
316 return 0;
317
318failure:
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200319 /*
320 * This unhashes the socket and releases the local port,
321 * if necessary.
322 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 tcp_set_state(sk, TCP_CLOSE);
324 ip_rt_put(rt);
325 sk->sk_route_caps = 0;
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000326 inet->inet_dport = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 return err;
328}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000329EXPORT_SYMBOL(tcp_v4_connect);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331/*
Eric Dumazet563d34d2012-07-23 09:48:52 +0200332 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
333 * It can be called through tcp_release_cb() if socket was owned by user
334 * at the time tcp_v4_err() was called to handle ICMP message.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 */
Neal Cardwell4fab9072014-08-14 12:40:05 -0400336void tcp_v4_mtu_reduced(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 struct inet_sock *inet = inet_sk(sk);
Eric Dumazet02b2faa2017-03-03 14:08:21 -0800339 struct dst_entry *dst;
340 u32 mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341
Eric Dumazet02b2faa2017-03-03 14:08:21 -0800342 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
343 return;
344 mtu = tcp_sk(sk)->mtu_info;
David S. Miller80d0a692012-07-16 03:28:06 -0700345 dst = inet_csk_update_pmtu(sk, mtu);
346 if (!dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347 return;
348
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 /* Something is about to be wrong... Remember soft error
350 * for the case, if this connection will not able to recover.
351 */
352 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
353 sk->sk_err_soft = EMSGSIZE;
354
355 mtu = dst_mtu(dst);
356
357 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
Hannes Frederic Sowa482fc602013-11-05 02:24:17 +0100358 ip_sk_accept_pmtu(sk) &&
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800359 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 tcp_sync_mss(sk, mtu);
361
362 /* Resend the TCP packet because it's
363 * clear that the old packet has been
364 * dropped. This is the new "fast" path mtu
365 * discovery.
366 */
367 tcp_simple_retransmit(sk);
368 } /* else let the usual retransmit timer handle it */
369}
Neal Cardwell4fab9072014-08-14 12:40:05 -0400370EXPORT_SYMBOL(tcp_v4_mtu_reduced);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371
David S. Miller55be7a92012-07-11 21:27:49 -0700372static void do_redirect(struct sk_buff *skb, struct sock *sk)
373{
374 struct dst_entry *dst = __sk_dst_check(sk, 0);
375
David S. Miller1ed5c482012-07-12 00:41:25 -0700376 if (dst)
David S. Miller6700c272012-07-17 03:29:28 -0700377 dst->ops->redirect(dst, sk, skb);
David S. Miller55be7a92012-07-11 21:27:49 -0700378}
379
Eric Dumazet26e37362015-03-22 10:22:22 -0700380
381/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
Eric Dumazet9cf74902016-02-02 19:31:12 -0800382void tcp_req_err(struct sock *sk, u32 seq, bool abort)
Eric Dumazet26e37362015-03-22 10:22:22 -0700383{
384 struct request_sock *req = inet_reqsk(sk);
385 struct net *net = sock_net(sk);
386
387 /* ICMPs are not backlogged, hence we cannot get
388 * an established socket here.
389 */
Eric Dumazet26e37362015-03-22 10:22:22 -0700390 if (seq != tcp_rsk(req)->snt_isn) {
Eric Dumazet02a1d6e2016-04-27 16:44:39 -0700391 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
Eric Dumazet9cf74902016-02-02 19:31:12 -0800392 } else if (abort) {
Eric Dumazet26e37362015-03-22 10:22:22 -0700393 /*
394 * Still in SYN_RECV, just remove it silently.
395 * There is no good way to pass the error to the newly
396 * created socket, and POSIX does not want network
397 * errors returned from accept().
398 */
Fan Duc6973662015-03-23 15:00:41 -0700399 inet_csk_reqsk_queue_drop(req->rsk_listener, req);
Eric Dumazet9caad862016-04-01 08:52:20 -0700400 tcp_listendrop(req->rsk_listener);
Eric Dumazet26e37362015-03-22 10:22:22 -0700401 }
Eric Dumazetef84d8c2015-10-14 11:16:26 -0700402 reqsk_put(req);
Eric Dumazet26e37362015-03-22 10:22:22 -0700403}
404EXPORT_SYMBOL(tcp_req_err);
405
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406/*
407 * This routine is called by the ICMP module when it gets some
408 * sort of error condition. If err < 0 then the socket should
409 * be closed and the error returned to the user. If err > 0
410 * it's just the icmp type << 8 | icmp code. After adjustment
411 * header points to the first 8 bytes of the tcp header. We need
412 * to find the appropriate port.
413 *
414 * The locking strategy used here is very "optimistic". When
415 * someone else accesses the socket the ICMP is just dropped
416 * and for some paths there is no check at all.
417 * A more general error queue to queue errors for later handling
418 * is probably better.
419 *
420 */
421
Stefano Brivio32bbd872018-11-08 12:19:21 +0100422int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000424 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000425 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000426 struct inet_connection_sock *icsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 struct tcp_sock *tp;
428 struct inet_sock *inet;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000429 const int type = icmp_hdr(icmp_skb)->type;
430 const int code = icmp_hdr(icmp_skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 struct sock *sk;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000432 struct sk_buff *skb;
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700433 struct request_sock *fastopen;
Eric Dumazet9a568de2017-05-16 14:00:14 -0700434 u32 seq, snd_una;
435 s32 remaining;
436 u32 delta_us;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 int err;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000438 struct net *net = dev_net(icmp_skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439
Eric Dumazet26e37362015-03-22 10:22:22 -0700440 sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
441 th->dest, iph->saddr, ntohs(th->source),
David Ahern3fa6f612017-08-07 08:44:17 -0700442 inet_iif(icmp_skb), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 if (!sk) {
Eric Dumazet5d3848b2016-04-27 16:44:29 -0700444 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
Stefano Brivio32bbd872018-11-08 12:19:21 +0100445 return -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 }
447 if (sk->sk_state == TCP_TIME_WAIT) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -0700448 inet_twsk_put(inet_twsk(sk));
Stefano Brivio32bbd872018-11-08 12:19:21 +0100449 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 }
Eric Dumazet26e37362015-03-22 10:22:22 -0700451 seq = ntohl(th->seq);
Stefano Brivio32bbd872018-11-08 12:19:21 +0100452 if (sk->sk_state == TCP_NEW_SYN_RECV) {
453 tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
454 type == ICMP_TIME_EXCEEDED ||
455 (type == ICMP_DEST_UNREACH &&
456 (code == ICMP_NET_UNREACH ||
457 code == ICMP_HOST_UNREACH)));
458 return 0;
459 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460
461 bh_lock_sock(sk);
462 /* If too many ICMPs get dropped on busy
463 * servers this needs to be solved differently.
Eric Dumazet563d34d2012-07-23 09:48:52 +0200464 * We do take care of PMTU discovery (RFC1191) special case :
465 * we can receive locally generated ICMP messages while socket is held.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466 */
Eric Dumazetb74aa932013-01-19 16:10:37 +0000467 if (sock_owned_by_user(sk)) {
468 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
Eric Dumazet02a1d6e2016-04-27 16:44:39 -0700469 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
Eric Dumazetb74aa932013-01-19 16:10:37 +0000470 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 if (sk->sk_state == TCP_CLOSE)
472 goto out;
473
stephen hemminger97e3ecd12010-03-18 11:27:32 +0000474 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
Eric Dumazet02a1d6e2016-04-27 16:44:39 -0700475 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
stephen hemminger97e3ecd12010-03-18 11:27:32 +0000476 goto out;
477 }
478
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000479 icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480 tp = tcp_sk(sk);
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700481 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
Eric Dumazetd983ea62019-10-10 20:17:38 -0700482 fastopen = rcu_dereference(tp->fastopen_rsk);
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700483 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 if (sk->sk_state != TCP_LISTEN &&
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700485 !between(seq, snd_una, tp->snd_nxt)) {
Eric Dumazet02a1d6e2016-04-27 16:44:39 -0700486 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 goto out;
488 }
489
490 switch (type) {
David S. Miller55be7a92012-07-11 21:27:49 -0700491 case ICMP_REDIRECT:
Jon Maxwell45caeaa2017-03-10 16:40:33 +1100492 if (!sock_owned_by_user(sk))
493 do_redirect(icmp_skb, sk);
David S. Miller55be7a92012-07-11 21:27:49 -0700494 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 case ICMP_SOURCE_QUENCH:
496 /* Just silently ignore these. */
497 goto out;
498 case ICMP_PARAMETERPROB:
499 err = EPROTO;
500 break;
501 case ICMP_DEST_UNREACH:
502 if (code > NR_ICMP_UNREACH)
503 goto out;
504
505 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
Eric Dumazet0d4f0602013-03-18 07:01:28 +0000506 /* We are not interested in TCP_LISTEN and open_requests
507 * (SYN-ACKs send out by Linux are always <576bytes so
508 * they should go through unfragmented).
509 */
510 if (sk->sk_state == TCP_LISTEN)
511 goto out;
512
Eric Dumazet563d34d2012-07-23 09:48:52 +0200513 tp->mtu_info = info;
Eric Dumazet144d56e2012-08-20 00:22:46 +0000514 if (!sock_owned_by_user(sk)) {
Eric Dumazet563d34d2012-07-23 09:48:52 +0200515 tcp_v4_mtu_reduced(sk);
Eric Dumazet144d56e2012-08-20 00:22:46 +0000516 } else {
Eric Dumazet7aa54702016-12-03 11:14:57 -0800517 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
Eric Dumazet144d56e2012-08-20 00:22:46 +0000518 sock_hold(sk);
519 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 goto out;
521 }
522
523 err = icmp_err_convert[code].errno;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000524 /* check if icmp_skb allows revert of backoff
525 * (see draft-zimmermann-tcp-lcd) */
526 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
527 break;
528 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700529 !icsk->icsk_backoff || fastopen)
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000530 break;
531
David S. Miller8f49c272010-11-12 13:35:00 -0800532 if (sock_owned_by_user(sk))
533 break;
534
Eric Dumazet2c4cc972019-02-15 13:36:21 -0800535 skb = tcp_rtx_queue_head(sk);
536 if (WARN_ON_ONCE(!skb))
537 break;
538
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000539 icsk->icsk_backoff--;
Eric Dumazetfcdd1cf2014-09-22 13:19:44 -0700540 icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
541 TCP_TIMEOUT_INIT;
542 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000543
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000544
Eric Dumazet9a568de2017-05-16 14:00:14 -0700545 tcp_mstamp_refresh(tp);
Eric Dumazet2fd66ff2018-09-21 08:51:47 -0700546 delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
Eric Dumazet7faee5c2014-09-05 15:33:33 -0700547 remaining = icsk->icsk_rto -
Eric Dumazet9a568de2017-05-16 14:00:14 -0700548 usecs_to_jiffies(delta_us);
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000549
Eric Dumazet9a568de2017-05-16 14:00:14 -0700550 if (remaining > 0) {
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000551 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
552 remaining, TCP_RTO_MAX);
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000553 } else {
554 /* RTO revert clocked out retransmission.
555 * Will retransmit now */
556 tcp_retransmit_timer(sk);
557 }
558
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 break;
560 case ICMP_TIME_EXCEEDED:
561 err = EHOSTUNREACH;
562 break;
563 default:
564 goto out;
565 }
566
567 switch (sk->sk_state) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568 case TCP_SYN_SENT:
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700569 case TCP_SYN_RECV:
570 /* Only in fast or simultaneous open. If a fast open socket is
571 * is already accepted it is treated as a connected one below.
572 */
Ian Morris51456b22015-04-03 09:17:26 +0100573 if (fastopen && !fastopen->sk)
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700574 break;
575
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 if (!sock_owned_by_user(sk)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 sk->sk_err = err;
578
579 sk->sk_error_report(sk);
580
581 tcp_done(sk);
582 } else {
583 sk->sk_err_soft = err;
584 }
585 goto out;
586 }
587
588 /* If we've already connected we will keep trying
589 * until we time out, or the user gives up.
590 *
591 * rfc1122 4.2.3.9 allows to consider as hard errors
592 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
593 * but it is obsoleted by pmtu discovery).
594 *
595 * Note, that in modern internet, where routing is unreliable
596 * and in each dark corner broken firewalls sit, sending random
597 * errors ordered by their masters even this two messages finally lose
598 * their original sense (even Linux sends invalid PORT_UNREACHs)
599 *
600 * Now we are in compliance with RFCs.
601 * --ANK (980905)
602 */
603
604 inet = inet_sk(sk);
605 if (!sock_owned_by_user(sk) && inet->recverr) {
606 sk->sk_err = err;
607 sk->sk_error_report(sk);
608 } else { /* Only an error on timeout */
609 sk->sk_err_soft = err;
610 }
611
612out:
613 bh_unlock_sock(sk);
614 sock_put(sk);
Stefano Brivio32bbd872018-11-08 12:19:21 +0100615 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616}
617
Daniel Borkmann28850dc2013-06-07 05:11:46 +0000618void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700620 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621
Eric Dumazet98be9b12018-02-19 11:56:52 -0800622 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
623 skb->csum_start = skb_transport_header(skb) - skb->head;
624 skb->csum_offset = offsetof(struct tcphdr, check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625}
626
Herbert Xu419f9f82010-04-11 02:15:53 +0000627/* This routine computes an IPv4 TCP checksum. */
Herbert Xubb296242010-04-11 02:15:55 +0000628void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
Herbert Xu419f9f82010-04-11 02:15:53 +0000629{
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400630 const struct inet_sock *inet = inet_sk(sk);
Herbert Xu419f9f82010-04-11 02:15:53 +0000631
632 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
633}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000634EXPORT_SYMBOL(tcp_v4_send_check);
Herbert Xu419f9f82010-04-11 02:15:53 +0000635
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636/*
637 * This routine will send an RST to the other tcp.
638 *
639 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
640 * for reset.
641 * Answer: if a packet caused RST, it is not for a socket
642 * existing in our system, if it is matched to a socket,
643 * it is just duplicate segment or bug in other side's TCP.
644 * So that we build reply only basing on parameters
645 * arrived with segment.
646 * Exception: precedence violation. We do not implement it in any case.
647 */
648
Eric Dumazeta00e7442015-09-29 07:42:39 -0700649static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650{
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400651 const struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800652 struct {
653 struct tcphdr th;
654#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800655 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800656#endif
657 } rep;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 struct ip_reply_arg arg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800659#ifdef CONFIG_TCP_MD5SIG
Florian Westphale46787f2015-12-21 21:29:25 +0100660 struct tcp_md5sig_key *key = NULL;
Shawn Lu658ddaa2012-01-31 22:35:48 +0000661 const __u8 *hash_location = NULL;
662 unsigned char newhash[16];
663 int genhash;
664 struct sock *sk1 = NULL;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800665#endif
Eric Dumazetd6fb3962019-06-13 21:22:35 -0700666 u64 transmit_time = 0;
Jon Maxwell00483692018-05-10 16:53:51 +1000667 struct sock *ctl_sk;
Eric Dumazetd6fb3962019-06-13 21:22:35 -0700668 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669
670 /* Never send a reset in response to a reset. */
671 if (th->rst)
672 return;
673
Eric Dumazetc3658e82014-11-25 07:40:04 -0800674 /* If sk not NULL, it means we did a successful lookup and incoming
675 * route had to be correct. prequeue might have dropped our dst.
676 */
677 if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678 return;
679
680 /* Swap the send and the receive. */
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800681 memset(&rep, 0, sizeof(rep));
682 rep.th.dest = th->source;
683 rep.th.source = th->dest;
684 rep.th.doff = sizeof(struct tcphdr) / 4;
685 rep.th.rst = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
687 if (th->ack) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800688 rep.th.seq = th->ack_seq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800690 rep.th.ack = 1;
691 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
692 skb->len - (th->doff << 2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 }
694
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200695 memset(&arg, 0, sizeof(arg));
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800696 arg.iov[0].iov_base = (unsigned char *)&rep;
697 arg.iov[0].iov_len = sizeof(rep.th);
698
Eric Dumazet0f85fea2014-12-09 09:56:08 -0800699 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800700#ifdef CONFIG_TCP_MD5SIG
Eric Dumazet3b24d852016-04-01 08:52:17 -0700701 rcu_read_lock();
Shawn Lu658ddaa2012-01-31 22:35:48 +0000702 hash_location = tcp_parse_md5sig_option(th);
Florian Westphal271c3b92015-12-21 21:29:26 +0100703 if (sk && sk_fullsock(sk)) {
Florian Westphale46787f2015-12-21 21:29:25 +0100704 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
705 &ip_hdr(skb)->saddr, AF_INET);
706 } else if (hash_location) {
Shawn Lu658ddaa2012-01-31 22:35:48 +0000707 /*
708 * active side is lost. Try to find listening socket through
709 * source port, and then find md5 key through listening socket.
710 * we are not loose security here:
711 * Incoming packet is checked with md5 hash with finding key,
712 * no RST generated if md5 hash doesn't match.
713 */
Craig Galleka5836362016-02-10 11:50:38 -0500714 sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
715 ip_hdr(skb)->saddr,
Tom Herbertda5e3632013-01-22 09:50:24 +0000716 th->source, ip_hdr(skb)->daddr,
David Ahern3fa6f612017-08-07 08:44:17 -0700717 ntohs(th->source), inet_iif(skb),
718 tcp_v4_sdif(skb));
Shawn Lu658ddaa2012-01-31 22:35:48 +0000719 /* don't send rst if it can't find key */
720 if (!sk1)
Eric Dumazet3b24d852016-04-01 08:52:17 -0700721 goto out;
722
Shawn Lu658ddaa2012-01-31 22:35:48 +0000723 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
724 &ip_hdr(skb)->saddr, AF_INET);
725 if (!key)
Eric Dumazet3b24d852016-04-01 08:52:17 -0700726 goto out;
727
Shawn Lu658ddaa2012-01-31 22:35:48 +0000728
Eric Dumazet39f8e582015-03-24 15:58:55 -0700729 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
Shawn Lu658ddaa2012-01-31 22:35:48 +0000730 if (genhash || memcmp(hash_location, newhash, 16) != 0)
Eric Dumazet3b24d852016-04-01 08:52:17 -0700731 goto out;
732
Shawn Lu658ddaa2012-01-31 22:35:48 +0000733 }
734
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800735 if (key) {
736 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
737 (TCPOPT_NOP << 16) |
738 (TCPOPT_MD5SIG << 8) |
739 TCPOLEN_MD5SIG);
740 /* Update length and the length the header thinks exists */
741 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
742 rep.th.doff = arg.iov[0].iov_len / 4;
743
Adam Langley49a72df2008-07-19 00:01:42 -0700744 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
Ilpo Järvinen78e645cb2008-10-09 14:37:47 -0700745 key, ip_hdr(skb)->saddr,
746 ip_hdr(skb)->daddr, &rep.th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800747 }
748#endif
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700749 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
750 ip_hdr(skb)->saddr, /* XXX */
Ilpo Järvinen52cd5752008-10-08 11:34:06 -0700751 arg.iov[0].iov_len, IPPROTO_TCP, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
Florian Westphal271c3b92015-12-21 21:29:26 +0100753 arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
754
Shawn Lue2446ea2012-02-04 12:38:09 +0000755 /* When socket is gone, all binding information is lost.
Alexey Kuznetsov4c675252012-10-12 04:34:17 +0000756 * routing might fail in this case. No choice here, if we choose to force
757 * input interface, we will misroute in case of asymmetric route.
Shawn Lue2446ea2012-02-04 12:38:09 +0000758 */
Song Liuc24b14c2017-10-23 09:20:24 -0700759 if (sk) {
Alexey Kuznetsov4c675252012-10-12 04:34:17 +0000760 arg.bound_dev_if = sk->sk_bound_dev_if;
Song Liu5c487bb2018-02-06 20:50:23 -0800761 if (sk_fullsock(sk))
762 trace_tcp_send_reset(sk, skb);
Song Liuc24b14c2017-10-23 09:20:24 -0700763 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764
Florian Westphal271c3b92015-12-21 21:29:26 +0100765 BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
766 offsetof(struct inet_timewait_sock, tw_bound_dev_if));
767
Eric Dumazet66b13d92011-10-24 03:06:21 -0400768 arg.tos = ip_hdr(skb)->tos;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900769 arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
Eric Dumazet47dcc202016-05-06 09:46:18 -0700770 local_bh_disable();
Eric Dumazet5472c3c2019-05-31 19:17:33 -0700771 ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
Eric Dumazeta842fe12019-06-12 11:57:25 -0700772 if (sk) {
Jon Maxwell00483692018-05-10 16:53:51 +1000773 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
774 inet_twsk(sk)->tw_mark : sk->sk_mark;
Eric Dumazetf6c0f5d2019-09-24 08:01:16 -0700775 ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
776 inet_twsk(sk)->tw_priority : sk->sk_priority;
Eric Dumazetd6fb3962019-06-13 21:22:35 -0700777 transmit_time = tcp_transmit_time(sk);
Eric Dumazeta842fe12019-06-12 11:57:25 -0700778 }
Jon Maxwell00483692018-05-10 16:53:51 +1000779 ip_send_unicast_reply(ctl_sk,
Eric Dumazetbdbbb852015-01-29 21:35:05 -0800780 skb, &TCP_SKB_CB(skb)->header.h4.opt,
Eric Dumazet24a2d432014-09-27 09:50:55 -0700781 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
Eric Dumazetd6fb3962019-06-13 21:22:35 -0700782 &arg, arg.iov[0].iov_len,
783 transmit_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784
Jon Maxwell00483692018-05-10 16:53:51 +1000785 ctl_sk->sk_mark = 0;
Eric Dumazet90bbcc62016-04-27 16:44:32 -0700786 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
787 __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
Eric Dumazet47dcc202016-05-06 09:46:18 -0700788 local_bh_enable();
Shawn Lu658ddaa2012-01-31 22:35:48 +0000789
790#ifdef CONFIG_TCP_MD5SIG
Eric Dumazet3b24d852016-04-01 08:52:17 -0700791out:
792 rcu_read_unlock();
Shawn Lu658ddaa2012-01-31 22:35:48 +0000793#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794}
795
796/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
797 outside socket context is ugly, certainly. What can I do?
798 */
799
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900800static void tcp_v4_send_ack(const struct sock *sk,
Eric Dumazete62a1232016-01-21 08:02:54 -0800801 struct sk_buff *skb, u32 seq, u32 ack,
Andrey Vaginee684b62013-02-11 05:50:19 +0000802 u32 win, u32 tsval, u32 tsecr, int oif,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700803 struct tcp_md5sig_key *key,
Eric Dumazet66b13d92011-10-24 03:06:21 -0400804 int reply_flags, u8 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805{
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400806 const struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 struct {
808 struct tcphdr th;
Al Viro714e85b2006-11-14 20:51:49 -0800809 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800810#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800811 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800812#endif
813 ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 } rep;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900815 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 struct ip_reply_arg arg;
Jon Maxwell00483692018-05-10 16:53:51 +1000817 struct sock *ctl_sk;
Eric Dumazetd6fb3962019-06-13 21:22:35 -0700818 u64 transmit_time;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819
820 memset(&rep.th, 0, sizeof(struct tcphdr));
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200821 memset(&arg, 0, sizeof(arg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822
823 arg.iov[0].iov_base = (unsigned char *)&rep;
824 arg.iov[0].iov_len = sizeof(rep.th);
Andrey Vaginee684b62013-02-11 05:50:19 +0000825 if (tsecr) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800826 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
827 (TCPOPT_TIMESTAMP << 8) |
828 TCPOLEN_TIMESTAMP);
Andrey Vaginee684b62013-02-11 05:50:19 +0000829 rep.opt[1] = htonl(tsval);
830 rep.opt[2] = htonl(tsecr);
Craig Schlentercb48cfe2007-01-09 00:11:15 -0800831 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 }
833
834 /* Swap the send and the receive. */
835 rep.th.dest = th->source;
836 rep.th.source = th->dest;
837 rep.th.doff = arg.iov[0].iov_len / 4;
838 rep.th.seq = htonl(seq);
839 rep.th.ack_seq = htonl(ack);
840 rep.th.ack = 1;
841 rep.th.window = htons(win);
842
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800843#ifdef CONFIG_TCP_MD5SIG
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800844 if (key) {
Andrey Vaginee684b62013-02-11 05:50:19 +0000845 int offset = (tsecr) ? 3 : 0;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800846
847 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
848 (TCPOPT_NOP << 16) |
849 (TCPOPT_MD5SIG << 8) |
850 TCPOLEN_MD5SIG);
851 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
852 rep.th.doff = arg.iov[0].iov_len/4;
853
Adam Langley49a72df2008-07-19 00:01:42 -0700854 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
Adam Langley90b7e112008-07-31 20:49:48 -0700855 key, ip_hdr(skb)->saddr,
856 ip_hdr(skb)->daddr, &rep.th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800857 }
858#endif
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700859 arg.flags = reply_flags;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700860 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
861 ip_hdr(skb)->saddr, /* XXX */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 arg.iov[0].iov_len, IPPROTO_TCP, 0);
863 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900864 if (oif)
865 arg.bound_dev_if = oif;
Eric Dumazet66b13d92011-10-24 03:06:21 -0400866 arg.tos = tos;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900867 arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
Eric Dumazet47dcc202016-05-06 09:46:18 -0700868 local_bh_disable();
Eric Dumazet5472c3c2019-05-31 19:17:33 -0700869 ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
Eric Dumazeta842fe12019-06-12 11:57:25 -0700870 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
871 inet_twsk(sk)->tw_mark : sk->sk_mark;
Eric Dumazetf6c0f5d2019-09-24 08:01:16 -0700872 ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
873 inet_twsk(sk)->tw_priority : sk->sk_priority;
Eric Dumazetd6fb3962019-06-13 21:22:35 -0700874 transmit_time = tcp_transmit_time(sk);
Jon Maxwell00483692018-05-10 16:53:51 +1000875 ip_send_unicast_reply(ctl_sk,
Eric Dumazetbdbbb852015-01-29 21:35:05 -0800876 skb, &TCP_SKB_CB(skb)->header.h4.opt,
Eric Dumazet24a2d432014-09-27 09:50:55 -0700877 ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
Eric Dumazetd6fb3962019-06-13 21:22:35 -0700878 &arg, arg.iov[0].iov_len,
879 transmit_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880
Jon Maxwell00483692018-05-10 16:53:51 +1000881 ctl_sk->sk_mark = 0;
Eric Dumazet90bbcc62016-04-27 16:44:32 -0700882 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
Eric Dumazet47dcc202016-05-06 09:46:18 -0700883 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884}
885
886static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
887{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700888 struct inet_timewait_sock *tw = inet_twsk(sk);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800889 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900891 tcp_v4_send_ack(sk, skb,
Eric Dumazete62a1232016-01-21 08:02:54 -0800892 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200893 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
Eric Dumazet9a568de2017-05-16 14:00:14 -0700894 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900895 tcptw->tw_ts_recent,
896 tw->tw_bound_dev_if,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700897 tcp_twsk_md5_key(tcptw),
Eric Dumazet66b13d92011-10-24 03:06:21 -0400898 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
899 tw->tw_tos
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900900 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700902 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903}
904
Eric Dumazeta00e7442015-09-29 07:42:39 -0700905static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200906 struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907{
Jerry Chu168a8f52012-08-31 12:29:13 +0000908 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
909 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
910 */
Eric Dumazete62a1232016-01-21 08:02:54 -0800911 u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
912 tcp_sk(sk)->snd_nxt;
913
Eric Dumazet20a2b492016-08-22 11:31:10 -0700914 /* RFC 7323 2.3
915 * The window field (SEG.WND) of every outgoing segment, with the
916 * exception of <SYN> segments, MUST be right-shifted by
917 * Rcv.Wind.Shift bits:
918 */
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900919 tcp_v4_send_ack(sk, skb, seq,
Eric Dumazet20a2b492016-08-22 11:31:10 -0700920 tcp_rsk(req)->rcv_nxt,
921 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
Eric Dumazet9a568de2017-05-16 14:00:14 -0700922 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900923 req->ts_recent,
924 0,
Christoph Paasch30791ac2017-12-11 00:05:46 -0800925 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000926 AF_INET),
Eric Dumazet66b13d92011-10-24 03:06:21 -0400927 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
928 ip_hdr(skb)->tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929}
930
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931/*
Kris Katterjohn9bf1d832008-02-17 22:29:19 -0800932 * Send a SYN-ACK after having received a SYN.
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700933 * This still operates on a request_sock only, not on a big
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 * socket.
935 */
Eric Dumazet0f935db2015-09-25 07:39:21 -0700936static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
Octavian Purdilad6274bd2014-06-25 17:09:58 +0300937 struct flowi *fl,
Octavian Purdila72659ec2010-01-17 19:09:39 -0800938 struct request_sock *req,
Eric Dumazetca6fb062015-10-02 11:43:35 -0700939 struct tcp_fastopen_cookie *foc,
Eric Dumazetb3d05142016-04-13 22:05:39 -0700940 enum tcp_synack_type synack_type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700942 const struct inet_request_sock *ireq = inet_rsk(req);
David S. Miller6bd023f2011-05-18 18:32:03 -0400943 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 int err = -1;
Weilong Chend41db5a2013-12-23 14:37:28 +0800945 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946
947 /* First, grab a route. */
David S. Millerba3f7f02012-07-17 14:02:46 -0700948 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800949 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950
Eric Dumazetb3d05142016-04-13 22:05:39 -0700951 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952
953 if (skb) {
Eric Dumazet634fb9792013-10-09 15:21:29 -0700954 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955
Eric Dumazet2ab2ddd2018-10-02 12:35:05 -0700956 rcu_read_lock();
Eric Dumazet634fb9792013-10-09 15:21:29 -0700957 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
958 ireq->ir_rmt_addr,
Eric Dumazet2ab2ddd2018-10-02 12:35:05 -0700959 rcu_dereference(ireq->ireq_opt));
960 rcu_read_unlock();
Gerrit Renkerb9df3cb2006-11-14 11:21:36 -0200961 err = net_xmit_eval(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 }
963
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 return err;
965}
966
967/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700968 * IPv4 request_sock destructor.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700970static void tcp_v4_reqsk_destructor(struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971{
Eric Dumazetc92e8c02017-10-20 09:04:13 -0700972 kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973}
974
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800975#ifdef CONFIG_TCP_MD5SIG
976/*
977 * RFC2385 MD5 checksumming requires a mapping of
978 * IP address->MD5 Key.
979 * We need to maintain these in the sk structure.
980 */
981
Eric Dumazet921f9a02019-02-26 09:49:11 -0800982DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
Eric Dumazet6015c712018-11-27 15:03:21 -0800983EXPORT_SYMBOL(tcp_md5_needed);
984
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800985/* Find the Key structure for an address. */
Eric Dumazet6015c712018-11-27 15:03:21 -0800986struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
987 const union tcp_md5_addr *addr,
988 int family)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800989{
Eric Dumazetfd3a1542015-03-24 15:58:56 -0700990 const struct tcp_sock *tp = tcp_sk(sk);
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000991 struct tcp_md5sig_key *key;
Eric Dumazetfd3a1542015-03-24 15:58:56 -0700992 const struct tcp_md5sig_info *md5sig;
Ivan Delalande67973182017-06-15 18:07:06 -0700993 __be32 mask;
994 struct tcp_md5sig_key *best_match = NULL;
995 bool match;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800996
Eric Dumazeta8afca02012-01-31 18:45:40 +0000997 /* caller either holds rcu_read_lock() or socket lock */
998 md5sig = rcu_dereference_check(tp->md5sig_info,
Hannes Frederic Sowa1e1d04e2016-04-05 17:10:15 +0200999 lockdep_sock_is_held(sk));
Eric Dumazeta8afca02012-01-31 18:45:40 +00001000 if (!md5sig)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001001 return NULL;
Arnd Bergmann083a0322017-06-20 22:11:21 +02001002
Sasha Levinb67bfe02013-02-27 17:06:00 -08001003 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001004 if (key->family != family)
1005 continue;
Ivan Delalande67973182017-06-15 18:07:06 -07001006
1007 if (family == AF_INET) {
1008 mask = inet_make_mask(key->prefixlen);
1009 match = (key->addr.a4.s_addr & mask) ==
1010 (addr->a4.s_addr & mask);
1011#if IS_ENABLED(CONFIG_IPV6)
1012 } else if (family == AF_INET6) {
1013 match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
1014 key->prefixlen);
1015#endif
1016 } else {
1017 match = false;
1018 }
1019
1020 if (match && (!best_match ||
1021 key->prefixlen > best_match->prefixlen))
1022 best_match = key;
1023 }
1024 return best_match;
1025}
Eric Dumazet6015c712018-11-27 15:03:21 -08001026EXPORT_SYMBOL(__tcp_md5_do_lookup);
Ivan Delalande67973182017-06-15 18:07:06 -07001027
Wu Fengguange8f37d52017-07-06 07:58:53 +08001028static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
1029 const union tcp_md5_addr *addr,
1030 int family, u8 prefixlen)
Ivan Delalande67973182017-06-15 18:07:06 -07001031{
1032 const struct tcp_sock *tp = tcp_sk(sk);
1033 struct tcp_md5sig_key *key;
1034 unsigned int size = sizeof(struct in_addr);
1035 const struct tcp_md5sig_info *md5sig;
1036
1037 /* caller either holds rcu_read_lock() or socket lock */
1038 md5sig = rcu_dereference_check(tp->md5sig_info,
1039 lockdep_sock_is_held(sk));
1040 if (!md5sig)
1041 return NULL;
1042#if IS_ENABLED(CONFIG_IPV6)
1043 if (family == AF_INET6)
1044 size = sizeof(struct in6_addr);
1045#endif
1046 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
1047 if (key->family != family)
1048 continue;
1049 if (!memcmp(&key->addr, addr, size) &&
1050 key->prefixlen == prefixlen)
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001051 return key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001052 }
1053 return NULL;
1054}
1055
Eric Dumazetb83e3de2015-09-25 07:39:15 -07001056struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
Eric Dumazetfd3a1542015-03-24 15:58:56 -07001057 const struct sock *addr_sk)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001058{
Eric Dumazetb52e6922015-04-09 14:36:42 -07001059 const union tcp_md5_addr *addr;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001060
Eric Dumazetb52e6922015-04-09 14:36:42 -07001061 addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001062 return tcp_md5_do_lookup(sk, addr, AF_INET);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001063}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001064EXPORT_SYMBOL(tcp_v4_md5_lookup);
1065
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001066/* This can be called on a newly created socket, from other files */
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001067int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
Ivan Delalande67973182017-06-15 18:07:06 -07001068 int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
1069 gfp_t gfp)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001070{
1071 /* Add Key to the list */
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -07001072 struct tcp_md5sig_key *key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001073 struct tcp_sock *tp = tcp_sk(sk);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001074 struct tcp_md5sig_info *md5sig;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001075
Ivan Delalande67973182017-06-15 18:07:06 -07001076 key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001077 if (key) {
1078 /* Pre-existing entry - just update that one. */
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001079 memcpy(key->key, newkey, newkeylen);
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -07001080 key->keylen = newkeylen;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001081 return 0;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001082 }
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001083
Eric Dumazeta8afca02012-01-31 18:45:40 +00001084 md5sig = rcu_dereference_protected(tp->md5sig_info,
Hannes Frederic Sowa1e1d04e2016-04-05 17:10:15 +02001085 lockdep_sock_is_held(sk));
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001086 if (!md5sig) {
1087 md5sig = kmalloc(sizeof(*md5sig), gfp);
1088 if (!md5sig)
1089 return -ENOMEM;
1090
1091 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1092 INIT_HLIST_HEAD(&md5sig->head);
Eric Dumazeta8afca02012-01-31 18:45:40 +00001093 rcu_assign_pointer(tp->md5sig_info, md5sig);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001094 }
1095
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001096 key = sock_kmalloc(sk, sizeof(*key), gfp);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001097 if (!key)
1098 return -ENOMEM;
Eric Dumazet71cea172013-05-20 06:52:26 +00001099 if (!tcp_alloc_md5sig_pool()) {
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001100 sock_kfree_s(sk, key, sizeof(*key));
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001101 return -ENOMEM;
1102 }
1103
1104 memcpy(key->key, newkey, newkeylen);
1105 key->keylen = newkeylen;
1106 key->family = family;
Ivan Delalande67973182017-06-15 18:07:06 -07001107 key->prefixlen = prefixlen;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001108 memcpy(&key->addr, addr,
1109 (family == AF_INET6) ? sizeof(struct in6_addr) :
1110 sizeof(struct in_addr));
1111 hlist_add_head_rcu(&key->node, &md5sig->head);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001112 return 0;
1113}
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001114EXPORT_SYMBOL(tcp_md5_do_add);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001115
Ivan Delalande67973182017-06-15 18:07:06 -07001116int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
1117 u8 prefixlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001118{
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001119 struct tcp_md5sig_key *key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001120
Ivan Delalande67973182017-06-15 18:07:06 -07001121 key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001122 if (!key)
1123 return -ENOENT;
1124 hlist_del_rcu(&key->node);
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001125 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001126 kfree_rcu(key, rcu);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001127 return 0;
1128}
1129EXPORT_SYMBOL(tcp_md5_do_del);
1130
stephen hemmingere0683e702012-10-26 14:31:40 +00001131static void tcp_clear_md5_list(struct sock *sk)
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001132{
1133 struct tcp_sock *tp = tcp_sk(sk);
1134 struct tcp_md5sig_key *key;
Sasha Levinb67bfe02013-02-27 17:06:00 -08001135 struct hlist_node *n;
Eric Dumazeta8afca02012-01-31 18:45:40 +00001136 struct tcp_md5sig_info *md5sig;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001137
Eric Dumazeta8afca02012-01-31 18:45:40 +00001138 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1139
Sasha Levinb67bfe02013-02-27 17:06:00 -08001140 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001141 hlist_del_rcu(&key->node);
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001142 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001143 kfree_rcu(key, rcu);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001144 }
1145}
1146
Ivan Delalande8917a772017-06-15 18:07:07 -07001147static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
1148 char __user *optval, int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001149{
1150 struct tcp_md5sig cmd;
1151 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
Ivan Delalande8917a772017-06-15 18:07:07 -07001152 u8 prefixlen = 32;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001153
1154 if (optlen < sizeof(cmd))
1155 return -EINVAL;
1156
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001157 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001158 return -EFAULT;
1159
1160 if (sin->sin_family != AF_INET)
1161 return -EINVAL;
1162
Ivan Delalande8917a772017-06-15 18:07:07 -07001163 if (optname == TCP_MD5SIG_EXT &&
1164 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
1165 prefixlen = cmd.tcpm_prefixlen;
1166 if (prefixlen > 32)
1167 return -EINVAL;
1168 }
1169
Dmitry Popov64a124e2014-08-03 22:45:19 +04001170 if (!cmd.tcpm_keylen)
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001171 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
Ivan Delalande8917a772017-06-15 18:07:07 -07001172 AF_INET, prefixlen);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001173
1174 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1175 return -EINVAL;
1176
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001177 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
Ivan Delalande8917a772017-06-15 18:07:07 -07001178 AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001179 GFP_KERNEL);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001180}
1181
Eric Dumazet19689e32016-06-27 18:51:53 +02001182static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
1183 __be32 daddr, __be32 saddr,
1184 const struct tcphdr *th, int nbytes)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001185{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001186 struct tcp4_pseudohdr *bp;
Adam Langley49a72df2008-07-19 00:01:42 -07001187 struct scatterlist sg;
Eric Dumazet19689e32016-06-27 18:51:53 +02001188 struct tcphdr *_th;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001189
Eric Dumazet19689e32016-06-27 18:51:53 +02001190 bp = hp->scratch;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001191 bp->saddr = saddr;
1192 bp->daddr = daddr;
1193 bp->pad = 0;
YOSHIFUJI Hideaki076fb722008-04-17 12:48:12 +09001194 bp->protocol = IPPROTO_TCP;
Adam Langley49a72df2008-07-19 00:01:42 -07001195 bp->len = cpu_to_be16(nbytes);
David S. Millerc7da57a2007-10-26 00:41:21 -07001196
Eric Dumazet19689e32016-06-27 18:51:53 +02001197 _th = (struct tcphdr *)(bp + 1);
1198 memcpy(_th, th, sizeof(*th));
1199 _th->check = 0;
1200
1201 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
1202 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
1203 sizeof(*bp) + sizeof(*th));
Herbert Xucf80e0e2016-01-24 21:20:23 +08001204 return crypto_ahash_update(hp->md5_req);
Adam Langley49a72df2008-07-19 00:01:42 -07001205}
1206
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001207static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001208 __be32 daddr, __be32 saddr, const struct tcphdr *th)
Adam Langley49a72df2008-07-19 00:01:42 -07001209{
1210 struct tcp_md5sig_pool *hp;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001211 struct ahash_request *req;
Adam Langley49a72df2008-07-19 00:01:42 -07001212
1213 hp = tcp_get_md5sig_pool();
1214 if (!hp)
1215 goto clear_hash_noput;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001216 req = hp->md5_req;
Adam Langley49a72df2008-07-19 00:01:42 -07001217
Herbert Xucf80e0e2016-01-24 21:20:23 +08001218 if (crypto_ahash_init(req))
Adam Langley49a72df2008-07-19 00:01:42 -07001219 goto clear_hash;
Eric Dumazet19689e32016-06-27 18:51:53 +02001220 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
Adam Langley49a72df2008-07-19 00:01:42 -07001221 goto clear_hash;
1222 if (tcp_md5_hash_key(hp, key))
1223 goto clear_hash;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001224 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1225 if (crypto_ahash_final(req))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001226 goto clear_hash;
1227
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001228 tcp_put_md5sig_pool();
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001229 return 0;
Adam Langley49a72df2008-07-19 00:01:42 -07001230
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001231clear_hash:
1232 tcp_put_md5sig_pool();
1233clear_hash_noput:
1234 memset(md5_hash, 0, 16);
Adam Langley49a72df2008-07-19 00:01:42 -07001235 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001236}
1237
Eric Dumazet39f8e582015-03-24 15:58:55 -07001238int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1239 const struct sock *sk,
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001240 const struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001241{
Adam Langley49a72df2008-07-19 00:01:42 -07001242 struct tcp_md5sig_pool *hp;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001243 struct ahash_request *req;
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001244 const struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001245 __be32 saddr, daddr;
1246
Eric Dumazet39f8e582015-03-24 15:58:55 -07001247 if (sk) { /* valid for establish/request sockets */
1248 saddr = sk->sk_rcv_saddr;
1249 daddr = sk->sk_daddr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001250 } else {
Adam Langley49a72df2008-07-19 00:01:42 -07001251 const struct iphdr *iph = ip_hdr(skb);
1252 saddr = iph->saddr;
1253 daddr = iph->daddr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001254 }
Adam Langley49a72df2008-07-19 00:01:42 -07001255
1256 hp = tcp_get_md5sig_pool();
1257 if (!hp)
1258 goto clear_hash_noput;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001259 req = hp->md5_req;
Adam Langley49a72df2008-07-19 00:01:42 -07001260
Herbert Xucf80e0e2016-01-24 21:20:23 +08001261 if (crypto_ahash_init(req))
Adam Langley49a72df2008-07-19 00:01:42 -07001262 goto clear_hash;
1263
Eric Dumazet19689e32016-06-27 18:51:53 +02001264 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
Adam Langley49a72df2008-07-19 00:01:42 -07001265 goto clear_hash;
1266 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1267 goto clear_hash;
1268 if (tcp_md5_hash_key(hp, key))
1269 goto clear_hash;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001270 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1271 if (crypto_ahash_final(req))
Adam Langley49a72df2008-07-19 00:01:42 -07001272 goto clear_hash;
1273
1274 tcp_put_md5sig_pool();
1275 return 0;
1276
1277clear_hash:
1278 tcp_put_md5sig_pool();
1279clear_hash_noput:
1280 memset(md5_hash, 0, 16);
1281 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001282}
Adam Langley49a72df2008-07-19 00:01:42 -07001283EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001284
Eric Dumazetba8e2752015-10-02 11:43:28 -07001285#endif
1286
Eric Dumazetff74e232015-03-24 15:58:54 -07001287/* Called with rcu_read_lock() */
Eric Dumazetba8e2752015-10-02 11:43:28 -07001288static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
Eric Dumazetff74e232015-03-24 15:58:54 -07001289 const struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001290{
Eric Dumazetba8e2752015-10-02 11:43:28 -07001291#ifdef CONFIG_TCP_MD5SIG
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001292 /*
1293 * This gets called for each TCP segment that arrives
1294 * so we want to be efficient.
1295 * We have 3 drop cases:
1296 * o No MD5 hash and one expected.
1297 * o MD5 hash and we're not expecting one.
1298 * o MD5 hash and its wrong.
1299 */
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001300 const __u8 *hash_location = NULL;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001301 struct tcp_md5sig_key *hash_expected;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001302 const struct iphdr *iph = ip_hdr(skb);
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001303 const struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001304 int genhash;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001305 unsigned char newhash[16];
1306
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001307 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1308 AF_INET);
YOSHIFUJI Hideaki7d5d5522008-04-17 12:29:53 +09001309 hash_location = tcp_parse_md5sig_option(th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001310
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001311 /* We've parsed the options - do we have a hash? */
1312 if (!hash_expected && !hash_location)
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001313 return false;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001314
1315 if (hash_expected && !hash_location) {
Eric Dumazetc10d9312016-04-29 14:16:47 -07001316 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001317 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001318 }
1319
1320 if (!hash_expected && hash_location) {
Eric Dumazetc10d9312016-04-29 14:16:47 -07001321 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001322 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001323 }
1324
1325 /* Okay, so this is hash_expected and hash_location -
1326 * so we need to calculate the checksum.
1327 */
Adam Langley49a72df2008-07-19 00:01:42 -07001328 genhash = tcp_v4_md5_hash_skb(newhash,
1329 hash_expected,
Eric Dumazet39f8e582015-03-24 15:58:55 -07001330 NULL, skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001331
1332 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
Eric Dumazet72145a62016-08-24 09:01:23 -07001333 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
Joe Perchese87cc472012-05-13 21:56:26 +00001334 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1335 &iph->saddr, ntohs(th->source),
1336 &iph->daddr, ntohs(th->dest),
1337 genhash ? " tcp_v4_calc_md5_hash failed"
1338 : "");
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001339 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001340 }
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001341 return false;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001342#endif
Eric Dumazetba8e2752015-10-02 11:43:28 -07001343 return false;
1344}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001345
Eric Dumazetb40cf182015-09-25 07:39:08 -07001346static void tcp_v4_init_req(struct request_sock *req,
1347 const struct sock *sk_listener,
Octavian Purdila16bea702014-06-25 17:09:53 +03001348 struct sk_buff *skb)
1349{
1350 struct inet_request_sock *ireq = inet_rsk(req);
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001351 struct net *net = sock_net(sk_listener);
Octavian Purdila16bea702014-06-25 17:09:53 +03001352
Eric Dumazet08d2cc3b2015-03-18 14:05:38 -07001353 sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1354 sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001355 RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
Octavian Purdila16bea702014-06-25 17:09:53 +03001356}
1357
Eric Dumazetf9646292015-09-29 07:42:50 -07001358static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1359 struct flowi *fl,
Soheil Hassas Yeganeh4396e462017-03-15 16:30:46 -04001360 const struct request_sock *req)
Octavian Purdilad94e0412014-06-25 17:09:55 +03001361{
Soheil Hassas Yeganeh4396e462017-03-15 16:30:46 -04001362 return inet_csk_route_req(sk, &fl->u.ip4, req);
Octavian Purdilad94e0412014-06-25 17:09:55 +03001363}
1364
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001365struct request_sock_ops tcp_request_sock_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 .family = PF_INET,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001367 .obj_size = sizeof(struct tcp_request_sock),
Octavian Purdila5db92c92014-06-25 17:09:59 +03001368 .rtx_syn_ack = tcp_rtx_synack,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001369 .send_ack = tcp_v4_reqsk_send_ack,
1370 .destructor = tcp_v4_reqsk_destructor,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 .send_reset = tcp_v4_send_reset,
stephen hemminger688d1942014-08-29 23:32:05 -07001372 .syn_ack_timeout = tcp_syn_ack_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373};
1374
Stephen Hemmingerb2e4b3de2009-09-01 19:25:03 +00001375static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
Octavian Purdila2aec4a22014-06-25 17:10:00 +03001376 .mss_clamp = TCP_MSS_DEFAULT,
Octavian Purdila16bea702014-06-25 17:09:53 +03001377#ifdef CONFIG_TCP_MD5SIG
Eric Dumazetfd3a1542015-03-24 15:58:56 -07001378 .req_md5_lookup = tcp_v4_md5_lookup,
John Dykstrae3afe7b2009-07-16 05:04:51 +00001379 .calc_md5_hash = tcp_v4_md5_hash_skb,
Andrew Mortonb6332e62006-11-30 19:16:28 -08001380#endif
Octavian Purdila16bea702014-06-25 17:09:53 +03001381 .init_req = tcp_v4_init_req,
Octavian Purdilafb7b37a2014-06-25 17:09:54 +03001382#ifdef CONFIG_SYN_COOKIES
1383 .cookie_init_seq = cookie_v4_init_sequence,
1384#endif
Octavian Purdilad94e0412014-06-25 17:09:55 +03001385 .route_req = tcp_v4_route_req,
Eric Dumazet84b114b2017-05-05 06:56:54 -07001386 .init_seq = tcp_v4_init_seq,
1387 .init_ts_off = tcp_v4_init_ts_off,
Octavian Purdilad6274bd2014-06-25 17:09:58 +03001388 .send_synack = tcp_v4_send_synack,
Octavian Purdila16bea702014-06-25 17:09:53 +03001389};
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001390
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1392{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 /* Never answer to SYNs send to broadcast or multicast */
Eric Dumazet511c3f92009-06-02 05:14:27 +00001394 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395 goto drop;
1396
Octavian Purdila1fb6f152014-06-25 17:10:02 +03001397 return tcp_conn_request(&tcp_request_sock_ops,
1398 &tcp_request_sock_ipv4_ops, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400drop:
Eric Dumazet9caad862016-04-01 08:52:20 -07001401 tcp_listendrop(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 return 0;
1403}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001404EXPORT_SYMBOL(tcp_v4_conn_request);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405
1406
1407/*
1408 * The three way handshake has completed - we got a valid synack -
1409 * now create the new socket.
1410 */
Eric Dumazet0c271712015-09-29 07:42:48 -07001411struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001412 struct request_sock *req,
Eric Dumazet5e0724d2015-10-22 08:20:46 -07001413 struct dst_entry *dst,
1414 struct request_sock *req_unhash,
1415 bool *own_req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001416{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001417 struct inet_request_sock *ireq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418 struct inet_sock *newinet;
1419 struct tcp_sock *newtp;
1420 struct sock *newsk;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001421#ifdef CONFIG_TCP_MD5SIG
1422 struct tcp_md5sig_key *key;
1423#endif
Eric Dumazetf6d8bd02011-04-21 09:45:37 +00001424 struct ip_options_rcu *inet_opt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425
1426 if (sk_acceptq_is_full(sk))
1427 goto exit_overflow;
1428
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429 newsk = tcp_create_openreq_child(sk, req, skb);
1430 if (!newsk)
Balazs Scheidler093d2822010-10-21 13:06:43 +02001431 goto exit_nonewsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432
Herbert Xubcd76112006-06-30 13:36:35 -07001433 newsk->sk_gso_type = SKB_GSO_TCPV4;
Neal Cardwellfae6ef82012-08-19 03:30:38 +00001434 inet_sk_rx_dst_set(newsk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435
1436 newtp = tcp_sk(newsk);
1437 newinet = inet_sk(newsk);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001438 ireq = inet_rsk(req);
Eric Dumazetd1e559d2015-03-18 14:05:35 -07001439 sk_daddr_set(newsk, ireq->ir_rmt_addr);
1440 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
David Ahern6dd9a142015-12-16 13:20:44 -08001441 newsk->sk_bound_dev_if = ireq->ir_iif;
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001442 newinet->inet_saddr = ireq->ir_loc_addr;
1443 inet_opt = rcu_dereference(ireq->ireq_opt);
1444 RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001445 newinet->mc_index = inet_iif(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001446 newinet->mc_ttl = ip_hdr(skb)->ttl;
Jiri Benc4c507d22012-02-09 09:35:49 +00001447 newinet->rcv_tos = ip_hdr(skb)->tos;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001448 inet_csk(newsk)->icsk_ext_hdr_len = 0;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +00001449 if (inet_opt)
1450 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
Eric Dumazeta904a062019-11-01 10:32:19 -07001451 newinet->inet_id = prandom_u32();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452
Eric Dumazetdfd25ff2012-03-10 09:20:21 +00001453 if (!dst) {
1454 dst = inet_csk_route_child_sock(sk, newsk, req);
1455 if (!dst)
1456 goto put_and_exit;
1457 } else {
1458 /* syncookie case : see end of cookie_v4_check() */
1459 }
David S. Miller0e734412011-05-08 15:28:03 -07001460 sk_setup_caps(newsk, dst);
1461
Daniel Borkmann81164412015-01-05 23:57:48 +01001462 tcp_ca_openreq_child(newsk, dst);
1463
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464 tcp_sync_mss(newsk, dst_mtu(dst));
Eric Dumazet3541f9e2017-02-02 08:04:56 -08001465 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
Tom Quetchenbachf5fff5d2008-09-21 00:21:51 -07001466
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467 tcp_initialize_rcv_mss(newsk);
1468
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001469#ifdef CONFIG_TCP_MD5SIG
1470 /* Copy over the MD5 key from the original socket */
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001471 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1472 AF_INET);
Ian Morris00db4122015-04-03 09:17:27 +01001473 if (key) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001474 /*
1475 * We're using one, so create a matching key
1476 * on the newsk structure. If we fail to get
1477 * memory, then we end up not copying the key
1478 * across. Shucks.
1479 */
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001480 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
Ivan Delalande67973182017-06-15 18:07:06 -07001481 AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
Eric Dumazeta4654192010-05-16 00:36:33 -07001482 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001483 }
1484#endif
1485
David S. Miller0e734412011-05-08 15:28:03 -07001486 if (__inet_inherit_port(sk, newsk) < 0)
1487 goto put_and_exit;
Eric Dumazet5e0724d2015-10-22 08:20:46 -07001488 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001489 if (likely(*own_req)) {
Eric Dumazet49a496c2015-11-05 12:50:19 -08001490 tcp_move_syn(newtp, req);
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001491 ireq->ireq_opt = NULL;
1492 } else {
1493 newinet->inet_opt = NULL;
1494 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 return newsk;
1496
1497exit_overflow:
Eric Dumazetc10d9312016-04-29 14:16:47 -07001498 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
Balazs Scheidler093d2822010-10-21 13:06:43 +02001499exit_nonewsk:
1500 dst_release(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501exit:
Eric Dumazet9caad862016-04-01 08:52:20 -07001502 tcp_listendrop(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 return NULL;
David S. Miller0e734412011-05-08 15:28:03 -07001504put_and_exit:
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001505 newinet->inet_opt = NULL;
Christoph Paasche337e242012-12-14 04:07:58 +00001506 inet_csk_prepare_forced_close(newsk);
1507 tcp_done(newsk);
David S. Miller0e734412011-05-08 15:28:03 -07001508 goto exit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001510EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511
Eric Dumazet079096f2015-10-02 11:43:32 -07001512static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514#ifdef CONFIG_SYN_COOKIES
Eric Dumazet079096f2015-10-02 11:43:32 -07001515 const struct tcphdr *th = tcp_hdr(skb);
1516
Florian Westphalaf9b4732010-06-03 00:43:44 +00001517 if (!th->syn)
Cong Wang461b74c2014-10-15 14:33:22 -07001518 sk = cookie_v4_check(sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519#endif
1520 return sk;
1521}
1522
Petar Penkov9349d602019-07-29 09:59:14 -07001523u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
1524 struct tcphdr *th, u32 *cookie)
1525{
1526 u16 mss = 0;
1527#ifdef CONFIG_SYN_COOKIES
1528 mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
1529 &tcp_request_sock_ipv4_ops, sk, th);
1530 if (mss) {
1531 *cookie = __cookie_v4_init_sequence(iph, th, &mss);
1532 tcp_synq_overflow(sk);
1533 }
1534#endif
1535 return mss;
1536}
1537
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538/* The socket must have it's spinlock held when we get
Eric Dumazete994b2f2015-10-02 11:43:39 -07001539 * here, unless it is a TCP_LISTEN socket.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 *
1541 * We have a potential double-lock case here, so even when
1542 * doing backlog processing we use the BH locking scheme.
1543 * This is because we cannot sleep with the original spinlock
1544 * held.
1545 */
1546int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1547{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001548 struct sock *rsk;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001549
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
Eric Dumazet404e0a82012-07-29 23:20:37 +00001551 struct dst_entry *dst = sk->sk_rx_dst;
1552
Tom Herbertbdeab992011-08-14 19:45:55 +00001553 sock_rps_save_rxhash(sk, skb);
Eric Dumazet3d973792014-11-11 05:54:27 -08001554 sk_mark_napi_id(sk, skb);
Eric Dumazet404e0a82012-07-29 23:20:37 +00001555 if (dst) {
Eric Dumazet505fbcf2012-07-27 06:23:40 +00001556 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
Ian Morris51456b22015-04-03 09:17:26 +01001557 !dst->ops->check(dst, 0)) {
David S. Miller92101b32012-07-23 16:29:00 -07001558 dst_release(dst);
1559 sk->sk_rx_dst = NULL;
1560 }
1561 }
Yafang Shao3d97d882018-05-29 23:27:31 +08001562 tcp_rcv_established(sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 return 0;
1564 }
1565
Eric Dumazet12e25e12015-06-03 23:49:21 -07001566 if (tcp_checksum_complete(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 goto csum_err;
1568
1569 if (sk->sk_state == TCP_LISTEN) {
Eric Dumazet079096f2015-10-02 11:43:32 -07001570 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1571
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 if (!nsk)
1573 goto discard;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 if (nsk != sk) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001575 if (tcp_child_process(sk, nsk, skb)) {
1576 rsk = nsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001578 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579 return 0;
1580 }
Eric Dumazetca551582010-06-03 09:03:58 +00001581 } else
Tom Herbertbdeab992011-08-14 19:45:55 +00001582 sock_rps_save_rxhash(sk, skb);
Eric Dumazetca551582010-06-03 09:03:58 +00001583
Eric Dumazet72ab4a82015-09-29 07:42:41 -07001584 if (tcp_rcv_state_process(sk, skb)) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001585 rsk = sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001587 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 return 0;
1589
1590reset:
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001591 tcp_v4_send_reset(rsk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592discard:
1593 kfree_skb(skb);
1594 /* Be careful here. If this function gets more complicated and
1595 * gcc suffers from register pressure on the x86, sk (in %ebx)
1596 * might be destroyed here. This current version compiles correctly,
1597 * but you have been warned.
1598 */
1599 return 0;
1600
1601csum_err:
Eric Dumazetc10d9312016-04-29 14:16:47 -07001602 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1603 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604 goto discard;
1605}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001606EXPORT_SYMBOL(tcp_v4_do_rcv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607
Paolo Abeni74874492017-09-28 15:51:36 +02001608int tcp_v4_early_demux(struct sk_buff *skb)
David S. Miller41063e92012-06-19 21:22:05 -07001609{
David S. Miller41063e92012-06-19 21:22:05 -07001610 const struct iphdr *iph;
1611 const struct tcphdr *th;
1612 struct sock *sk;
David S. Miller41063e92012-06-19 21:22:05 -07001613
David S. Miller41063e92012-06-19 21:22:05 -07001614 if (skb->pkt_type != PACKET_HOST)
Paolo Abeni74874492017-09-28 15:51:36 +02001615 return 0;
David S. Miller41063e92012-06-19 21:22:05 -07001616
Eric Dumazet45f00f92012-10-22 21:42:47 +00001617 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
Paolo Abeni74874492017-09-28 15:51:36 +02001618 return 0;
David S. Miller41063e92012-06-19 21:22:05 -07001619
1620 iph = ip_hdr(skb);
Eric Dumazet45f00f92012-10-22 21:42:47 +00001621 th = tcp_hdr(skb);
David S. Miller41063e92012-06-19 21:22:05 -07001622
1623 if (th->doff < sizeof(struct tcphdr) / 4)
Paolo Abeni74874492017-09-28 15:51:36 +02001624 return 0;
David S. Miller41063e92012-06-19 21:22:05 -07001625
Eric Dumazet45f00f92012-10-22 21:42:47 +00001626 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
David S. Miller41063e92012-06-19 21:22:05 -07001627 iph->saddr, th->source,
Vijay Subramanian7011d082012-06-23 17:38:10 +00001628 iph->daddr, ntohs(th->dest),
David Ahern3fa6f612017-08-07 08:44:17 -07001629 skb->skb_iif, inet_sdif(skb));
David S. Miller41063e92012-06-19 21:22:05 -07001630 if (sk) {
1631 skb->sk = sk;
1632 skb->destructor = sock_edemux;
Eric Dumazetf7e4eb02015-03-15 21:12:13 -07001633 if (sk_fullsock(sk)) {
Michal Kubečekd0c294c2015-03-23 15:14:00 +01001634 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
Eric Dumazet505fbcf2012-07-27 06:23:40 +00001635
David S. Miller41063e92012-06-19 21:22:05 -07001636 if (dst)
1637 dst = dst_check(dst, 0);
David S. Miller92101b32012-07-23 16:29:00 -07001638 if (dst &&
Eric Dumazet505fbcf2012-07-27 06:23:40 +00001639 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
David S. Miller92101b32012-07-23 16:29:00 -07001640 skb_dst_set_noref(skb, dst);
David S. Miller41063e92012-06-19 21:22:05 -07001641 }
1642 }
Paolo Abeni74874492017-09-28 15:51:36 +02001643 return 0;
David S. Miller41063e92012-06-19 21:22:05 -07001644}
1645
Eric Dumazetc9c33212016-08-27 07:37:54 -07001646bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
1647{
Eric Dumazet82657922019-10-09 15:21:13 -07001648 u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
Eric Dumazet4f693b52018-11-27 14:42:03 -08001649 struct skb_shared_info *shinfo;
1650 const struct tcphdr *th;
1651 struct tcphdr *thtail;
1652 struct sk_buff *tail;
1653 unsigned int hdrlen;
1654 bool fragstolen;
1655 u32 gso_segs;
1656 int delta;
Eric Dumazetc9c33212016-08-27 07:37:54 -07001657
1658 /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
1659 * we can fix skb->truesize to its real value to avoid future drops.
1660 * This is valid because skb is not yet charged to the socket.
1661 * It has been noticed pure SACK packets were sometimes dropped
1662 * (if cooked by drivers without copybreak feature).
1663 */
Eric Dumazet60b1af32017-01-24 14:57:36 -08001664 skb_condense(skb);
Eric Dumazetc9c33212016-08-27 07:37:54 -07001665
Eric Dumazetade96282018-11-19 17:45:55 -08001666 skb_dst_drop(skb);
1667
Eric Dumazet4f693b52018-11-27 14:42:03 -08001668 if (unlikely(tcp_checksum_complete(skb))) {
1669 bh_unlock_sock(sk);
1670 __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1671 __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1672 return true;
1673 }
1674
1675 /* Attempt coalescing to last skb in backlog, even if we are
1676 * above the limits.
1677 * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
1678 */
1679 th = (const struct tcphdr *)skb->data;
1680 hdrlen = th->doff * 4;
1681 shinfo = skb_shinfo(skb);
1682
1683 if (!shinfo->gso_size)
1684 shinfo->gso_size = skb->len - hdrlen;
1685
1686 if (!shinfo->gso_segs)
1687 shinfo->gso_segs = 1;
1688
1689 tail = sk->sk_backlog.tail;
1690 if (!tail)
1691 goto no_coalesce;
1692 thtail = (struct tcphdr *)tail->data;
1693
1694 if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
1695 TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
1696 ((TCP_SKB_CB(tail)->tcp_flags |
Eric Dumazetca2fe292019-04-26 10:10:05 -07001697 TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
1698 !((TCP_SKB_CB(tail)->tcp_flags &
1699 TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
Eric Dumazet4f693b52018-11-27 14:42:03 -08001700 ((TCP_SKB_CB(tail)->tcp_flags ^
1701 TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
1702#ifdef CONFIG_TLS_DEVICE
1703 tail->decrypted != skb->decrypted ||
1704#endif
1705 thtail->doff != th->doff ||
1706 memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
1707 goto no_coalesce;
1708
1709 __skb_pull(skb, hdrlen);
1710 if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
1711 thtail->window = th->window;
1712
1713 TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
1714
1715 if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
1716 TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
1717
Eric Dumazetca2fe292019-04-26 10:10:05 -07001718 /* We have to update both TCP_SKB_CB(tail)->tcp_flags and
1719 * thtail->fin, so that the fast path in tcp_rcv_established()
1720 * is not entered if we append a packet with a FIN.
1721 * SYN, RST, URG are not present.
1722 * ACK is set on both packets.
1723 * PSH : we do not really care in TCP stack,
1724 * at least for 'GRO' packets.
1725 */
1726 thtail->fin |= th->fin;
Eric Dumazet4f693b52018-11-27 14:42:03 -08001727 TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1728
1729 if (TCP_SKB_CB(skb)->has_rxtstamp) {
1730 TCP_SKB_CB(tail)->has_rxtstamp = true;
1731 tail->tstamp = skb->tstamp;
1732 skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
1733 }
1734
1735 /* Not as strict as GRO. We only need to carry mss max value */
1736 skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
1737 skb_shinfo(tail)->gso_size);
1738
1739 gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
1740 skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
1741
1742 sk->sk_backlog.len += delta;
1743 __NET_INC_STATS(sock_net(sk),
1744 LINUX_MIB_TCPBACKLOGCOALESCE);
1745 kfree_skb_partial(skb, fragstolen);
1746 return false;
1747 }
1748 __skb_push(skb, hdrlen);
1749
1750no_coalesce:
1751 /* Only socket owner can try to collapse/prune rx queues
1752 * to reduce memory overhead, so add a little headroom here.
1753 * Few sockets backlog are possibly concurrently non empty.
1754 */
1755 limit += 64*1024;
1756
Eric Dumazetc9c33212016-08-27 07:37:54 -07001757 if (unlikely(sk_add_backlog(sk, skb, limit))) {
1758 bh_unlock_sock(sk);
1759 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
1760 return true;
1761 }
1762 return false;
1763}
1764EXPORT_SYMBOL(tcp_add_backlog);
1765
Eric Dumazetac6e7802016-11-10 13:12:35 -08001766int tcp_filter(struct sock *sk, struct sk_buff *skb)
1767{
1768 struct tcphdr *th = (struct tcphdr *)skb->data;
Eric Dumazetac6e7802016-11-10 13:12:35 -08001769
Christoph Paaschf2feaef2019-03-11 11:41:05 -07001770 return sk_filter_trim_cap(sk, skb, th->doff * 4);
Eric Dumazetac6e7802016-11-10 13:12:35 -08001771}
1772EXPORT_SYMBOL(tcp_filter);
1773
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001774static void tcp_v4_restore_cb(struct sk_buff *skb)
1775{
1776 memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
1777 sizeof(struct inet_skb_parm));
1778}
1779
1780static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
1781 const struct tcphdr *th)
1782{
1783 /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1784 * barrier() makes sure compiler wont play fool^Waliasing games.
1785 */
1786 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1787 sizeof(struct inet_skb_parm));
1788 barrier();
1789
1790 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1791 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1792 skb->len - th->doff * 4);
1793 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1794 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1795 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1796 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1797 TCP_SKB_CB(skb)->sacked = 0;
1798 TCP_SKB_CB(skb)->has_rxtstamp =
1799 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1800}
1801
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802/*
1803 * From tcp_input.c
1804 */
1805
1806int tcp_v4_rcv(struct sk_buff *skb)
1807{
Eric Dumazet3b24d852016-04-01 08:52:17 -07001808 struct net *net = dev_net(skb->dev);
Eric Dumazet8b27dae2019-03-22 08:56:40 -07001809 struct sk_buff *skb_to_free;
David Ahern3fa6f612017-08-07 08:44:17 -07001810 int sdif = inet_sdif(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001811 const struct iphdr *iph;
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001812 const struct tcphdr *th;
Eric Dumazet3b24d852016-04-01 08:52:17 -07001813 bool refcounted;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 struct sock *sk;
1815 int ret;
1816
1817 if (skb->pkt_type != PACKET_HOST)
1818 goto discard_it;
1819
1820 /* Count it even if it's bad */
Eric Dumazet90bbcc62016-04-27 16:44:32 -07001821 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822
1823 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1824 goto discard_it;
1825
Eric Dumazetea1627c2016-05-13 09:16:40 -07001826 th = (const struct tcphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827
Eric Dumazetea1627c2016-05-13 09:16:40 -07001828 if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829 goto bad_packet;
1830 if (!pskb_may_pull(skb, th->doff * 4))
1831 goto discard_it;
1832
1833 /* An explanation is required here, I think.
1834 * Packet length and doff are validated by header prediction,
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -08001835 * provided case of th->doff==0 is eliminated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836 * So, we defer the checks. */
Tom Herberted70fcf2014-05-02 16:29:38 -07001837
1838 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00001839 goto csum_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840
Eric Dumazetea1627c2016-05-13 09:16:40 -07001841 th = (const struct tcphdr *)skb->data;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001842 iph = ip_hdr(skb);
Eric Dumazet4bdc3d62015-10-13 17:12:54 -07001843lookup:
Craig Galleka5836362016-02-10 11:50:38 -05001844 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
David Ahern3fa6f612017-08-07 08:44:17 -07001845 th->dest, sdif, &refcounted);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 if (!sk)
1847 goto no_tcp_socket;
1848
Eric Dumazetbb134d52010-03-09 05:55:56 +00001849process:
1850 if (sk->sk_state == TCP_TIME_WAIT)
1851 goto do_time_wait;
1852
Eric Dumazet079096f2015-10-02 11:43:32 -07001853 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1854 struct request_sock *req = inet_reqsk(sk);
Eric Dumazete0f97592018-02-13 06:14:12 -08001855 bool req_stolen = false;
Eric Dumazet77166822016-02-18 05:39:18 -08001856 struct sock *nsk;
Eric Dumazet079096f2015-10-02 11:43:32 -07001857
1858 sk = req->rsk_listener;
Eric Dumazet72923552016-02-11 22:50:29 -08001859 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
Eric Dumazete65c3322016-08-24 08:50:24 -07001860 sk_drops_add(sk, skb);
Eric Dumazet72923552016-02-11 22:50:29 -08001861 reqsk_put(req);
1862 goto discard_it;
1863 }
Frank van der Linden4fd44a92018-06-12 23:09:37 +00001864 if (tcp_checksum_complete(skb)) {
1865 reqsk_put(req);
1866 goto csum_error;
1867 }
Eric Dumazet77166822016-02-18 05:39:18 -08001868 if (unlikely(sk->sk_state != TCP_LISTEN)) {
Eric Dumazetf03f2e12015-10-14 11:16:27 -07001869 inet_csk_reqsk_queue_drop_and_put(sk, req);
Eric Dumazet4bdc3d62015-10-13 17:12:54 -07001870 goto lookup;
1871 }
Eric Dumazet3b24d852016-04-01 08:52:17 -07001872 /* We own a reference on the listener, increase it again
1873 * as we might lose it too soon.
1874 */
Eric Dumazet77166822016-02-18 05:39:18 -08001875 sock_hold(sk);
Eric Dumazet3b24d852016-04-01 08:52:17 -07001876 refcounted = true;
Eric Dumazet1f3b3592017-09-08 12:44:47 -07001877 nsk = NULL;
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001878 if (!tcp_filter(sk, skb)) {
1879 th = (const struct tcphdr *)skb->data;
1880 iph = ip_hdr(skb);
1881 tcp_v4_fill_cb(skb, iph, th);
Eric Dumazete0f97592018-02-13 06:14:12 -08001882 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001883 }
Eric Dumazet079096f2015-10-02 11:43:32 -07001884 if (!nsk) {
1885 reqsk_put(req);
Eric Dumazete0f97592018-02-13 06:14:12 -08001886 if (req_stolen) {
1887 /* Another cpu got exclusive access to req
1888 * and created a full blown socket.
1889 * Try to feed this packet to this socket
1890 * instead of discarding it.
1891 */
1892 tcp_v4_restore_cb(skb);
1893 sock_put(sk);
1894 goto lookup;
1895 }
Eric Dumazet77166822016-02-18 05:39:18 -08001896 goto discard_and_relse;
Eric Dumazet079096f2015-10-02 11:43:32 -07001897 }
1898 if (nsk == sk) {
Eric Dumazet079096f2015-10-02 11:43:32 -07001899 reqsk_put(req);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001900 tcp_v4_restore_cb(skb);
Eric Dumazet079096f2015-10-02 11:43:32 -07001901 } else if (tcp_child_process(sk, nsk, skb)) {
1902 tcp_v4_send_reset(nsk, skb);
Eric Dumazet77166822016-02-18 05:39:18 -08001903 goto discard_and_relse;
Eric Dumazet079096f2015-10-02 11:43:32 -07001904 } else {
Eric Dumazet77166822016-02-18 05:39:18 -08001905 sock_put(sk);
Eric Dumazet079096f2015-10-02 11:43:32 -07001906 return 0;
1907 }
1908 }
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001909 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
Eric Dumazet02a1d6e2016-04-27 16:44:39 -07001910 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
Stephen Hemmingerd218d112010-01-11 16:28:01 -08001911 goto discard_and_relse;
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001912 }
Stephen Hemmingerd218d112010-01-11 16:28:01 -08001913
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1915 goto discard_and_relse;
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001916
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001917 if (tcp_v4_inbound_md5_hash(sk, skb))
1918 goto discard_and_relse;
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001919
Florian Westphal895b5c92019-09-29 20:54:03 +02001920 nf_reset_ct(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921
Eric Dumazetac6e7802016-11-10 13:12:35 -08001922 if (tcp_filter(sk, skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923 goto discard_and_relse;
Eric Dumazetac6e7802016-11-10 13:12:35 -08001924 th = (const struct tcphdr *)skb->data;
1925 iph = ip_hdr(skb);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001926 tcp_v4_fill_cb(skb, iph, th);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927
1928 skb->dev = NULL;
1929
Eric Dumazete994b2f2015-10-02 11:43:39 -07001930 if (sk->sk_state == TCP_LISTEN) {
1931 ret = tcp_v4_do_rcv(sk, skb);
1932 goto put_and_return;
1933 }
1934
1935 sk_incoming_cpu_update(sk);
1936
Ingo Molnarc6366182006-07-03 00:25:13 -07001937 bh_lock_sock_nested(sk);
Martin KaFai Laua44d6ea2016-03-14 10:52:15 -07001938 tcp_segs_in(tcp_sk(sk), skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 ret = 0;
1940 if (!sock_owned_by_user(sk)) {
Eric Dumazet8b27dae2019-03-22 08:56:40 -07001941 skb_to_free = sk->sk_rx_skb_cache;
1942 sk->sk_rx_skb_cache = NULL;
Florian Westphale7942d02017-07-30 03:57:18 +02001943 ret = tcp_v4_do_rcv(sk, skb);
Eric Dumazet8b27dae2019-03-22 08:56:40 -07001944 } else {
1945 if (tcp_add_backlog(sk, skb))
1946 goto discard_and_relse;
1947 skb_to_free = NULL;
Zhu Yi6b03a532010-03-04 18:01:41 +00001948 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949 bh_unlock_sock(sk);
Eric Dumazet8b27dae2019-03-22 08:56:40 -07001950 if (skb_to_free)
1951 __kfree_skb(skb_to_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952
Eric Dumazete994b2f2015-10-02 11:43:39 -07001953put_and_return:
Eric Dumazet3b24d852016-04-01 08:52:17 -07001954 if (refcounted)
1955 sock_put(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956
1957 return ret;
1958
1959no_tcp_socket:
1960 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1961 goto discard_it;
1962
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001963 tcp_v4_fill_cb(skb, iph, th);
1964
Eric Dumazet12e25e12015-06-03 23:49:21 -07001965 if (tcp_checksum_complete(skb)) {
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00001966csum_error:
Eric Dumazet90bbcc62016-04-27 16:44:32 -07001967 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968bad_packet:
Eric Dumazet90bbcc62016-04-27 16:44:32 -07001969 __TCP_INC_STATS(net, TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001971 tcp_v4_send_reset(NULL, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 }
1973
1974discard_it:
1975 /* Discard frame. */
1976 kfree_skb(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001977 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978
1979discard_and_relse:
Eric Dumazet532182c2016-04-01 08:52:19 -07001980 sk_drops_add(sk, skb);
Eric Dumazet3b24d852016-04-01 08:52:17 -07001981 if (refcounted)
1982 sock_put(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983 goto discard_it;
1984
1985do_time_wait:
1986 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001987 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988 goto discard_it;
1989 }
1990
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001991 tcp_v4_fill_cb(skb, iph, th);
1992
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00001993 if (tcp_checksum_complete(skb)) {
1994 inet_twsk_put(inet_twsk(sk));
1995 goto csum_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001997 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998 case TCP_TW_SYN: {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001999 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
Craig Galleka5836362016-02-10 11:50:38 -05002000 &tcp_hashinfo, skb,
2001 __tcp_hdrlen(th),
Tom Herbertda5e3632013-01-22 09:50:24 +00002002 iph->saddr, th->source,
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07002003 iph->daddr, th->dest,
David Ahern3fa6f612017-08-07 08:44:17 -07002004 inet_iif(skb),
2005 sdif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 if (sk2) {
Eric Dumazetdbe7faa2015-07-08 14:28:30 -07002007 inet_twsk_deschedule_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 sk = sk2;
Eric Dumazeteeea10b2017-12-03 09:32:59 -08002009 tcp_v4_restore_cb(skb);
Eric Dumazet3b24d852016-04-01 08:52:17 -07002010 refcounted = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011 goto process;
2012 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 }
Gustavo A. R. Silvafcfd6df2017-10-16 15:48:55 -05002014 /* to ACK */
2015 /* fall through */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 case TCP_TW_ACK:
2017 tcp_v4_timewait_ack(sk, skb);
2018 break;
2019 case TCP_TW_RST:
Florian Westphal271c3b92015-12-21 21:29:26 +01002020 tcp_v4_send_reset(sk, skb);
2021 inet_twsk_deschedule_put(inet_twsk(sk));
2022 goto discard_it;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 case TCP_TW_SUCCESS:;
2024 }
2025 goto discard_it;
2026}
2027
David S. Millerccb7c412010-12-01 18:09:13 -08002028static struct timewait_sock_ops tcp_timewait_sock_ops = {
2029 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2030 .twsk_unique = tcp_twsk_unique,
2031 .twsk_destructor= tcp_twsk_destructor,
David S. Millerccb7c412010-12-01 18:09:13 -08002032};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033
Eric Dumazet63d02d12012-08-09 14:11:00 +00002034void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
Eric Dumazet5d299f32012-08-06 05:09:33 +00002035{
2036 struct dst_entry *dst = skb_dst(skb);
2037
Eric Dumazet5037e9e2015-12-14 14:08:53 -08002038 if (dst && dst_hold_safe(dst)) {
Eric Dumazetca777ef2014-09-08 08:06:07 -07002039 sk->sk_rx_dst = dst;
2040 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2041 }
Eric Dumazet5d299f32012-08-06 05:09:33 +00002042}
Eric Dumazet63d02d12012-08-09 14:11:00 +00002043EXPORT_SYMBOL(inet_sk_rx_dst_set);
Eric Dumazet5d299f32012-08-06 05:09:33 +00002044
Stephen Hemminger3b401a82009-09-01 19:25:04 +00002045const struct inet_connection_sock_af_ops ipv4_specific = {
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002046 .queue_xmit = ip_queue_xmit,
2047 .send_check = tcp_v4_send_check,
2048 .rebuild_header = inet_sk_rebuild_header,
Eric Dumazet5d299f32012-08-06 05:09:33 +00002049 .sk_rx_dst_set = inet_sk_rx_dst_set,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002050 .conn_request = tcp_v4_conn_request,
2051 .syn_recv_sock = tcp_v4_syn_recv_sock,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002052 .net_header_len = sizeof(struct iphdr),
2053 .setsockopt = ip_setsockopt,
2054 .getsockopt = ip_getsockopt,
2055 .addr2sockaddr = inet_csk_addr2sockaddr,
2056 .sockaddr_len = sizeof(struct sockaddr_in),
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08002057#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002058 .compat_setsockopt = compat_ip_setsockopt,
2059 .compat_getsockopt = compat_ip_getsockopt,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08002060#endif
Neal Cardwell4fab9072014-08-14 12:40:05 -04002061 .mtu_reduced = tcp_v4_mtu_reduced,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002063EXPORT_SYMBOL(ipv4_specific);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002065#ifdef CONFIG_TCP_MD5SIG
Stephen Hemmingerb2e4b3de2009-09-01 19:25:03 +00002066static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002067 .md5_lookup = tcp_v4_md5_lookup,
Adam Langley49a72df2008-07-19 00:01:42 -07002068 .calc_md5_hash = tcp_v4_md5_hash_skb,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002069 .md5_parse = tcp_v4_parse_md5_keys,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002070};
Andrew Mortonb6332e62006-11-30 19:16:28 -08002071#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002072
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073/* NOTE: A lot of things set to zero explicitly by call to
2074 * sk_alloc() so need not be done here.
2075 */
2076static int tcp_v4_init_sock(struct sock *sk)
2077{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002078 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079
Neal Cardwell900f65d2012-04-19 09:55:21 +00002080 tcp_init_sock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08002082 icsk->icsk_af_ops = &ipv4_specific;
Neal Cardwell900f65d2012-04-19 09:55:21 +00002083
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002084#ifdef CONFIG_TCP_MD5SIG
David S. Millerac807fa2012-04-23 03:21:58 -04002085 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002086#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 return 0;
2089}
2090
Brian Haley7d06b2e2008-06-14 17:04:49 -07002091void tcp_v4_destroy_sock(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092{
2093 struct tcp_sock *tp = tcp_sk(sk);
2094
Song Liue1a4aa52017-10-23 09:20:26 -07002095 trace_tcp_destroy_sock(sk);
2096
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097 tcp_clear_xmit_timers(sk);
2098
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002099 tcp_cleanup_congestion_control(sk);
Stephen Hemminger317a76f2005-06-23 12:19:55 -07002100
Dave Watson734942c2017-06-14 11:37:14 -07002101 tcp_cleanup_ulp(sk);
2102
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103 /* Cleanup up the write buffer. */
David S. Millerfe067e82007-03-07 12:12:44 -08002104 tcp_write_queue_purge(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105
Wei Wangcf1ef3f2017-04-20 14:45:46 -07002106 /* Check if we want to disable active TFO */
2107 tcp_fastopen_active_disable_ofo_check(sk);
2108
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109 /* Cleans up our, hopefully empty, out_of_order_queue. */
Yaogong Wang9f5afea2016-09-07 14:49:28 -07002110 skb_rbtree_purge(&tp->out_of_order_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002112#ifdef CONFIG_TCP_MD5SIG
2113 /* Clean up the MD5 key list, if any */
2114 if (tp->md5sig_info) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +00002115 tcp_clear_md5_list(sk);
Mat Martineaufb7df5e2017-12-21 10:29:10 -08002116 kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002117 tp->md5sig_info = NULL;
2118 }
2119#endif
2120
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 /* Clean up a referenced TCP bind bucket. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002122 if (inet_csk(sk)->icsk_bind_hash)
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08002123 inet_put_port(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124
Eric Dumazetd983ea62019-10-10 20:17:38 -07002125 BUG_ON(rcu_access_pointer(tp->fastopen_rsk));
William Allen Simpson435cf552009-12-02 18:17:05 +00002126
Yuchung Chengcf60af02012-07-19 06:43:09 +00002127 /* If socket is aborted during connect operation */
2128 tcp_free_fastopen_req(tp);
Yuchung Cheng1fba70e2017-10-18 11:22:51 -07002129 tcp_fastopen_destroy_cipher(sk);
Eric Dumazetcd8ae852015-05-03 21:34:46 -07002130 tcp_saved_syn_free(tp);
Yuchung Chengcf60af02012-07-19 06:43:09 +00002131
Glauber Costa180d8cd2011-12-11 21:47:02 +00002132 sk_sockets_allocated_dec(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134EXPORT_SYMBOL(tcp_v4_destroy_sock);
2135
2136#ifdef CONFIG_PROC_FS
2137/* Proc filesystem TCP sock list dumping. */
2138
Tom Herberta8b690f2010-06-07 00:43:42 -07002139/*
2140 * Get next listener socket follow cur. If cur is NULL, get first socket
2141 * starting from bucket given in st->bucket; when st->bucket is zero the
2142 * very first socket in the hash table is returned.
2143 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144static void *listening_get_next(struct seq_file *seq, void *cur)
2145{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002146 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
Jianjun Kong5799de02008-11-03 02:49:10 -08002147 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002148 struct net *net = seq_file_net(seq);
Eric Dumazet3b24d852016-04-01 08:52:17 -07002149 struct inet_listen_hashbucket *ilb;
Eric Dumazet8dbd76e2019-12-13 18:20:41 -08002150 struct hlist_nulls_node *node;
Eric Dumazet3b24d852016-04-01 08:52:17 -07002151 struct sock *sk = cur;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152
2153 if (!sk) {
Eric Dumazet3b24d852016-04-01 08:52:17 -07002154get_head:
Tom Herberta8b690f2010-06-07 00:43:42 -07002155 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Eric Dumazet9652dc22016-10-19 21:24:58 -07002156 spin_lock(&ilb->lock);
Eric Dumazet8dbd76e2019-12-13 18:20:41 -08002157 sk = sk_nulls_head(&ilb->nulls_head);
Tom Herberta8b690f2010-06-07 00:43:42 -07002158 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159 goto get_sk;
2160 }
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002161 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07002163 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164
Eric Dumazet8dbd76e2019-12-13 18:20:41 -08002165 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166get_sk:
Eric Dumazet8dbd76e2019-12-13 18:20:41 -08002167 sk_nulls_for_each_from(sk, node) {
Pavel Emelyanov8475ef92010-11-22 03:26:12 +00002168 if (!net_eq(sock_net(sk), net))
2169 continue;
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002170 if (sk->sk_family == afinfo->family)
Eric Dumazet3b24d852016-04-01 08:52:17 -07002171 return sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002172 }
Eric Dumazet9652dc22016-10-19 21:24:58 -07002173 spin_unlock(&ilb->lock);
Tom Herberta8b690f2010-06-07 00:43:42 -07002174 st->offset = 0;
Eric Dumazet3b24d852016-04-01 08:52:17 -07002175 if (++st->bucket < INET_LHTABLE_SIZE)
2176 goto get_head;
2177 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178}
2179
2180static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2181{
Tom Herberta8b690f2010-06-07 00:43:42 -07002182 struct tcp_iter_state *st = seq->private;
2183 void *rc;
2184
2185 st->bucket = 0;
2186 st->offset = 0;
2187 rc = listening_get_next(seq, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188
2189 while (rc && *pos) {
2190 rc = listening_get_next(seq, rc);
2191 --*pos;
2192 }
2193 return rc;
2194}
2195
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002196static inline bool empty_bucket(const struct tcp_iter_state *st)
Andi Kleen6eac5602008-08-28 01:08:02 -07002197{
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002198 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
Andi Kleen6eac5602008-08-28 01:08:02 -07002199}
2200
Tom Herberta8b690f2010-06-07 00:43:42 -07002201/*
2202 * Get first established socket starting from bucket given in st->bucket.
2203 * If st->bucket is zero, the very first socket in the hash is returned.
2204 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205static void *established_get_first(struct seq_file *seq)
2206{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002207 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
Jianjun Kong5799de02008-11-03 02:49:10 -08002208 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002209 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210 void *rc = NULL;
2211
Tom Herberta8b690f2010-06-07 00:43:42 -07002212 st->offset = 0;
2213 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 struct sock *sk;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002215 struct hlist_nulls_node *node;
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002216 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217
Andi Kleen6eac5602008-08-28 01:08:02 -07002218 /* Lockless fast path for the common case of empty buckets */
2219 if (empty_bucket(st))
2220 continue;
2221
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002222 spin_lock_bh(lock);
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002223 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002224 if (sk->sk_family != afinfo->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002225 !net_eq(sock_net(sk), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 continue;
2227 }
2228 rc = sk;
2229 goto out;
2230 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002231 spin_unlock_bh(lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 }
2233out:
2234 return rc;
2235}
2236
2237static void *established_get_next(struct seq_file *seq, void *cur)
2238{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002239 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240 struct sock *sk = cur;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002241 struct hlist_nulls_node *node;
Jianjun Kong5799de02008-11-03 02:49:10 -08002242 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002243 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244
2245 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07002246 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002248 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002250 sk_nulls_for_each_from(sk, node) {
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002251 if (sk->sk_family == afinfo->family &&
2252 net_eq(sock_net(sk), net))
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002253 return sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 }
2255
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002256 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2257 ++st->bucket;
2258 return established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259}
2260
2261static void *established_get_idx(struct seq_file *seq, loff_t pos)
2262{
Tom Herberta8b690f2010-06-07 00:43:42 -07002263 struct tcp_iter_state *st = seq->private;
2264 void *rc;
2265
2266 st->bucket = 0;
2267 rc = established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268
2269 while (rc && pos) {
2270 rc = established_get_next(seq, rc);
2271 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002272 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273 return rc;
2274}
2275
2276static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2277{
2278 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002279 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281 st->state = TCP_SEQ_STATE_LISTENING;
2282 rc = listening_get_idx(seq, &pos);
2283
2284 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 st->state = TCP_SEQ_STATE_ESTABLISHED;
2286 rc = established_get_idx(seq, pos);
2287 }
2288
2289 return rc;
2290}
2291
Tom Herberta8b690f2010-06-07 00:43:42 -07002292static void *tcp_seek_last_pos(struct seq_file *seq)
2293{
2294 struct tcp_iter_state *st = seq->private;
2295 int offset = st->offset;
2296 int orig_num = st->num;
2297 void *rc = NULL;
2298
2299 switch (st->state) {
Tom Herberta8b690f2010-06-07 00:43:42 -07002300 case TCP_SEQ_STATE_LISTENING:
2301 if (st->bucket >= INET_LHTABLE_SIZE)
2302 break;
2303 st->state = TCP_SEQ_STATE_LISTENING;
2304 rc = listening_get_next(seq, NULL);
2305 while (offset-- && rc)
2306 rc = listening_get_next(seq, rc);
2307 if (rc)
2308 break;
2309 st->bucket = 0;
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002310 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002311 /* Fallthrough */
2312 case TCP_SEQ_STATE_ESTABLISHED:
Tom Herberta8b690f2010-06-07 00:43:42 -07002313 if (st->bucket > tcp_hashinfo.ehash_mask)
2314 break;
2315 rc = established_get_first(seq);
2316 while (offset-- && rc)
2317 rc = established_get_next(seq, rc);
2318 }
2319
2320 st->num = orig_num;
2321
2322 return rc;
2323}
2324
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002325void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326{
Jianjun Kong5799de02008-11-03 02:49:10 -08002327 struct tcp_iter_state *st = seq->private;
Tom Herberta8b690f2010-06-07 00:43:42 -07002328 void *rc;
2329
2330 if (*pos && *pos == st->last_pos) {
2331 rc = tcp_seek_last_pos(seq);
2332 if (rc)
2333 goto out;
2334 }
2335
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336 st->state = TCP_SEQ_STATE_LISTENING;
2337 st->num = 0;
Tom Herberta8b690f2010-06-07 00:43:42 -07002338 st->bucket = 0;
2339 st->offset = 0;
2340 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2341
2342out:
2343 st->last_pos = *pos;
2344 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002346EXPORT_SYMBOL(tcp_seq_start);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002348void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349{
Tom Herberta8b690f2010-06-07 00:43:42 -07002350 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351 void *rc = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352
2353 if (v == SEQ_START_TOKEN) {
2354 rc = tcp_get_idx(seq, 0);
2355 goto out;
2356 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357
2358 switch (st->state) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359 case TCP_SEQ_STATE_LISTENING:
2360 rc = listening_get_next(seq, v);
2361 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002363 st->bucket = 0;
2364 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 rc = established_get_first(seq);
2366 }
2367 break;
2368 case TCP_SEQ_STATE_ESTABLISHED:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 rc = established_get_next(seq, v);
2370 break;
2371 }
2372out:
2373 ++*pos;
Tom Herberta8b690f2010-06-07 00:43:42 -07002374 st->last_pos = *pos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375 return rc;
2376}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002377EXPORT_SYMBOL(tcp_seq_next);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002379void tcp_seq_stop(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380{
Jianjun Kong5799de02008-11-03 02:49:10 -08002381 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382
2383 switch (st->state) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384 case TCP_SEQ_STATE_LISTENING:
2385 if (v != SEQ_START_TOKEN)
Eric Dumazet9652dc22016-10-19 21:24:58 -07002386 spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388 case TCP_SEQ_STATE_ESTABLISHED:
2389 if (v)
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002390 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002391 break;
2392 }
2393}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002394EXPORT_SYMBOL(tcp_seq_stop);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395
Eric Dumazetd4f06872015-03-12 16:44:09 -07002396static void get_openreq4(const struct request_sock *req,
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002397 struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002399 const struct inet_request_sock *ireq = inet_rsk(req);
Eric Dumazetfa76ce732015-03-19 19:04:20 -07002400 long delta = req->rsk_timer.expires - jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002402 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002403 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404 i,
Eric Dumazet634fb9792013-10-09 15:21:29 -07002405 ireq->ir_loc_addr,
Eric Dumazetd4f06872015-03-12 16:44:09 -07002406 ireq->ir_num,
Eric Dumazet634fb9792013-10-09 15:21:29 -07002407 ireq->ir_rmt_addr,
2408 ntohs(ireq->ir_rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 TCP_SYN_RECV,
2410 0, 0, /* could print option size, but that is af dependent. */
2411 1, /* timers active (only the expire timer) */
Eric Dumazeta399a802012-08-08 21:13:53 +00002412 jiffies_delta_to_clock_t(delta),
Eric Dumazete6c022a2012-10-27 23:16:46 +00002413 req->num_timeout,
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002414 from_kuid_munged(seq_user_ns(f),
2415 sock_i_uid(req->rsk_listener)),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416 0, /* non standard timer */
2417 0, /* open_requests have no inode */
Eric Dumazetd4f06872015-03-12 16:44:09 -07002418 0,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002419 req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002420}
2421
Tetsuo Handa652586d2013-11-14 14:31:57 -08002422static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423{
2424 int timer_active;
2425 unsigned long timer_expires;
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002426 const struct tcp_sock *tp = tcp_sk(sk);
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002427 const struct inet_connection_sock *icsk = inet_csk(sk);
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002428 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet0536fcc2015-09-29 07:42:52 -07002429 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
Eric Dumazetc720c7e82009-10-15 06:30:45 +00002430 __be32 dest = inet->inet_daddr;
2431 __be32 src = inet->inet_rcv_saddr;
2432 __u16 destp = ntohs(inet->inet_dport);
2433 __u16 srcp = ntohs(inet->inet_sport);
Eric Dumazet49d09002009-12-03 16:06:13 -08002434 int rx_queue;
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002435 int state;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436
Nandita Dukkipati6ba8a3b2013-03-11 10:00:43 +00002437 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
Yuchung Cheng57dde7f2017-01-12 22:11:33 -08002438 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
Nandita Dukkipati6ba8a3b2013-03-11 10:00:43 +00002439 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002441 timer_expires = icsk->icsk_timeout;
2442 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002444 timer_expires = icsk->icsk_timeout;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002445 } else if (timer_pending(&sk->sk_timer)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446 timer_active = 2;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002447 timer_expires = sk->sk_timer.expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448 } else {
2449 timer_active = 0;
2450 timer_expires = jiffies;
2451 }
2452
Yafang Shao986ffdf2017-12-20 11:12:52 +08002453 state = inet_sk_state_load(sk);
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002454 if (state == TCP_LISTEN)
Eric Dumazet288efe82019-11-05 14:11:53 -08002455 rx_queue = READ_ONCE(sk->sk_ack_backlog);
Eric Dumazet49d09002009-12-03 16:06:13 -08002456 else
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002457 /* Because we don't lock the socket,
2458 * we might find a transient negative value.
Eric Dumazet49d09002009-12-03 16:06:13 -08002459 */
Eric Dumazetdba7d9b2019-10-10 20:17:39 -07002460 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
Eric Dumazet7db48e92019-10-10 20:17:40 -07002461 READ_ONCE(tp->copied_seq), 0);
Eric Dumazet49d09002009-12-03 16:06:13 -08002462
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002463 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
Tetsuo Handa652586d2013-11-14 14:31:57 -08002464 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002465 i, src, srcp, dest, destp, state,
Eric Dumazet0f317462019-10-10 20:17:41 -07002466 READ_ONCE(tp->write_seq) - tp->snd_una,
Eric Dumazet49d09002009-12-03 16:06:13 -08002467 rx_queue,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 timer_active,
Eric Dumazeta399a802012-08-08 21:13:53 +00002469 jiffies_delta_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002470 icsk->icsk_retransmits,
Eric W. Biedermana7cb5a42012-05-24 01:10:10 -06002471 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002472 icsk->icsk_probes_out,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002473 sock_i_ino(sk),
Reshetova, Elena41c6d652017-06-30 13:08:01 +03002474 refcount_read(&sk->sk_refcnt), sk,
Stephen Hemminger7be87352008-06-27 20:00:19 -07002475 jiffies_to_clock_t(icsk->icsk_rto),
2476 jiffies_to_clock_t(icsk->icsk_ack.ato),
Wei Wang31954cd2019-01-25 10:53:19 -08002477 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 tp->snd_cwnd,
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002479 state == TCP_LISTEN ?
2480 fastopenq->max_qlen :
Tetsuo Handa652586d2013-11-14 14:31:57 -08002481 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482}
2483
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002484static void get_timewait4_sock(const struct inet_timewait_sock *tw,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002485 struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486{
Eric Dumazet789f5582015-04-12 18:51:09 -07002487 long delta = tw->tw_timer.expires - jiffies;
Al Viro23f33c22006-09-27 18:43:50 -07002488 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 __u16 destp, srcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490
2491 dest = tw->tw_daddr;
2492 src = tw->tw_rcv_saddr;
2493 destp = ntohs(tw->tw_dport);
2494 srcp = ntohs(tw->tw_sport);
2495
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002496 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002497 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
Eric Dumazeta399a802012-08-08 21:13:53 +00002499 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
Reshetova, Elena41c6d652017-06-30 13:08:01 +03002500 refcount_read(&tw->tw_refcnt), tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501}
2502
2503#define TMPSZ 150
2504
2505static int tcp4_seq_show(struct seq_file *seq, void *v)
2506{
Jianjun Kong5799de02008-11-03 02:49:10 -08002507 struct tcp_iter_state *st;
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002508 struct sock *sk = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509
Tetsuo Handa652586d2013-11-14 14:31:57 -08002510 seq_setwidth(seq, TMPSZ - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 if (v == SEQ_START_TOKEN) {
Tetsuo Handa652586d2013-11-14 14:31:57 -08002512 seq_puts(seq, " sl local_address rem_address st tx_queue "
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513 "rx_queue tr tm->when retrnsmt uid timeout "
2514 "inode");
2515 goto out;
2516 }
2517 st = seq->private;
2518
Eric Dumazet079096f2015-10-02 11:43:32 -07002519 if (sk->sk_state == TCP_TIME_WAIT)
2520 get_timewait4_sock(v, seq, st->num);
2521 else if (sk->sk_state == TCP_NEW_SYN_RECV)
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002522 get_openreq4(v, seq, st->num);
Eric Dumazet079096f2015-10-02 11:43:32 -07002523 else
2524 get_tcp4_sock(v, seq, st->num);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002525out:
Tetsuo Handa652586d2013-11-14 14:31:57 -08002526 seq_pad(seq, '\n');
Linus Torvalds1da177e2005-04-16 15:20:36 -07002527 return 0;
2528}
2529
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002530static const struct seq_operations tcp4_seq_ops = {
2531 .show = tcp4_seq_show,
2532 .start = tcp_seq_start,
2533 .next = tcp_seq_next,
2534 .stop = tcp_seq_stop,
2535};
2536
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537static struct tcp_seq_afinfo tcp4_seq_afinfo = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538 .family = AF_INET,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002539};
2540
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002541static int __net_init tcp4_proc_init_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002542{
Christoph Hellwigc3506372018-04-10 19:42:55 +02002543 if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
2544 sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002545 return -ENOMEM;
2546 return 0;
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002547}
2548
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002549static void __net_exit tcp4_proc_exit_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002550{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002551 remove_proc_entry("tcp", net->proc_net);
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002552}
2553
2554static struct pernet_operations tcp4_net_ops = {
2555 .init = tcp4_proc_init_net,
2556 .exit = tcp4_proc_exit_net,
2557};
2558
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559int __init tcp4_proc_init(void)
2560{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002561 return register_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562}
2563
2564void tcp4_proc_exit(void)
2565{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002566 unregister_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567}
2568#endif /* CONFIG_PROC_FS */
2569
2570struct proto tcp_prot = {
2571 .name = "TCP",
2572 .owner = THIS_MODULE,
2573 .close = tcp_close,
Andrey Ignatovd74bad42018-03-30 15:08:05 -07002574 .pre_connect = tcp_v4_pre_connect,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575 .connect = tcp_v4_connect,
2576 .disconnect = tcp_disconnect,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002577 .accept = inet_csk_accept,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 .ioctl = tcp_ioctl,
2579 .init = tcp_v4_init_sock,
2580 .destroy = tcp_v4_destroy_sock,
2581 .shutdown = tcp_shutdown,
2582 .setsockopt = tcp_setsockopt,
2583 .getsockopt = tcp_getsockopt,
Ursula Braun4b9d07a2017-01-09 16:55:12 +01002584 .keepalive = tcp_set_keepalive,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 .recvmsg = tcp_recvmsg,
Changli Gao7ba42912010-07-10 20:41:55 +00002586 .sendmsg = tcp_sendmsg,
2587 .sendpage = tcp_sendpage,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 .backlog_rcv = tcp_v4_do_rcv,
Eric Dumazet46d3cea2012-07-11 05:50:31 +00002589 .release_cb = tcp_release_cb,
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08002590 .hash = inet_hash,
2591 .unhash = inet_unhash,
2592 .get_port = inet_csk_get_port,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002593 .enter_memory_pressure = tcp_enter_memory_pressure,
Eric Dumazet06044752017-06-07 13:29:12 -07002594 .leave_memory_pressure = tcp_leave_memory_pressure,
Eric Dumazetc9bee3b72013-07-22 20:27:07 -07002595 .stream_memory_free = tcp_stream_memory_free,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 .sockets_allocated = &tcp_sockets_allocated,
Arnaldo Carvalho de Melo0a5578c2005-08-09 20:11:41 -07002597 .orphan_count = &tcp_orphan_count,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 .memory_allocated = &tcp_memory_allocated,
2599 .memory_pressure = &tcp_memory_pressure,
Eric W. Biedermana4fe34b2013-10-19 16:25:36 -07002600 .sysctl_mem = sysctl_tcp_mem,
Eric Dumazet356d1832017-11-07 00:29:28 -08002601 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2602 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 .max_header = MAX_TCP_HEADER,
2604 .obj_size = sizeof(struct tcp_sock),
Paul E. McKenney5f0d5a32017-01-18 02:53:44 -08002605 .slab_flags = SLAB_TYPESAFE_BY_RCU,
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002606 .twsk_prot = &tcp_timewait_sock_ops,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002607 .rsk_prot = &tcp_request_sock_ops,
Pavel Emelyanov39d8cda2008-03-22 16:50:58 -07002608 .h.hashinfo = &tcp_hashinfo,
Changli Gao7ba42912010-07-10 20:41:55 +00002609 .no_autobind = true,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002610#ifdef CONFIG_COMPAT
2611 .compat_setsockopt = compat_tcp_setsockopt,
2612 .compat_getsockopt = compat_tcp_getsockopt,
2613#endif
Lorenzo Colittic1e64e22015-12-16 12:30:05 +09002614 .diag_destroy = tcp_abort,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002615};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002616EXPORT_SYMBOL(tcp_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617
Denis V. Lunev046ee902008-04-03 14:31:33 -07002618static void __net_exit tcp_sk_exit(struct net *net)
2619{
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002620 int cpu;
2621
Dust Lib506bc92019-04-01 16:04:53 +08002622 if (net->ipv4.tcp_congestion_control)
2623 module_put(net->ipv4.tcp_congestion_control->owner);
Stephen Hemminger6670e152017-11-14 08:25:49 -08002624
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002625 for_each_possible_cpu(cpu)
2626 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2627 free_percpu(net->ipv4.tcp_sk);
2628}
2629
2630static int __net_init tcp_sk_init(struct net *net)
2631{
Haishuang Yanfee83d02016-12-28 17:52:33 +08002632 int res, cpu, cnt;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002633
2634 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2635 if (!net->ipv4.tcp_sk)
2636 return -ENOMEM;
2637
2638 for_each_possible_cpu(cpu) {
2639 struct sock *sk;
2640
2641 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2642 IPPROTO_TCP, net);
2643 if (res)
2644 goto fail;
Eric Dumazeta9d65322016-04-01 08:52:21 -07002645 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
Eric Dumazet431280e2018-08-22 13:30:45 -07002646
2647 /* Please enforce IP_DF and IPID==0 for RST and
2648 * ACK sent in SYN-RECV and TIME-WAIT state.
2649 */
2650 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
2651
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002652 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2653 }
Daniel Borkmann49213552015-05-19 21:04:22 +02002654
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002655 net->ipv4.sysctl_tcp_ecn = 2;
Daniel Borkmann49213552015-05-19 21:04:22 +02002656 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2657
Fan Dub0f9ca52015-02-10 09:53:16 +08002658 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
Eric Dumazet5f3e2bf002019-06-06 09:15:31 -07002659 net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
Fan Du6b58e0a2015-03-06 11:18:23 +08002660 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
Fan Du05cbc0d2015-03-06 11:18:24 +08002661 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
Josh Huntc04b79b2019-08-07 19:52:29 -04002662 net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002663
Nikolay Borisov13b287e2016-01-07 16:38:43 +02002664 net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
Nikolay Borisov9bd68612016-01-07 16:38:44 +02002665 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
Nikolay Borisovb840d152016-01-07 16:38:45 +02002666 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
Nikolay Borisov13b287e2016-01-07 16:38:43 +02002667
Nikolay Borisov6fa25162016-02-03 09:46:49 +02002668 net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
Nikolay Borisov7c083ec2016-02-03 09:46:50 +02002669 net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
David S. Miller0aca7372016-02-08 04:24:33 -05002670 net->ipv4.sysctl_tcp_syncookies = 1;
Nikolay Borisov1043e252016-02-03 09:46:52 +02002671 net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
Nikolay Borisovae5c3f42016-02-03 09:46:53 +02002672 net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
Nikolay Borisovc6214a92016-02-03 09:46:54 +02002673 net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
Nikolay Borisovc402d9b2016-02-03 09:46:55 +02002674 net->ipv4.sysctl_tcp_orphan_retries = 0;
Nikolay Borisov1e579ca2016-02-03 09:46:56 +02002675 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
Nikolay Borisov4979f2d2016-02-03 09:46:57 +02002676 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
Maciej Żenczykowski79e9fed2018-06-03 10:41:17 -07002677 net->ipv4.sysctl_tcp_tw_reuse = 2;
Nikolay Borisov12ed8242016-02-03 09:46:51 +02002678
Haishuang Yanfee83d02016-12-28 17:52:33 +08002679 cnt = tcp_hashinfo.ehash_mask + 1;
Yafang Shao743e4812018-09-01 20:21:05 +08002680 net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
Haishuang Yan1946e672016-12-28 17:52:32 +08002681 net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
2682
Eric Dumazet623d0c22019-10-30 10:05:46 -07002683 net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
Eric Dumazetf9301032017-06-07 10:34:37 -07002684 net->ipv4.sysctl_tcp_sack = 1;
Eric Dumazet9bb37ef2017-06-07 10:34:38 -07002685 net->ipv4.sysctl_tcp_window_scaling = 1;
Eric Dumazet5d2ed052017-06-07 10:34:39 -07002686 net->ipv4.sysctl_tcp_timestamps = 1;
Eric Dumazet2ae21cf2017-10-26 21:54:56 -07002687 net->ipv4.sysctl_tcp_early_retrans = 3;
Eric Dumazete20223f2017-10-26 21:54:57 -07002688 net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
Eric Dumazetb510f0d2017-10-26 21:54:59 -07002689 net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
Eric Dumazete0a1e5b2017-10-26 21:55:00 -07002690 net->ipv4.sysctl_tcp_retrans_collapse = 1;
Eric Dumazetc6e21802017-10-26 21:55:06 -07002691 net->ipv4.sysctl_tcp_max_reordering = 300;
Eric Dumazet6496f6b2017-10-26 21:55:07 -07002692 net->ipv4.sysctl_tcp_dsack = 1;
Eric Dumazet0c126542017-10-26 21:55:08 -07002693 net->ipv4.sysctl_tcp_app_win = 31;
Eric Dumazet94f08932017-10-26 21:55:09 -07002694 net->ipv4.sysctl_tcp_adv_win_scale = 1;
Eric Dumazetaf9b69a2017-10-26 21:55:10 -07002695 net->ipv4.sysctl_tcp_frto = 2;
Eric Dumazet4540c0c2017-10-27 07:47:22 -07002696 net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
Eric Dumazetd06a9902017-10-27 07:47:23 -07002697 /* This limits the percentage of the congestion window which we
2698 * will allow a single TSO frame to consume. Building TSO frames
2699 * which are too large can cause TCP streams to be bursty.
2700 */
2701 net->ipv4.sysctl_tcp_tso_win_divisor = 3;
Eric Dumazetc73e5802018-11-11 07:34:28 -08002702 /* Default TSQ limit of 16 TSO segments */
2703 net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
Eric Dumazetb530b682017-10-27 07:47:26 -07002704 /* rfc5961 challenge ack rate limiting */
2705 net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
Eric Dumazet26e95962017-10-27 07:47:27 -07002706 net->ipv4.sysctl_tcp_min_tso_segs = 2;
Eric Dumazetbd239702017-10-27 07:47:28 -07002707 net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
Eric Dumazet790f00e2017-10-27 07:47:29 -07002708 net->ipv4.sysctl_tcp_autocorking = 1;
Eric Dumazet4170ba62017-10-27 07:47:30 -07002709 net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
Eric Dumazet23a7102a2017-10-27 07:47:31 -07002710 net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
Eric Dumazetc26e91f2017-10-27 07:47:32 -07002711 net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
Eric Dumazet356d1832017-11-07 00:29:28 -08002712 if (net != &init_net) {
2713 memcpy(net->ipv4.sysctl_tcp_rmem,
2714 init_net.ipv4.sysctl_tcp_rmem,
2715 sizeof(init_net.ipv4.sysctl_tcp_rmem));
2716 memcpy(net->ipv4.sysctl_tcp_wmem,
2717 init_net.ipv4.sysctl_tcp_wmem,
2718 sizeof(init_net.ipv4.sysctl_tcp_wmem));
2719 }
Eric Dumazet6d82aa22018-05-17 14:47:28 -07002720 net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
Eric Dumazet9c21d2f2018-05-17 14:47:29 -07002721 net->ipv4.sysctl_tcp_comp_sack_nr = 44;
Haishuang Yane1cfcbe2017-09-27 11:35:40 +08002722 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
Haishuang Yan43713842017-09-27 11:35:42 +08002723 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
Haishuang Yan3733be12017-09-27 11:35:43 +08002724 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
2725 atomic_set(&net->ipv4.tfo_active_disable_times, 0);
Haishuang Yane1cfcbe2017-09-27 11:35:40 +08002726
Stephen Hemminger6670e152017-11-14 08:25:49 -08002727 /* Reno is always built in */
2728 if (!net_eq(net, &init_net) &&
2729 try_module_get(init_net.ipv4.tcp_congestion_control->owner))
2730 net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
2731 else
2732 net->ipv4.tcp_congestion_control = &tcp_reno;
2733
Daniel Borkmann49213552015-05-19 21:04:22 +02002734 return 0;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002735fail:
2736 tcp_sk_exit(net);
2737
2738 return res;
Eric W. Biedermanb099ce22009-12-03 02:29:09 +00002739}
2740
2741static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2742{
Haishuang Yan43713842017-09-27 11:35:42 +08002743 struct net *net;
2744
Haishuang Yan1946e672016-12-28 17:52:32 +08002745 inet_twsk_purge(&tcp_hashinfo, AF_INET);
Haishuang Yan43713842017-09-27 11:35:42 +08002746
2747 list_for_each_entry(net, net_exit_list, exit_list)
2748 tcp_fastopen_ctx_destroy(net);
Denis V. Lunev046ee902008-04-03 14:31:33 -07002749}
2750
2751static struct pernet_operations __net_initdata tcp_sk_ops = {
Eric W. Biedermanb099ce22009-12-03 02:29:09 +00002752 .init = tcp_sk_init,
2753 .exit = tcp_sk_exit,
2754 .exit_batch = tcp_sk_exit_batch,
Denis V. Lunev046ee902008-04-03 14:31:33 -07002755};
2756
Denis V. Lunev9b0f9762008-02-29 11:13:15 -08002757void __init tcp_v4_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002758{
Eric W. Biederman6a1b3052009-02-22 00:10:18 -08002759 if (register_pernet_subsys(&tcp_sk_ops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002760 panic("Failed to create the TCP control socket.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761}