blob: efc6fef692ffdca4dcdd3f4b87a837656dd66c8c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * IPv4 specific functions
9 *
10 *
11 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24/*
25 * Changes:
26 * David S. Miller : New socket lookup architecture.
27 * This code is dedicated to John Dyson.
28 * David S. Miller : Change semantics of established hash,
29 * half is devoted to TIME_WAIT sockets
30 * and the rest go in the other half.
31 * Andi Kleen : Add support for syncookies and fixed
32 * some bugs: ip options weren't passed to
33 * the TCP layer, missed a check for an
34 * ACK bit.
35 * Andi Kleen : Implemented fast path mtu discovery.
36 * Fixed many serious bugs in the
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -070037 * request_sock handling and moved
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 * most of it into the af independent code.
39 * Added tail drop and some other bugfixes.
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -080040 * Added new listen semantics.
Linus Torvalds1da177e2005-04-16 15:20:36 -070041 * Mike McLagan : Routing by source
42 * Juan Jose Ciarlante: ip_dynaddr bits
43 * Andi Kleen: various fixes.
44 * Vitaly E. Lavrov : Transparent proxy revived after year
45 * coma.
46 * Andi Kleen : Fix new listen.
47 * Andi Kleen : Fix accept error reporting.
48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
49 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
50 * a single port at the same time.
51 */
52
Joe Perchesafd465032012-03-12 07:03:32 +000053#define pr_fmt(fmt) "TCP: " fmt
Linus Torvalds1da177e2005-04-16 15:20:36 -070054
Herbert Xueb4dea52008-12-29 23:04:08 -080055#include <linux/bottom_half.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090064#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020066#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070068#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030070#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070071#include <net/ipv6.h>
72#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080073#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070074#include <net/xfrm.h>
David S. Miller6e5714e2011-08-03 20:50:44 -070075#include <net/secure_seq.h>
Eliezer Tamir076bb0c2013-07-10 17:13:17 +030076#include <net/busy_poll.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070077
78#include <linux/inet.h>
79#include <linux/ipv6.h>
80#include <linux/stddef.h>
81#include <linux/proc_fs.h>
82#include <linux/seq_file.h>
Ivan Delalande67973182017-06-15 18:07:06 -070083#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
Herbert Xucf80e0e2016-01-24 21:20:23 +080085#include <crypto/hash.h>
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080086#include <linux/scatterlist.h>
87
Song Liuc24b14c42017-10-23 09:20:24 -070088#include <trace/events/tcp.h>
89
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080090#ifdef CONFIG_TCP_MD5SIG
Eric Dumazeta915da9b2012-01-31 05:18:33 +000091static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
Eric Dumazet318cf7a2011-10-24 02:46:04 -040092 __be32 daddr, __be32 saddr, const struct tcphdr *th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080093#endif
94
Eric Dumazet5caea4e2008-11-20 00:40:07 -080095struct inet_hashinfo tcp_hashinfo;
Eric Dumazet4bc2f182010-07-09 21:22:10 +000096EXPORT_SYMBOL(tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
Eric Dumazet84b114b2017-05-05 06:56:54 -070098static u32 tcp_v4_init_seq(const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -070099{
Eric Dumazet84b114b2017-05-05 06:56:54 -0700100 return secure_tcp_seq(ip_hdr(skb)->daddr,
101 ip_hdr(skb)->saddr,
102 tcp_hdr(skb)->dest,
103 tcp_hdr(skb)->source);
104}
105
Eric Dumazet5d2ed052017-06-07 10:34:39 -0700106static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
Eric Dumazet84b114b2017-05-05 06:56:54 -0700107{
Eric Dumazet5d2ed052017-06-07 10:34:39 -0700108 return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109}
110
/* Decide whether the TIME-WAIT socket @sktw, which occupies the 4-tuple
 * we want for a new connection from @sk, may be reused/displaced.
 *
 * Returns 1 (and takes a reference on @sktw) when reuse is allowed; the
 * new socket then inherits the timewait socket's recent timestamp and a
 * write_seq advanced past the old connection's. Returns 0 otherwise.
 *
 * @twp is NULL when the caller has no timewait-bucket pointer to fill;
 * in that case only the timestamp condition matters (see below).
 */
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct inet_timewait_sock *tw = inet_twsk(sktw);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);
	int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;

	/* reuse == 2 means "reuse only for loopback traffic". */
	if (reuse == 2) {
		/* Still does not detect *everything* that goes through
		 * lo, since we require a loopback src or dst address
		 * or direct binding to 'lo' interface.
		 */
		bool loopback = false;
		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
			loopback = true;
#if IS_ENABLED(CONFIG_IPV6)
		if (tw->tw_family == AF_INET6) {
			/* s6_addr[12] == 127 checks for a v4-mapped
			 * 127.0.0.0/8 address (first byte of the
			 * embedded IPv4 address).
			 */
			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
			     (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
			     (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
				loopback = true;
		} else
#endif
		{
			if (ipv4_is_loopback(tw->tw_daddr) ||
			    ipv4_is_loopback(tw->tw_rcv_saddr))
				loopback = true;
		}
		if (!loopback)
			reuse = 0;
	}

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (reuse && time_after32(ktime_get_seconds(),
					    tcptw->tw_ts_recent_stamp)))) {
		/* In case of repair and re-using TIME-WAIT sockets we still
		 * want to be sure that it is safe as above but honor the
		 * sequence numbers and time stamps set as part of the repair
		 * process.
		 *
		 * Without this check re-using a TIME-WAIT socket with TCP
		 * repair would accumulate a -1 on the repair assigned
		 * sequence number. The first time it is reused the sequence
		 * is -1, the second time -2, etc. This fixes that issue
		 * without appearing to create any others.
		 */
		if (likely(!tp->repair)) {
			/* Jump well past the old connection's sequence
			 * space so old duplicates cannot be accepted.
			 */
			tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
			if (tp->write_seq == 0)
				tp->write_seq = 1;
			tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		}
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
185
Andrey Ignatovd74bad42018-03-30 15:08:05 -0700186static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
187 int addr_len)
188{
189 /* This check is replicated from tcp_v4_connect() and intended to
190 * prevent BPF program called below from accessing bytes that are out
191 * of the bound specified by user in addr_len.
192 */
193 if (addr_len < sizeof(struct sockaddr_in))
194 return -EINVAL;
195
196 sock_owned_by_me(sk);
197
198 return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
199}
200
/* This will initiate an outgoing connection.
 *
 * Resolves a route to @uaddr, binds a local port via inet_hash_connect(),
 * commits the route, seeds sequence number / timestamp offset, and sends
 * the SYN (unless deferred by TCP Fast Open or socket repair).
 * Called with the socket lock held; returns 0 or a negative errno.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	/* With source routing, route towards the first hop, not the
	 * final destination.
	 */
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	/* TCP cannot connect to multicast/broadcast destinations. */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	/* Adopt the route's preferred source address if none was bound. */
	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	/* Re-validate the route now that the (possibly autoselected)
	 * source port is known.
	 */
	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	/* Ownership of rt passed to the socket; clear so the failure
	 * path's ip_rt_put() does not double-release it.
	 */
	rt = NULL;

	/* In repair mode userspace supplies write_seq/tsoffset itself. */
	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	/* TCP Fast Open may defer the actual connect until sendmsg(). */
	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 *
 * The new MTU is read from tcp_sk(sk)->mtu_info, stashed there by
 * tcp_v4_err(). Expects the socket lock to be held by the caller.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	/* Nothing to do for listeners or closed sockets. */
	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375
David S. Miller55be7a92012-07-11 21:27:49 -0700376static void do_redirect(struct sk_buff *skb, struct sock *sk)
377{
378 struct dst_entry *dst = __sk_dst_check(sk, 0);
379
David S. Miller1ed5c482012-07-12 00:41:25 -0700380 if (dst)
David S. Miller6700c272012-07-17 03:29:28 -0700381 dst->ops->redirect(dst, sk, skb);
David S. Miller55be7a92012-07-11 21:27:49 -0700382}
383
Eric Dumazet26e37362015-03-22 10:22:22 -0700384
385/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
Eric Dumazet9cf74902016-02-02 19:31:12 -0800386void tcp_req_err(struct sock *sk, u32 seq, bool abort)
Eric Dumazet26e37362015-03-22 10:22:22 -0700387{
388 struct request_sock *req = inet_reqsk(sk);
389 struct net *net = sock_net(sk);
390
391 /* ICMPs are not backlogged, hence we cannot get
392 * an established socket here.
393 */
Eric Dumazet26e37362015-03-22 10:22:22 -0700394 if (seq != tcp_rsk(req)->snt_isn) {
Eric Dumazet02a1d6e2016-04-27 16:44:39 -0700395 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
Eric Dumazet9cf74902016-02-02 19:31:12 -0800396 } else if (abort) {
Eric Dumazet26e37362015-03-22 10:22:22 -0700397 /*
398 * Still in SYN_RECV, just remove it silently.
399 * There is no good way to pass the error to the newly
400 * created socket, and POSIX does not want network
401 * errors returned from accept().
402 */
Fan Duc6973662015-03-23 15:00:41 -0700403 inet_csk_reqsk_queue_drop(req->rsk_listener, req);
Eric Dumazet9caad862016-04-01 08:52:20 -0700404 tcp_listendrop(req->rsk_listener);
Eric Dumazet26e37362015-03-22 10:22:22 -0700405 }
Eric Dumazetef84d8c2015-10-14 11:16:26 -0700406 reqsk_put(req);
Eric Dumazet26e37362015-03-22 10:22:22 -0700407}
408EXPORT_SYMBOL(tcp_req_err);
409
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition. If err < 0 then the socket should
 * be closed and the error returned to the user. If err > 0
 * it's just the icmp type << 8 | icmp code. After adjustment
 * header points to the first 8 bytes of the tcp header. We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	/* icmp_skb->data points at the embedded (offending) IP header. */
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	/* The embedded header carries OUR outgoing packet, so daddr is
	 * the remote end and saddr our local address for the lookup.
	 */
	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb), 0);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return -ENOENT;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		/* Abort the request only for errors that indicate a hard
		 * failure; tcp_req_err() drops the lookup reference.
		 */
		tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
				     type == ICMP_TIME_EXCEEDED ||
				     (type == ICMP_DEST_UNREACH &&
				      (code == ICMP_NET_UNREACH ||
				       code == ICMP_HOST_UNREACH)));
		return 0;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	/* Drop if the embedded TTL is below the socket's minimum
	 * (IP_MINTTL protection against spoofed ICMP).
	 */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs send out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				/* Socket owned by user: defer to
				 * tcp_release_cb(); extra hold is released
				 * when the deferred work runs.
				 */
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		/* Revert one step of exponential backoff and recompute
		 * the RTO, then either re-arm the timer with the remaining
		 * time or retransmit immediately.
		 */
		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		skb = tcp_rtx_queue_head(sk);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
618
Daniel Borkmann28850dc2013-06-07 05:11:46 +0000619void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700621 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622
Eric Dumazet98be9b12018-02-19 11:56:52 -0800623 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
624 skb->csum_start = skb_transport_header(skb) - skb->head;
625 skb->csum_offset = offsetof(struct tcphdr, check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626}
627
Herbert Xu419f9f82010-04-11 02:15:53 +0000628/* This routine computes an IPv4 TCP checksum. */
Herbert Xubb296242010-04-11 02:15:55 +0000629void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
Herbert Xu419f9f82010-04-11 02:15:53 +0000630{
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400631 const struct inet_sock *inet = inet_sk(sk);
Herbert Xu419f9f82010-04-11 02:15:53 +0000632
633 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
634}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000635EXPORT_SYMBOL(tcp_v4_send_check);
Herbert Xu419f9f82010-04-11 02:15:53 +0000636
/*
 * This routine will send an RST to the other tcp.
 *
 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 * for reset.
 * Answer: if a packet caused RST, it is not for a socket
 * existing in our system, if it is matched to a socket,
 * it is just duplicate segment or bug in other side's TCP.
 * So that we build reply only basing on parameters
 * arrived with segment.
 * Exception: precedence violation. We do not implement it in any case.
 */

/* sk may be NULL (no matching socket), a full socket, a request socket
 * or a timewait socket; only the fields shared by all of them are used.
 */
static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* Reply buffer: bare TCP header, plus room for the MD5 option. */
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;
	struct sock *ctl_sk;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		/* No ACK in the offending segment: ACK everything it
		 * covered (SYN/FIN each count for one sequence number).
		 */
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	/* No socket context: derive the netns from the incoming route. */
	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb),
					     tcp_v4_sdif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;

		/* Verify the incoming segment's signature before replying. */
		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;

	}

	if (key) {
		/* Append the MD5 option (NOP,NOP,MD5SIG,len + digest). */
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	/* Partial checksum over the pseudo-header; ip_send_unicast_reply()
	 * finishes it at csumoffset.
	 */
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk) {
		arg.bound_dev_if = sk->sk_bound_dev_if;
		if (sk_fullsock(sk))
			trace_tcp_send_reset(sk, skb);
	}

	/* sk may actually be a timewait socket: the access to
	 * sk_bound_dev_if above is only safe because both layouts keep
	 * the bound device at the same offset.
	 */
	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	/* Borrow the per-netns, per-cpu control socket; BH must stay
	 * disabled while we use (and temporarily mark) it.
	 */
	local_bh_disable();
	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
	if (sk)
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
790
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

/* Build and send a bare ACK segment (optionally carrying timestamp and
 * MD5 options) in reply to an incoming skb.  sk is not a full
 * established socket here; it only supplies netns, uid and mark context.
 */
static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* Reply buffer: TCP header plus room for TS and MD5 options. */
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;
	struct sock *ctl_sk;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		/* Timestamp option: NOP,NOP,TIMESTAMP,len then val/ecr. */
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* MD5 option goes after the timestamp option, if any. */
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	/* Pseudo-header partial checksum; completed by the IP reply path. */
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	/* Borrow the per-netns, per-cpu control socket with BH disabled. */
	local_bh_disable();
	ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk);
	if (sk)
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}
876
877static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
878{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700879 struct inet_timewait_sock *tw = inet_twsk(sk);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800880 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900882 tcp_v4_send_ack(sk, skb,
Eric Dumazete62a1232016-01-21 08:02:54 -0800883 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200884 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
Eric Dumazet9a568de2017-05-16 14:00:14 -0700885 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900886 tcptw->tw_ts_recent,
887 tw->tw_bound_dev_if,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700888 tcp_twsk_md5_key(tcptw),
Eric Dumazet66b13d92011-10-24 03:06:21 -0400889 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
890 tw->tw_tos
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900891 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700893 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894}
895
Eric Dumazeta00e7442015-09-29 07:42:39 -0700896static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200897 struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898{
Jerry Chu168a8f52012-08-31 12:29:13 +0000899 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
900 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
901 */
Eric Dumazete62a1232016-01-21 08:02:54 -0800902 u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
903 tcp_sk(sk)->snd_nxt;
904
Eric Dumazet20a2b492016-08-22 11:31:10 -0700905 /* RFC 7323 2.3
906 * The window field (SEG.WND) of every outgoing segment, with the
907 * exception of <SYN> segments, MUST be right-shifted by
908 * Rcv.Wind.Shift bits:
909 */
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900910 tcp_v4_send_ack(sk, skb, seq,
Eric Dumazet20a2b492016-08-22 11:31:10 -0700911 tcp_rsk(req)->rcv_nxt,
912 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
Eric Dumazet9a568de2017-05-16 14:00:14 -0700913 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900914 req->ts_recent,
915 0,
Christoph Paasch30791ac2017-12-11 00:05:46 -0800916 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000917 AF_INET),
Eric Dumazet66b13d92011-10-24 03:06:21 -0400918 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
919 ip_hdr(skb)->tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920}
921
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922/*
Kris Katterjohn9bf1d832008-02-17 22:29:19 -0800923 * Send a SYN-ACK after having received a SYN.
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700924 * This still operates on a request_sock only, not on a big
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 * socket.
926 */
Eric Dumazet0f935db2015-09-25 07:39:21 -0700927static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
Octavian Purdilad6274bd2014-06-25 17:09:58 +0300928 struct flowi *fl,
Octavian Purdila72659ec2010-01-17 19:09:39 -0800929 struct request_sock *req,
Eric Dumazetca6fb062015-10-02 11:43:35 -0700930 struct tcp_fastopen_cookie *foc,
Eric Dumazetb3d05142016-04-13 22:05:39 -0700931 enum tcp_synack_type synack_type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700933 const struct inet_request_sock *ireq = inet_rsk(req);
David S. Miller6bd023f2011-05-18 18:32:03 -0400934 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 int err = -1;
Weilong Chend41db5a2013-12-23 14:37:28 +0800936 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938 /* First, grab a route. */
David S. Millerba3f7f02012-07-17 14:02:46 -0700939 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800940 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941
Eric Dumazetb3d05142016-04-13 22:05:39 -0700942 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943
944 if (skb) {
Eric Dumazet634fb9792013-10-09 15:21:29 -0700945 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946
Eric Dumazet2ab2ddd2018-10-02 12:35:05 -0700947 rcu_read_lock();
Eric Dumazet634fb9792013-10-09 15:21:29 -0700948 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
949 ireq->ir_rmt_addr,
Eric Dumazet2ab2ddd2018-10-02 12:35:05 -0700950 rcu_dereference(ireq->ireq_opt));
951 rcu_read_unlock();
Gerrit Renkerb9df3cb2006-11-14 11:21:36 -0200952 err = net_xmit_eval(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 }
954
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 return err;
956}
957
/*
 * IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	/* Condition "1": we are the last user of req, so ireq_opt can be
	 * taken and freed without RCU read-side protection.
	 */
	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
965
#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Static branch guarding the cost of MD5 key lookups on sockets that
 * never configured a key; presumably enabled once the first key is
 * installed -- confirm against the tcp_md5_needed users in tcp.h.
 */
struct static_key tcp_md5_needed __read_mostly;
EXPORT_SYMBOL(tcp_md5_needed);
/* Find the Key structure for an address. */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
					   const union tcp_md5_addr *addr,
					   int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	/* Longest-prefix match: scan all keys of this family and keep
	 * the matching one with the largest prefixlen.
	 */
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;

		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(__tcp_md5_do_lookup);
/* Exact-match variant of the key lookup: family, address bytes and
 * prefixlen must all be equal.  Used by tcp_md5_do_add()/do_del() so
 * that keys differing only in prefix length can coexist.
 */
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size) &&
		    key->prefixlen == prefixlen)
			return key;
	}
	return NULL;
}
1046
Eric Dumazetb83e3de2015-09-25 07:39:15 -07001047struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
Eric Dumazetfd3a1542015-03-24 15:58:56 -07001048 const struct sock *addr_sk)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001049{
Eric Dumazetb52e6922015-04-09 14:36:42 -07001050 const union tcp_md5_addr *addr;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001051
Eric Dumazetb52e6922015-04-09 14:36:42 -07001052 addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001053 return tcp_md5_do_lookup(sk, addr, AF_INET);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001054}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001055EXPORT_SYMBOL(tcp_v4_md5_lookup);
1056
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001057/* This can be called on a newly created socket, from other files */
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001058int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
Ivan Delalande67973182017-06-15 18:07:06 -07001059 int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
1060 gfp_t gfp)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001061{
1062 /* Add Key to the list */
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -07001063 struct tcp_md5sig_key *key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001064 struct tcp_sock *tp = tcp_sk(sk);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001065 struct tcp_md5sig_info *md5sig;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001066
Ivan Delalande67973182017-06-15 18:07:06 -07001067 key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001068 if (key) {
1069 /* Pre-existing entry - just update that one. */
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001070 memcpy(key->key, newkey, newkeylen);
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -07001071 key->keylen = newkeylen;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001072 return 0;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001073 }
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001074
Eric Dumazeta8afca02012-01-31 18:45:40 +00001075 md5sig = rcu_dereference_protected(tp->md5sig_info,
Hannes Frederic Sowa1e1d04e2016-04-05 17:10:15 +02001076 lockdep_sock_is_held(sk));
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001077 if (!md5sig) {
1078 md5sig = kmalloc(sizeof(*md5sig), gfp);
1079 if (!md5sig)
1080 return -ENOMEM;
1081
1082 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1083 INIT_HLIST_HEAD(&md5sig->head);
Eric Dumazeta8afca02012-01-31 18:45:40 +00001084 rcu_assign_pointer(tp->md5sig_info, md5sig);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001085 }
1086
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001087 key = sock_kmalloc(sk, sizeof(*key), gfp);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001088 if (!key)
1089 return -ENOMEM;
Eric Dumazet71cea172013-05-20 06:52:26 +00001090 if (!tcp_alloc_md5sig_pool()) {
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001091 sock_kfree_s(sk, key, sizeof(*key));
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001092 return -ENOMEM;
1093 }
1094
1095 memcpy(key->key, newkey, newkeylen);
1096 key->keylen = newkeylen;
1097 key->family = family;
Ivan Delalande67973182017-06-15 18:07:06 -07001098 key->prefixlen = prefixlen;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001099 memcpy(&key->addr, addr,
1100 (family == AF_INET6) ? sizeof(struct in6_addr) :
1101 sizeof(struct in_addr));
1102 hlist_add_head_rcu(&key->node, &md5sig->head);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001103 return 0;
1104}
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001105EXPORT_SYMBOL(tcp_md5_do_add);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001106
/* Remove the key exactly matching (addr, family, prefixlen) from the
 * socket's MD5 key list.  Returns 0 on success, -ENOENT if no such key.
 */
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
		   u8 prefixlen)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	/* Uncharge the socket now, but free only after a grace period:
	 * RCU readers may still be using this key.
	 */
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);
1121
/* Unlink and free every MD5 key on the socket.  The unconditional
 * rcu_dereference_protected(..., 1) asserts exclusive access --
 * presumably socket teardown; confirm no concurrent users at call sites.
 */
static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	/* _safe variant: entries are removed while iterating. */
	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}
1137
Ivan Delalande8917a772017-06-15 18:07:07 -07001138static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
1139 char __user *optval, int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001140{
1141 struct tcp_md5sig cmd;
1142 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
Ivan Delalande8917a772017-06-15 18:07:07 -07001143 u8 prefixlen = 32;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001144
1145 if (optlen < sizeof(cmd))
1146 return -EINVAL;
1147
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001148 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001149 return -EFAULT;
1150
1151 if (sin->sin_family != AF_INET)
1152 return -EINVAL;
1153
Ivan Delalande8917a772017-06-15 18:07:07 -07001154 if (optname == TCP_MD5SIG_EXT &&
1155 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
1156 prefixlen = cmd.tcpm_prefixlen;
1157 if (prefixlen > 32)
1158 return -EINVAL;
1159 }
1160
Dmitry Popov64a124e2014-08-03 22:45:19 +04001161 if (!cmd.tcpm_keylen)
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001162 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
Ivan Delalande8917a772017-06-15 18:07:07 -07001163 AF_INET, prefixlen);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001164
1165 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1166 return -EINVAL;
1167
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001168 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
Ivan Delalande8917a772017-06-15 18:07:07 -07001169 AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001170 GFP_KERNEL);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001171}
1172
/* Feed the TCP-MD5 header block into the ahash request: the IPv4
 * pseudo-header (saddr, daddr, proto, len = nbytes) followed by a copy
 * of the TCP header with its checksum field zeroed.  Returns the
 * crypto_ahash_update() result (0 on success).
 */
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	/* Build the pseudo-header in the pool's scratch area. */
	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	/* Copy the real TCP header right after it; the signature is
	 * computed as if the checksum field were zero.
	 */
	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}
1197
/* Compute the TCP-MD5 digest over pseudo-header + TCP header only (no
 * payload) into md5_hash; used by the RST/ACK reply builders above,
 * whose segments carry no data.  Returns 0 on success, 1 on failure
 * (md5_hash is zeroed).
 */
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			__be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	/* th->doff << 2 = full header length in bytes, options included. */
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
1228
/* Compute the RFC2385 MD5 signature over a whole skb (pseudo-header,
 * TCP header with zeroed checksum, payload, then the key) into the
 * 16-byte md5_hash.  With sk == NULL the addresses are taken from the
 * skb's IP header instead of the socket.  Returns 0 on success, 1 on
 * failure (md5_hash is zeroed).
 */
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	/* Pseudo-header + TCP header ... */
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	/* ... then the payload (skipping th->doff << 2 header bytes) ... */
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	/* ... and finally the shared key itself. */
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001275
Eric Dumazetba8e2752015-10-02 11:43:28 -07001276#endif
1277
/* Called with rcu_read_lock() */
/* Validate the inbound segment's MD5 signature option against the key
 * configured on @sk (if any).
 *
 * Returns true when the segment must be dropped, false when it may be
 * processed further. With CONFIG_TCP_MD5SIG disabled this is a no-op
 * that always accepts the segment.
 */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	/* Key is looked up by the peer's (source) address. */
	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	/* genhash != 0 means the digest computation itself failed. */
	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
				     : "");
		return true;
	}
	return false;
#endif
	return false;
}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001336
Eric Dumazetb40cf182015-09-25 07:39:08 -07001337static void tcp_v4_init_req(struct request_sock *req,
1338 const struct sock *sk_listener,
Octavian Purdila16bea702014-06-25 17:09:53 +03001339 struct sk_buff *skb)
1340{
1341 struct inet_request_sock *ireq = inet_rsk(req);
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001342 struct net *net = sock_net(sk_listener);
Octavian Purdila16bea702014-06-25 17:09:53 +03001343
Eric Dumazet08d2cc3b2015-03-18 14:05:38 -07001344 sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1345 sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001346 RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
Octavian Purdila16bea702014-06-25 17:09:53 +03001347}
1348
/* Resolve the output route for a request socket, filling in the IPv4
 * flow info in @fl. Thin af-specific wrapper used via
 * tcp_request_sock_ipv4_ops.route_req.
 */
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet_csk_route_req(sk, &fl->u.ip4, req);
}
1355
/* Address-family independent request-socket operations for IPv4 TCP:
 * how to (re)send SYN-ACKs, ACKs and RSTs for embryonic connections,
 * and how to tear a request down.
 */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};
1365
/* IPv4-specific hooks used while handling a connection request:
 * MD5 key lookup/signing, request init, routing, ISN/timestamp-offset
 * generation and SYN-ACK transmission. Consumed by tcp_conn_request().
 */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_seq,
	.init_ts_off	=	tcp_v4_init_ts_off,
	.send_synack	=	tcp_v4_send_synack,
};
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001381
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1383{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 /* Never answer to SYNs send to broadcast or multicast */
Eric Dumazet511c3f92009-06-02 05:14:27 +00001385 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386 goto drop;
1387
Octavian Purdila1fb6f152014-06-25 17:10:02 +03001388 return tcp_conn_request(&tcp_request_sock_ops,
1389 &tcp_request_sock_ipv4_ops, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391drop:
Eric Dumazet9caad862016-04-01 08:52:20 -07001392 tcp_listendrop(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 return 0;
1394}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001395EXPORT_SYMBOL(tcp_v4_conn_request);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396
1397
/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 *
 * @sk:         the listener socket.
 * @skb:        the final ACK (or cookie ACK) completing the handshake.
 * @req:        the pending request socket describing the connection.
 * @dst:        pre-resolved route (syncookie path) or NULL to route here.
 * @req_unhash: request to displace in the ehash table.
 * @own_req:    set true when we won the race to insert the child.
 *
 * Returns the new child socket, or NULL on failure (accept queue full,
 * allocation failure, no route, or port inheritance failure), in which
 * case the drop is accounted via tcp_listendrop().
 */
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	/* Copy addressing, device binding and IP options from the request
	 * into the child socket.
	 */
	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newsk->sk_bound_dev_if = ireq->ir_iif;
	newinet->inet_saddr = ireq->ir_loc_addr;
	inet_opt = rcu_dereference(ireq->ireq_opt);
	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	newinet->rcv_tos = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
		/* MD5-signed segments cannot be offloaded/segmented. */
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (likely(*own_req)) {
		tcp_move_syn(newtp, req);
		/* Options ownership moved to newsk; keep req from freeing. */
		ireq->ireq_opt = NULL;
	} else {
		newinet->inet_opt = NULL;
	}
	return newsk;

exit_overflow:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	/* Options still belong to the request; don't let newsk free them. */
	newinet->inet_opt = NULL;
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502
/* Syncookie hook for a listener: a non-SYN segment may be the ACK of a
 * previously emitted syncookie, so let cookie_v4_check() try to validate
 * it and conjure the child socket. Without CONFIG_SYN_COOKIES this is
 * the identity function on @sk.
 */
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}
1513
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Dispatches an inbound segment for @sk depending on its state:
 * ESTABLISHED takes the fast path, LISTEN may spawn/feed a child,
 * everything else goes through the generic state machine.
 * Always returns 0; errors result in an RST and/or the skb being freed.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		/* Invalidate the cached input route if the ingress device
		 * changed or the dst itself is stale.
		 */
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb);
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		/* May return a syncookie-built child socket. */
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583
/* Early demultiplexing: before routing, try to find an established
 * socket for this segment and attach it (and, if still valid, its
 * cached input route) to the skb, saving a second lookup later.
 * Best effort — always returns 0; any bail-out just skips the shortcut.
 */
int tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return 0;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return 0;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	/* Reject segments claiming a data offset smaller than the
	 * minimal TCP header.
	 */
	if (th->doff < sizeof(struct tcphdr) / 4)
		return 0;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif, inet_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		/* Request/timewait minisocks carry no cached rx dst. */
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
	return 0;
}
1621
/* Queue @skb on the backlog of an owned socket, coalescing it with the
 * backlog tail when possible to bound memory usage.
 *
 * Returns false when the skb was queued or coalesced. Returns true when
 * the caller must drop the skb — note that on those paths the socket
 * lock has already been released via bh_unlock_sock().
 */
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
	struct skb_shared_info *shinfo;
	const struct tcphdr *th;
	struct tcphdr *thtail;
	struct sk_buff *tail;
	unsigned int hdrlen;
	bool fragstolen;
	u32 gso_segs;
	int delta;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	skb_dst_drop(skb);

	if (unlikely(tcp_checksum_complete(skb))) {
		bh_unlock_sock(sk);
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
		return true;
	}

	/* Attempt coalescing to last skb in backlog, even if we are
	 * above the limits.
	 * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
	 */
	th = (const struct tcphdr *)skb->data;
	hdrlen = th->doff * 4;
	shinfo = skb_shinfo(skb);

	if (!shinfo->gso_size)
		shinfo->gso_size = skb->len - hdrlen;

	if (!shinfo->gso_segs)
		shinfo->gso_segs = 1;

	tail = sk->sk_backlog.tail;
	if (!tail)
		goto no_coalesce;
	thtail = (struct tcphdr *)tail->data;

	/* Coalesce only contiguous, flag-compatible segments with
	 * identical TCP options (compared byte-wise past the base header).
	 */
	if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
	    TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
	    ((TCP_SKB_CB(tail)->tcp_flags |
	      TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) ||
	    ((TCP_SKB_CB(tail)->tcp_flags ^
	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
#ifdef CONFIG_TLS_DEVICE
	    tail->decrypted != skb->decrypted ||
#endif
	    thtail->doff != th->doff ||
	    memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
		goto no_coalesce;

	__skb_pull(skb, hdrlen);
	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
		/* Merge the new segment's metadata into the tail skb. */
		thtail->window = th->window;

		TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;

		if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
			TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;

		TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;

		if (TCP_SKB_CB(skb)->has_rxtstamp) {
			TCP_SKB_CB(tail)->has_rxtstamp = true;
			tail->tstamp = skb->tstamp;
			skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
		}

		/* Not as strict as GRO. We only need to carry mss max value */
		skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
						 skb_shinfo(tail)->gso_size);

		gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
		skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);

		sk->sk_backlog.len += delta;
		__NET_INC_STATS(sock_net(sk),
				LINUX_MIB_TCPBACKLOGCOALESCE);
		kfree_skb_partial(skb, fragstolen);
		return false;
	}
	/* Coalescing failed: restore the header before plain queueing. */
	__skb_push(skb, hdrlen);

no_coalesce:
	/* Only socket owner can try to collapse/prune rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * Few sockets backlog are possibly concurrently non empty.
	 */
	limit += 64*1024;

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);
1730
Eric Dumazetac6e7802016-11-10 13:12:35 -08001731int tcp_filter(struct sock *sk, struct sk_buff *skb)
1732{
1733 struct tcphdr *th = (struct tcphdr *)skb->data;
1734 unsigned int eaten = skb->len;
1735 int err;
1736
1737 err = sk_filter_trim_cap(sk, skb, th->doff * 4);
1738 if (!err) {
1739 eaten -= skb->len;
1740 TCP_SKB_CB(skb)->end_seq -= eaten;
1741 }
1742 return err;
1743}
1744EXPORT_SYMBOL(tcp_filter);
1745
/* Undo tcp_v4_fill_cb(): move the saved IPv4 control block back to the
 * head of skb->cb so IP-layer code reading IPCB(skb) sees valid data.
 */
static void tcp_v4_restore_cb(struct sk_buff *skb)
{
	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
		sizeof(struct inet_skb_parm));
}
1751
/* Populate TCP_SKB_CB(skb) from the IP/TCP headers, first preserving the
 * IPv4 control block inside it (reversed later by tcp_v4_restore_cb()).
 * Note end_seq counts SYN and FIN as occupying sequence space.
 */
static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
			   const struct tcphdr *th)
{
	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
	 * barrier() makes sure compiler wont play fool^Waliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
1773
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774/*
1775 * From tcp_input.c
1776 */
1777
1778int tcp_v4_rcv(struct sk_buff *skb)
1779{
Eric Dumazet3b24d852016-04-01 08:52:17 -07001780 struct net *net = dev_net(skb->dev);
David Ahern3fa6f612017-08-07 08:44:17 -07001781 int sdif = inet_sdif(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001782 const struct iphdr *iph;
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001783 const struct tcphdr *th;
Eric Dumazet3b24d852016-04-01 08:52:17 -07001784 bool refcounted;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785 struct sock *sk;
1786 int ret;
1787
1788 if (skb->pkt_type != PACKET_HOST)
1789 goto discard_it;
1790
1791 /* Count it even if it's bad */
Eric Dumazet90bbcc62016-04-27 16:44:32 -07001792 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793
1794 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1795 goto discard_it;
1796
Eric Dumazetea1627c2016-05-13 09:16:40 -07001797 th = (const struct tcphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798
Eric Dumazetea1627c2016-05-13 09:16:40 -07001799 if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 goto bad_packet;
1801 if (!pskb_may_pull(skb, th->doff * 4))
1802 goto discard_it;
1803
1804 /* An explanation is required here, I think.
1805 * Packet length and doff are validated by header prediction,
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -08001806 * provided case of th->doff==0 is eliminated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 * So, we defer the checks. */
Tom Herberted70fcf2014-05-02 16:29:38 -07001808
1809 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00001810 goto csum_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811
Eric Dumazetea1627c2016-05-13 09:16:40 -07001812 th = (const struct tcphdr *)skb->data;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001813 iph = ip_hdr(skb);
Eric Dumazet4bdc3d62015-10-13 17:12:54 -07001814lookup:
Craig Galleka5836362016-02-10 11:50:38 -05001815 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
David Ahern3fa6f612017-08-07 08:44:17 -07001816 th->dest, sdif, &refcounted);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 if (!sk)
1818 goto no_tcp_socket;
1819
Eric Dumazetbb134d52010-03-09 05:55:56 +00001820process:
1821 if (sk->sk_state == TCP_TIME_WAIT)
1822 goto do_time_wait;
1823
Eric Dumazet079096f2015-10-02 11:43:32 -07001824 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1825 struct request_sock *req = inet_reqsk(sk);
Eric Dumazete0f97592018-02-13 06:14:12 -08001826 bool req_stolen = false;
Eric Dumazet77166822016-02-18 05:39:18 -08001827 struct sock *nsk;
Eric Dumazet079096f2015-10-02 11:43:32 -07001828
1829 sk = req->rsk_listener;
Eric Dumazet72923552016-02-11 22:50:29 -08001830 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
Eric Dumazete65c3322016-08-24 08:50:24 -07001831 sk_drops_add(sk, skb);
Eric Dumazet72923552016-02-11 22:50:29 -08001832 reqsk_put(req);
1833 goto discard_it;
1834 }
Frank van der Linden4fd44a92018-06-12 23:09:37 +00001835 if (tcp_checksum_complete(skb)) {
1836 reqsk_put(req);
1837 goto csum_error;
1838 }
Eric Dumazet77166822016-02-18 05:39:18 -08001839 if (unlikely(sk->sk_state != TCP_LISTEN)) {
Eric Dumazetf03f2e12015-10-14 11:16:27 -07001840 inet_csk_reqsk_queue_drop_and_put(sk, req);
Eric Dumazet4bdc3d62015-10-13 17:12:54 -07001841 goto lookup;
1842 }
Eric Dumazet3b24d852016-04-01 08:52:17 -07001843 /* We own a reference on the listener, increase it again
1844 * as we might lose it too soon.
1845 */
Eric Dumazet77166822016-02-18 05:39:18 -08001846 sock_hold(sk);
Eric Dumazet3b24d852016-04-01 08:52:17 -07001847 refcounted = true;
Eric Dumazet1f3b3592017-09-08 12:44:47 -07001848 nsk = NULL;
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001849 if (!tcp_filter(sk, skb)) {
1850 th = (const struct tcphdr *)skb->data;
1851 iph = ip_hdr(skb);
1852 tcp_v4_fill_cb(skb, iph, th);
Eric Dumazete0f97592018-02-13 06:14:12 -08001853 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001854 }
Eric Dumazet079096f2015-10-02 11:43:32 -07001855 if (!nsk) {
1856 reqsk_put(req);
Eric Dumazete0f97592018-02-13 06:14:12 -08001857 if (req_stolen) {
1858 /* Another cpu got exclusive access to req
1859 * and created a full blown socket.
1860 * Try to feed this packet to this socket
1861 * instead of discarding it.
1862 */
1863 tcp_v4_restore_cb(skb);
1864 sock_put(sk);
1865 goto lookup;
1866 }
Eric Dumazet77166822016-02-18 05:39:18 -08001867 goto discard_and_relse;
Eric Dumazet079096f2015-10-02 11:43:32 -07001868 }
1869 if (nsk == sk) {
Eric Dumazet079096f2015-10-02 11:43:32 -07001870 reqsk_put(req);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001871 tcp_v4_restore_cb(skb);
Eric Dumazet079096f2015-10-02 11:43:32 -07001872 } else if (tcp_child_process(sk, nsk, skb)) {
1873 tcp_v4_send_reset(nsk, skb);
Eric Dumazet77166822016-02-18 05:39:18 -08001874 goto discard_and_relse;
Eric Dumazet079096f2015-10-02 11:43:32 -07001875 } else {
Eric Dumazet77166822016-02-18 05:39:18 -08001876 sock_put(sk);
Eric Dumazet079096f2015-10-02 11:43:32 -07001877 return 0;
1878 }
1879 }
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001880 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
Eric Dumazet02a1d6e2016-04-27 16:44:39 -07001881 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
Stephen Hemmingerd218d112010-01-11 16:28:01 -08001882 goto discard_and_relse;
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001883 }
Stephen Hemmingerd218d112010-01-11 16:28:01 -08001884
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1886 goto discard_and_relse;
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001887
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001888 if (tcp_v4_inbound_md5_hash(sk, skb))
1889 goto discard_and_relse;
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001890
Patrick McHardyb59c2702006-01-06 23:06:10 -08001891 nf_reset(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892
Eric Dumazetac6e7802016-11-10 13:12:35 -08001893 if (tcp_filter(sk, skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 goto discard_and_relse;
Eric Dumazetac6e7802016-11-10 13:12:35 -08001895 th = (const struct tcphdr *)skb->data;
1896 iph = ip_hdr(skb);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001897 tcp_v4_fill_cb(skb, iph, th);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898
1899 skb->dev = NULL;
1900
Eric Dumazete994b2f2015-10-02 11:43:39 -07001901 if (sk->sk_state == TCP_LISTEN) {
1902 ret = tcp_v4_do_rcv(sk, skb);
1903 goto put_and_return;
1904 }
1905
1906 sk_incoming_cpu_update(sk);
1907
Ingo Molnarc6366182006-07-03 00:25:13 -07001908 bh_lock_sock_nested(sk);
Martin KaFai Laua44d6ea2016-03-14 10:52:15 -07001909 tcp_segs_in(tcp_sk(sk), skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 ret = 0;
1911 if (!sock_owned_by_user(sk)) {
Florian Westphale7942d02017-07-30 03:57:18 +02001912 ret = tcp_v4_do_rcv(sk, skb);
Eric Dumazetc9c33212016-08-27 07:37:54 -07001913 } else if (tcp_add_backlog(sk, skb)) {
Zhu Yi6b03a532010-03-04 18:01:41 +00001914 goto discard_and_relse;
1915 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916 bh_unlock_sock(sk);
1917
Eric Dumazete994b2f2015-10-02 11:43:39 -07001918put_and_return:
Eric Dumazet3b24d852016-04-01 08:52:17 -07001919 if (refcounted)
1920 sock_put(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921
1922 return ret;
1923
1924no_tcp_socket:
1925 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1926 goto discard_it;
1927
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001928 tcp_v4_fill_cb(skb, iph, th);
1929
Eric Dumazet12e25e12015-06-03 23:49:21 -07001930 if (tcp_checksum_complete(skb)) {
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00001931csum_error:
Eric Dumazet90bbcc62016-04-27 16:44:32 -07001932 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933bad_packet:
Eric Dumazet90bbcc62016-04-27 16:44:32 -07001934 __TCP_INC_STATS(net, TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001936 tcp_v4_send_reset(NULL, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 }
1938
1939discard_it:
1940 /* Discard frame. */
1941 kfree_skb(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001942 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943
1944discard_and_relse:
Eric Dumazet532182c2016-04-01 08:52:19 -07001945 sk_drops_add(sk, skb);
Eric Dumazet3b24d852016-04-01 08:52:17 -07001946 if (refcounted)
1947 sock_put(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 goto discard_it;
1949
1950do_time_wait:
1951 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001952 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953 goto discard_it;
1954 }
1955
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001956 tcp_v4_fill_cb(skb, iph, th);
1957
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00001958 if (tcp_checksum_complete(skb)) {
1959 inet_twsk_put(inet_twsk(sk));
1960 goto csum_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001962 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 case TCP_TW_SYN: {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001964 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
Craig Galleka5836362016-02-10 11:50:38 -05001965 &tcp_hashinfo, skb,
1966 __tcp_hdrlen(th),
Tom Herbertda5e3632013-01-22 09:50:24 +00001967 iph->saddr, th->source,
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001968 iph->daddr, th->dest,
David Ahern3fa6f612017-08-07 08:44:17 -07001969 inet_iif(skb),
1970 sdif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971 if (sk2) {
Eric Dumazetdbe7faa2015-07-08 14:28:30 -07001972 inet_twsk_deschedule_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 sk = sk2;
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001974 tcp_v4_restore_cb(skb);
Eric Dumazet3b24d852016-04-01 08:52:17 -07001975 refcounted = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976 goto process;
1977 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978 }
Gustavo A. R. Silvafcfd6df2017-10-16 15:48:55 -05001979 /* to ACK */
1980 /* fall through */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 case TCP_TW_ACK:
1982 tcp_v4_timewait_ack(sk, skb);
1983 break;
1984 case TCP_TW_RST:
Florian Westphal271c3b92015-12-21 21:29:26 +01001985 tcp_v4_send_reset(sk, skb);
1986 inet_twsk_deschedule_put(inet_twsk(sk));
1987 goto discard_it;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988 case TCP_TW_SUCCESS:;
1989 }
1990 goto discard_it;
1991}
1992
/* TIME_WAIT socket handling hooks for IPv4 TCP: object size for the
 * timewait slab, uniqueness check when reusing a 4-tuple, and destructor.
 */
static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998
Eric Dumazet63d02d12012-08-09 14:11:00 +00001999void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
Eric Dumazet5d299f32012-08-06 05:09:33 +00002000{
2001 struct dst_entry *dst = skb_dst(skb);
2002
Eric Dumazet5037e9e2015-12-14 14:08:53 -08002003 if (dst && dst_hold_safe(dst)) {
Eric Dumazetca777ef2014-09-08 08:06:07 -07002004 sk->sk_rx_dst = dst;
2005 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2006 }
Eric Dumazet5d299f32012-08-06 05:09:33 +00002007}
Eric Dumazet63d02d12012-08-09 14:11:00 +00002008EXPORT_SYMBOL(inet_sk_rx_dst_set);
Eric Dumazet5d299f32012-08-06 05:09:33 +00002009
/* Address-family specific connection ops used by TCP over IPv4.
 * Installed as icsk->icsk_af_ops in tcp_v4_init_sock().
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002029
#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) signature ops for IPv4 sockets; installed as
 * tp->af_specific in tcp_v4_init_sock().
 */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002037
/* NOTE: A lot of things set to zero explicitly by call to
 * sk_alloc() so need not be done here.
 */
/* Per-socket initialization for IPv4 TCP sockets: run the generic TCP
 * socket init, then hook up the IPv4-specific connection (and, when
 * enabled, MD5 signature) operations.  Always succeeds.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}
2055
/* Final teardown of a TCP socket: stop timers, release congestion control
 * and ULP state, purge queued skbs, free MD5 keys and fastopen state, and
 * drop the bind-bucket reference.  Runs when the socket is being destroyed,
 * so no other users remain.
 */
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	trace_tcp_destroy_sock(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	tcp_cleanup_ulp(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Check if we want to disable active TFO */
	tcp_fastopen_active_disable_ofo_check(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/* A listener's request sockets must already be gone by now. */
	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_fastopen_destroy_cipher(sk);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
2100
2101#ifdef CONFIG_PROC_FS
2102/* Proc filesystem TCP sock list dumping. */
2103
/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
 *
 * Locking: on a non-NULL return the bucket spinlock (ilb->lock) is held;
 * it is released here when a bucket is exhausted, and by tcp_seq_stop()
 * when iteration ends.  Only sockets matching the seq file's address
 * family and network namespace are reported.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	struct inet_listen_hashbucket *ilb;
	struct sock *sk = cur;

	if (!sk) {
get_head:
		/* Start (or restart) at the head of the current bucket. */
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock(&ilb->lock);
		sk = sk_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	sk = sk_next(sk);
get_sk:
	sk_for_each_from(sk) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == afinfo->family)
			return sk;	/* bucket lock intentionally kept held */
	}
	/* Bucket exhausted: drop its lock and advance to the next one. */
	spin_unlock(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE)
		goto get_head;
	return NULL;
}
2143
2144static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2145{
Tom Herberta8b690f2010-06-07 00:43:42 -07002146 struct tcp_iter_state *st = seq->private;
2147 void *rc;
2148
2149 st->bucket = 0;
2150 st->offset = 0;
2151 rc = listening_get_next(seq, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152
2153 while (rc && *pos) {
2154 rc = listening_get_next(seq, rc);
2155 --*pos;
2156 }
2157 return rc;
2158}
2159
/* True if the current ehash bucket (st->bucket) holds no sockets.
 * Lockless check; used only as a fast-path skip before taking the lock.
 */
static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}
2164
/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 *
 * Locking: on a non-NULL return the matching bucket's ehash lock is held
 * (BH-disabled); released by established_get_next()/tcp_seq_stop().
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != afinfo->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;	/* keep bucket lock held for caller */
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}
2200
/* Advance to the next established/TIME_WAIT socket after cur.
 * Continues within the current (locked) ehash bucket; when it is
 * exhausted, drops its lock and falls over to established_get_first()
 * for the following buckets.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == afinfo->family &&
		    net_eq(sock_net(sk), net))
			return sk;
	}

	/* Current bucket done: release its lock and scan onward. */
	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}
2224
2225static void *established_get_idx(struct seq_file *seq, loff_t pos)
2226{
Tom Herberta8b690f2010-06-07 00:43:42 -07002227 struct tcp_iter_state *st = seq->private;
2228 void *rc;
2229
2230 st->bucket = 0;
2231 rc = established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232
2233 while (rc && pos) {
2234 rc = established_get_next(seq, rc);
2235 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002236 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237 return rc;
2238}
2239
2240static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2241{
2242 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002243 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 st->state = TCP_SEQ_STATE_LISTENING;
2246 rc = listening_get_idx(seq, &pos);
2247
2248 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249 st->state = TCP_SEQ_STATE_ESTABLISHED;
2250 rc = established_get_idx(seq, pos);
2251 }
2252
2253 return rc;
2254}
2255
/* Resume iteration near where the previous read stopped, using the saved
 * st->bucket/st->offset instead of rescanning from position zero.
 * st->num is preserved across the re-walk.  Returns NULL if the saved
 * position no longer exists (table shrank between reads).
 */
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		/* Listener bucket gone; continue into established table. */
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}
2288
/* seq_file ->start(): begin (or resume) a walk of the TCP socket tables.
 * If *pos matches the position saved by the last ->next()/->start(), try
 * the fast resume path; otherwise rewind and count from the beginning.
 * Returns SEQ_START_TOKEN for *pos == 0 (header line).
 */
void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_start);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311
/* seq_file ->next(): advance to the socket after v, switching from the
 * listener table to the established table when the former is exhausted.
 * Always increments *pos and records it in st->last_pos for the resume
 * path in tcp_seq_start().
 */
void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			/* Listeners done: restart in the established hash. */
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}
EXPORT_SYMBOL(tcp_seq_next);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342
/* seq_file ->stop(): drop whichever bucket lock the get_* helpers left
 * held.  A SEQ_START_TOKEN / NULL v means no lock was taken.
 */
void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
EXPORT_SYMBOL(tcp_seq_stop);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359
/* Emit one /proc/net/tcp line for a SYN_RECV request socket (row index i).
 * The column layout must stay in lockstep with get_tcp4_sock() and
 * get_timewait4_sock(); fields that do not apply to request sockets are
 * printed as constants.
 */
static void get_openreq4(const struct request_sock *req,
			 struct seq_file *f, int i)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->rsk_timer.expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ireq->ir_num,
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f),
				 sock_i_uid(req->rsk_listener)),
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		0,
		req);
}
2385
/* Emit one /proc/net/tcp line for a full TCP socket (row index i).
 * Reads socket state without taking the socket lock, so some fields
 * (e.g. rx_queue) may be transiently inconsistent.
 */
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;
	int state;

	/* Classify which timer is pending; codes 1/2/4 match the historic
	 * /proc/net/tcp "tr" column encoding.
	 */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sk);
	if (state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		refcount_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		state == TCP_LISTEN ?
		    fastopenq->max_qlen :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
2446
/* Emit one /proc/net/tcp line for a TIME_WAIT socket (row index i).
 * Timer column is always 3 (timewait timer); queue/uid/inode fields do
 * not apply and are printed as zeros.
 */
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	__be32 dest, src;
	__u16 destp, srcp;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		refcount_read(&tw->tw_refcnt), tw);
}
2465
/* Fixed output width of one /proc/net/tcp row (including padding). */
#define TMPSZ 150

/* seq_file ->show(): print the header line for SEQ_START_TOKEN, otherwise
 * dispatch on socket state to the matching row formatter.  Each row is
 * padded to TMPSZ-1 characters and newline-terminated.
 */
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait4_sock(v, seq, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq4(v, seq, st->num);
	else
		get_tcp4_sock(v, seq, st->num);
out:
	seq_pad(seq, '\n');
	return 0;
}
2492
/* seq_file operations backing /proc/net/tcp. */
static const struct seq_operations tcp4_seq_ops = {
	.show		= tcp4_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};
2499
/* Address-family tag handed to the generic TCP seq_file walker so the
 * iteration over tcp_hashinfo only reports AF_INET (IPv4) sockets.
 */
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.family		= AF_INET,
};
2503
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002504static int __net_init tcp4_proc_init_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002505{
Christoph Hellwigc3506372018-04-10 19:42:55 +02002506 if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
2507 sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002508 return -ENOMEM;
2509 return 0;
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002510}
2511
/* Per-network-namespace teardown: remove the /proc/net/tcp entry
 * created by tcp4_proc_init_net().
 */
static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	remove_proc_entry("tcp", net->proc_net);
}
2516
/* pernet hooks creating/removing /proc/net/tcp for each namespace. */
static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};
2521
/* Boot-time registration of the /proc/net/tcp pernet operations.
 * Returns 0 on success or a negative errno from the pernet core.
 */
int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}
2526
/* Unregister the /proc/net/tcp pernet operations (module unload path). */
void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
2531#endif /* CONFIG_PROC_FS */
2532
/* IPv4 TCP protocol descriptor: binds the generic socket layer to the
 * TCP implementation.  Shared state (hash tables, memory accounting,
 * orphan counts) is global; per-netns sysctl buffer limits are reached
 * via the offsetof() fields below.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	/* Connection lifecycle. */
	.close			= tcp_close,
	.pre_connect		= tcp_v4_pre_connect,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	/* Data path. */
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	/* Lookup/bind. */
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	/* Global memory accounting and pressure handling. */
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	/* Per-netns rmem/wmem limits live inside struct net. */
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	/* Sockets may be recycled within an RCU grace period; lookups
	 * must revalidate after refcounting (lockless lookup contract).
	 */
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002580
Denis V. Lunev046ee902008-04-03 14:31:33 -07002581static void __net_exit tcp_sk_exit(struct net *net)
2582{
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002583 int cpu;
2584
Stephen Hemminger6670e152017-11-14 08:25:49 -08002585 module_put(net->ipv4.tcp_congestion_control->owner);
2586
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002587 for_each_possible_cpu(cpu)
2588 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2589 free_percpu(net->ipv4.tcp_sk);
2590}
2591
/*
 * Per-netns initialisation for TCP: allocate one kernel control socket
 * per possible CPU (used for sending RSTs/ACKs on behalf of the stack)
 * and seed every TCP sysctl in this namespace with its default value.
 * Returns 0 on success or a negative errno; on failure all sockets
 * created so far are torn down via tcp_sk_exit().
 */
static int __net_init tcp_sk_init(struct net *net)
{
	int res, cpu, cnt;

	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.tcp_sk)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct sock *sk;

		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
					   IPPROTO_TCP, net);
		if (res)
			goto fail;
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

		/* Please enforce IP_DF and IPID==0 for RST and
		 * ACK sent in SYN-RECV and TIME-WAIT state.
		 */
		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;

		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
	}

	/* ECN: 2 = enabled only when requested by incoming connections. */
	net->ipv4.sysctl_tcp_ecn = 2;
	net->ipv4.sysctl_tcp_ecn_fallback = 1;

	/* Path MTU probing defaults. */
	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;

	/* Keepalive defaults. */
	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;

	/* Retransmission / connection-establishment defaults. */
	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
	net->ipv4.sysctl_tcp_syncookies = 1;
	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
	net->ipv4.sysctl_tcp_orphan_retries = 0;
	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
	/* tw_reuse: 2 = reuse TIME-WAIT only for loopback connections. */
	net->ipv4.sysctl_tcp_tw_reuse = 2;

	/* Size TIME-WAIT bucket and SYN backlog limits from the
	 * established-hash size chosen at boot.
	 */
	cnt = tcp_hashinfo.ehash_mask + 1;
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;

	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
	net->ipv4.sysctl_tcp_sack = 1;
	net->ipv4.sysctl_tcp_window_scaling = 1;
	net->ipv4.sysctl_tcp_timestamps = 1;
	net->ipv4.sysctl_tcp_early_retrans = 3;
	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior.  */
	net->ipv4.sysctl_tcp_retrans_collapse = 1;
	net->ipv4.sysctl_tcp_max_reordering = 300;
	net->ipv4.sysctl_tcp_dsack = 1;
	net->ipv4.sysctl_tcp_app_win = 31;
	net->ipv4.sysctl_tcp_adv_win_scale = 1;
	net->ipv4.sysctl_tcp_frto = 2;
	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
	/* This limits the percentage of the congestion window which we
	 * will allow a single TSO frame to consume.  Building TSO frames
	 * which are too large can cause TCP streams to be bursty.
	 */
	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
	/* Default TSQ limit of 16 TSO segments */
	net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
	/* rfc5961 challenge ack rate limiting */
	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
	net->ipv4.sysctl_tcp_min_tso_segs = 2;
	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
	net->ipv4.sysctl_tcp_autocorking = 1;
	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
	/* Child namespaces inherit the buffer limits init_net has at this
	 * moment (they may have been tuned since boot).
	 */
	if (net != &init_net) {
		memcpy(net->ipv4.sysctl_tcp_rmem,
		       init_net.ipv4.sysctl_tcp_rmem,
		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
		memcpy(net->ipv4.sysctl_tcp_wmem,
		       init_net.ipv4.sysctl_tcp_wmem,
		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
	}
	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
	atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	/* Reno is always built in */
	if (!net_eq(net, &init_net) &&
	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
	else
		net->ipv4.tcp_congestion_control = &tcp_reno;

	return 0;
fail:
	/* Destroys any control sockets created before the failure. */
	tcp_sk_exit(net);

	return res;
}
2700
/*
 * Batched per-netns exit: flush all IPv4 TIME-WAIT sockets from the
 * global hash (they hold netns references that would otherwise pin the
 * namespaces), then free each dying namespace's Fast Open key context.
 */
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list)
		tcp_fastopen_ctx_destroy(net);
}
2710
/* Core TCP pernet hooks: per-netns init/exit plus batched exit for
 * TIME-WAIT/Fast Open cleanup across all dying namespaces.
 */
static struct pernet_operations __net_initdata tcp_sk_ops = {
       .init	   = tcp_sk_init,
       .exit	   = tcp_sk_exit,
       .exit_batch = tcp_sk_exit_batch,
};
2716
/* Boot-time TCP/IPv4 bring-up.  Failure to register the pernet subsystem
 * (and hence create the control sockets) is unrecoverable, so panic.
 */
void __init tcp_v4_init(void)
{
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}