// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

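/* The two helpers below derive the initial sequence number and the
 * per-connection timestamp offset from keyed hashes of the flow addresses
 * and ports (see net/core/secure_seq.c; cf. RFC 6528), so that both are
 * hard for an off-path attacker to predict.
 */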
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}

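/* Called on the connect() path when the chosen 4-tuple collides with a
 * TIME-WAIT socket: decide whether the old TIME-WAIT state can be safely
 * recycled.  Returns 1 (and holds a reference on sktw) if reuse is OK.
 */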
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct inet_timewait_sock *tw = inet_twsk(sktw);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);
	int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;

	if (reuse == 2) {
		/* Still does not detect *everything* that goes through
		 * lo, since we require a loopback src or dst address
		 * or direct binding to 'lo' interface.
		 */
		bool loopback = false;

		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
			loopback = true;
#if IS_ENABLED(CONFIG_IPV6)
		if (tw->tw_family == AF_INET6) {
			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
			     (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
			    (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
			     (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
				loopback = true;
		} else
#endif
		{
			if (ipv4_is_loopback(tw->tw_daddr) ||
			    ipv4_is_loopback(tw->tw_rcv_saddr))
				loopback = true;
		}
		if (!loopback)
			reuse = 0;
	}

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (reuse && time_after32(ktime_get_seconds(),
					    tcptw->tw_ts_recent_stamp)))) {
		/* In case of repair and re-using TIME-WAIT sockets we still
		 * want to be sure that it is safe as above but honor the
		 * sequence numbers and time stamps set as part of the repair
		 * process.
		 *
		 * Without this check re-using a TIME-WAIT socket with TCP
		 * repair would accumulate a -1 on the repair assigned
		 * sequence number. The first time it is reused the sequence
		 * is -1, the second time -2, etc. This fixes that issue
		 * without appearing to create any others.
		 */
		if (likely(!tp->repair)) {
			tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
			if (tp->write_seq == 0)
				tp->write_seq = 1;
			tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		}
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v4_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
}

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq = 0;
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = tp->write_seq ^ jiffies;

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

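/* For reference, an illustrative userspace sketch (not kernel code): a
 * connect() on an IPv4 TCP socket reaches tcp_v4_connect() above through
 * inet_stream_connect() and the protocol's ->connect() hook:
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in sin = { .sin_family = AF_INET,
 *				   .sin_port = htons(80) };
 *	inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);
 *	connect(fd, (struct sockaddr *)&sin, sizeof(sin));
 */
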
/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

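/* Propagate an ICMP redirect to this socket's cached route, if one is
 * still attached; the dst's ->redirect() method performs the update.
 */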
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment, the
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb), 0);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return -ENOENT;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
				     type == ICMP_TIME_EXCEEDED ||
				     (type == ICMP_DEST_UNREACH &&
				      (code == ICMP_NET_UNREACH ||
				       code == ICMP_HOST_UNREACH)));
		return 0;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		skb = tcp_rtx_queue_head(sk);
		if (WARN_ON_ONCE(!skb))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now.
			 */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

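/* Set up a CHECKSUM_PARTIAL style checksum: seed th->check with the
 * pseudo-header sum and record csum_start/csum_offset so that the device
 * (or the software fallback) can fold in the payload checksum later.
 */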
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on parameters
 *		arriving with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	u64 transmit_time = 0;
	struct sock *ctl_sk;
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we do not loosen security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb),
					     tcp_v4_sdif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * Routing might fail in this case. No choice here: if we choose to
	 * force the input interface, we will misroute in case of asymmetric
	 * routes.
	 */
	if (sk) {
		arg.bound_dev_if = sk->sk_bound_dev_if;
		if (sk_fullsock(sk))
			trace_tcp_send_reset(sk, skb);
	}

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
	if (sk) {
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
		transmit_time = tcp_transmit_time(sk);
	}
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len,
			      transmit_time);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

792
793/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
794 outside socket context is ugly, certainly. What can I do?
795 */
796
static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;
	struct sock *ctl_sk;
	u64 transmit_time;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
			   inet_twsk(sk)->tw_mark : sk->sk_mark;
	transmit_time = tcp_transmit_time(sk);
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len,
			      transmit_time);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

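/* ACK a segment on behalf of a request_sock, before a full socket exists
 * (regular SYN-RECV, or an already-accepted Fast Open child; see the
 * comment on seq below).
 */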
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
					     tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		rcu_read_lock();
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    rcu_dereference(ireq->ireq_opt));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);

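/* tcp_md5_needed is a static branch that lets the MD5 key lookup be
 * skipped on the fast path until MD5 keys are actually configured on the
 * system.
 */
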
/* Find the Key structure for an address.  */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
					   const union tcp_md5_addr *addr,
					   int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;

		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(__tcp_md5_do_lookup);

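/* Like __tcp_md5_do_lookup() above, but require an exact match on
 * (address, prefixlen) instead of best-prefix matching; used when keys
 * are added, updated or deleted.
 */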
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size) &&
		    key->prefixlen == prefixlen)
			return key;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;

	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   lockdep_sock_is_held(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	key->prefixlen = prefixlen;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

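/* For reference, an illustrative userspace sketch (not kernel code) that
 * ends up in tcp_md5_do_add() via tcp_v4_parse_md5_keys() below:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	sin->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &sin->sin_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */
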
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
		   u8 prefixlen)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 prefixlen = 32;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 32)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET, prefixlen);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

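/* An illustrative userspace sketch (editorial addition, not part of the
 * kernel sources): installing a TCP-MD5 key for a peer - the classic
 * RFC 2385 BGP protection - via the TCP_MD5SIG socket option, which is
 * what lands in tcp_v4_parse_md5_keys() above. Error handling is elided.
 *
 *	#include <string.h>
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <sys/socket.h>
 *
 *	static int set_md5_key(int fd, const struct sockaddr_in *peer,
 *			       const void *key, unsigned int keylen)
 *	{
 *		struct tcp_md5sig md5;
 *
 *		memset(&md5, 0, sizeof(md5));
 *		memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
 *		md5.tcpm_keylen = keylen;
 *		memcpy(md5.tcpm_key, key, keylen);
 *		return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG,
 *				  &md5, sizeof(md5));
 *	}
 *
 * A zero tcpm_keylen deletes the key (the tcp_md5_do_del() path above);
 * TCP_MD5SIG_EXT with TCP_MD5SIG_FLAG_PREFIX set additionally honours
 * tcpm_prefixlen, so one key can cover a whole subnet.
 */
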
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

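/* For reference, RFC 2385 defines the MD5 digest input as, in this order:
 *
 *	1. the TCP pseudo-header (source and destination addresses, zero
 *	   pad, protocol number, segment length),
 *	2. the TCP header, excluding options, with its checksum zeroed,
 *	3. the TCP segment data, if any,
 *	4. the connection key.
 *
 * tcp_v4_md5_hash_headers() above covers steps 1 and 2; the callers below
 * feed in any payload and then the key before finalizing the digest.
 */
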
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

#endif

/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
#endif
	return false;
}

static void tcp_v4_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct net *net = sock_net(sk_listener);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
}

static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet_csk_route_req(sk, &fl->u.ip4, req);
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		 = PF_INET,
	.obj_size	 = sizeof(struct tcp_request_sock),
	.rtx_syn_ack	 = tcp_rtx_synack,
	.send_ack	 = tcp_v4_reqsk_send_ack,
	.destructor	 = tcp_v4_reqsk_destructor,
	.send_reset	 = tcp_v4_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	 = TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	 = tcp_v4_md5_lookup,
	.calc_md5_hash	 = tcp_v4_md5_hash_skb,
#endif
	.init_req	 = tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq = cookie_v4_init_sequence,
#endif
	.route_req	 = tcp_v4_route_req,
	.init_seq	 = tcp_v4_init_seq,
	.init_ts_off	 = tcp_v4_init_ts_off,
	.send_synack	 = tcp_v4_send_synack,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer SYNs sent to broadcast or multicast addresses */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three-way handshake has completed - we got a valid ACK -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newsk->sk_bound_dev_if = ireq->ir_iif;
	newinet->inet_saddr = ireq->ir_loc_addr;
	inet_opt = rcu_dereference(ireq->ireq_opt);
	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	newinet->rcv_tos = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (likely(*own_req)) {
		tcp_move_syn(newtp, req);
		ireq->ireq_opt = NULL;
	} else {
		newinet->inet_opt = NULL;
	}
	return newsk;

exit_overflow:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	newinet->inet_opt = NULL;
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
				    &tcp_request_sock_ipv4_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v4_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb);
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

int tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return 0;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return 0;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return 0;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif, inet_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
	return 0;
}

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
	struct skb_shared_info *shinfo;
	const struct tcphdr *th;
	struct tcphdr *thtail;
	struct sk_buff *tail;
	unsigned int hdrlen;
	bool fragstolen;
	u32 gso_segs;
	int delta;

	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
	 * we can fix skb->truesize to its real value to avoid future drops.
	 * This is valid because skb is not yet charged to the socket.
	 * It has been noticed pure SACK packets were sometimes dropped
	 * (if cooked by drivers without copybreak feature).
	 */
	skb_condense(skb);

	skb_dst_drop(skb);

	if (unlikely(tcp_checksum_complete(skb))) {
		bh_unlock_sock(sk);
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
		return true;
	}

	/* Attempt coalescing to last skb in backlog, even if we are
	 * above the limits.
	 * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
	 */
	th = (const struct tcphdr *)skb->data;
	hdrlen = th->doff * 4;
	shinfo = skb_shinfo(skb);

	if (!shinfo->gso_size)
		shinfo->gso_size = skb->len - hdrlen;

	if (!shinfo->gso_segs)
		shinfo->gso_segs = 1;

	tail = sk->sk_backlog.tail;
	if (!tail)
		goto no_coalesce;
	thtail = (struct tcphdr *)tail->data;

	if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
	    TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
	    ((TCP_SKB_CB(tail)->tcp_flags |
	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
	    !((TCP_SKB_CB(tail)->tcp_flags &
	      TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
	    ((TCP_SKB_CB(tail)->tcp_flags ^
	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
#ifdef CONFIG_TLS_DEVICE
	    tail->decrypted != skb->decrypted ||
#endif
	    thtail->doff != th->doff ||
	    memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
		goto no_coalesce;

	__skb_pull(skb, hdrlen);
	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
		thtail->window = th->window;

		TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;

		if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
			TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;

		/* We have to update both TCP_SKB_CB(tail)->tcp_flags and
		 * thtail->fin, so that the fast path in tcp_rcv_established()
		 * is not entered if we append a packet with a FIN.
		 * SYN, RST, URG are not present.
		 * ACK is set on both packets.
		 * PSH : we do not really care in TCP stack,
		 * at least for 'GRO' packets.
		 */
		thtail->fin |= th->fin;
		TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;

		if (TCP_SKB_CB(skb)->has_rxtstamp) {
			TCP_SKB_CB(tail)->has_rxtstamp = true;
			tail->tstamp = skb->tstamp;
			skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
		}

		/* Not as strict as GRO. We only need to carry mss max value */
		skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
						 skb_shinfo(tail)->gso_size);

		gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
		skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);

		sk->sk_backlog.len += delta;
		__NET_INC_STATS(sock_net(sk),
				LINUX_MIB_TCPBACKLOGCOALESCE);
		kfree_skb_partial(skb, fragstolen);
		return false;
	}
	__skb_push(skb, hdrlen);

no_coalesce:
	/* Only the socket owner can try to collapse/prune the rx queues
	 * to reduce memory overhead, so add a little headroom here.
	 * At most a few sockets' backlogs are likely to be non-empty
	 * concurrently.
	 */
	limit += 64*1024;

	if (unlikely(sk_add_backlog(sk, skb, limit))) {
		bh_unlock_sock(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(tcp_add_backlog);

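/* Usage contract (an editorial summary drawn from tcp_v4_rcv() below): the
 * caller holds the socket spinlock taken via bh_lock_sock_nested() and has
 * already seen sock_owned_by_user(sk) return true. A simplified sketch of
 * the calling pattern:
 *
 *	bh_lock_sock_nested(sk);
 *	if (!sock_owned_by_user(sk))
 *		ret = tcp_v4_do_rcv(sk, skb);
 *	else if (tcp_add_backlog(sk, skb))
 *		goto discard_and_relse;	 (lock already released)
 *	bh_unlock_sock(sk);
 *
 * Every 'return true' path in tcp_add_backlog() has already done
 * bh_unlock_sock(), so on that branch the caller only drops its socket
 * reference and frees the skb; it must not unlock again.
 */
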
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = (struct tcphdr *)skb->data;

	return sk_filter_trim_cap(sk, skb, th->doff * 4);
}
EXPORT_SYMBOL(tcp_filter);

static void tcp_v4_restore_cb(struct sk_buff *skb)
{
	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
		sizeof(struct inet_skb_parm));
}

static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
			   const struct tcphdr *th)
{
	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
	 * barrier() makes sure compiler won't play fool^Waliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

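/* Background for the memmove() pair above: IPCB(skb) and TCP_SKB_CB(skb)
 * both alias the same 48-byte skb->cb[] scratch area. tcp_v4_fill_cb()
 * parks the IP layer's struct inet_skb_parm inside
 * TCP_SKB_CB(skb)->header.h4 before TCP starts using the rest of cb[],
 * and tcp_v4_restore_cb() moves it back whenever the skb must be handed
 * to code that expects IPCB() to be valid again (see the re-lookup paths
 * in tcp_v4_rcv() below).
 */
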
/*
 * From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sk_buff *skb_to_free;
	int sdif = inet_sdif(skb);
	const struct iphdr *iph;
	const struct tcphdr *th;
	bool refcounted;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
			       th->dest, sdif, &refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		/* We own a reference on the listener, increase it again
		 * as we might lose it too soon.
		 */
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			iph = ip_hdr(skb);
			tcp_v4_fill_cb(skb, iph, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v4_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v4_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v4_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	nf_reset(skb);

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);
	tcp_v4_fill_cb(skb, iph, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v4_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v4_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);

put_and_return:
	if (refcounted)
		sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo, skb,
							__tcp_hdrlen(th),
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb),
							sdif);
		if (sk2) {
			inet_twsk_deschedule_put(inet_twsk(sk));
			sk = sk2;
			tcp_v4_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v4_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	 = sizeof(struct tcp_timewait_sock),
	.twsk_unique	 = tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	trace_tcp_destroy_sock(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	tcp_cleanup_ulp(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Check if we want to disable active TFO */
	tcp_fastopen_active_disable_ofo_check(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	skb_rbtree_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_fastopen_destroy_cipher(sk);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get the next listener socket after cur. If cur is NULL, get the first
 * socket starting from the bucket given in st->bucket; when st->bucket is
 * zero the very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	struct inet_listen_hashbucket *ilb;
	struct sock *sk = cur;

	if (!sk) {
get_head:
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock(&ilb->lock);
		sk = sk_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	sk = sk_next(sk);
get_sk:
	sk_for_each_from(sk) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == afinfo->family)
			return sk;
	}
	spin_unlock(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE)
		goto get_head;
	return NULL;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}

Tom Herberta8b690f2010-06-07 00:43:42 -07002195/*
2196 * Get first established socket starting from bucket given in st->bucket.
2197 * If st->bucket is zero, the very first socket in the hash is returned.
2198 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199static void *established_get_first(struct seq_file *seq)
2200{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002201 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
Jianjun Kong5799de02008-11-03 02:49:10 -08002202 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002203 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204 void *rc = NULL;
2205
Tom Herberta8b690f2010-06-07 00:43:42 -07002206 st->offset = 0;
2207 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 struct sock *sk;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002209 struct hlist_nulls_node *node;
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002210 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211
Andi Kleen6eac5602008-08-28 01:08:02 -07002212 /* Lockless fast path for the common case of empty buckets */
2213 if (empty_bucket(st))
2214 continue;
2215
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002216 spin_lock_bh(lock);
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002217 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002218 if (sk->sk_family != afinfo->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002219 !net_eq(sock_net(sk), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220 continue;
2221 }
2222 rc = sk;
2223 goto out;
2224 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002225 spin_unlock_bh(lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 }
2227out:
2228 return rc;
2229}
2230
2231static void *established_get_next(struct seq_file *seq, void *cur)
2232{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002233 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 struct sock *sk = cur;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002235 struct hlist_nulls_node *node;
Jianjun Kong5799de02008-11-03 02:49:10 -08002236 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002237 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238
2239 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07002240 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002242 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002244 sk_nulls_for_each_from(sk, node) {
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002245 if (sk->sk_family == afinfo->family &&
2246 net_eq(sock_net(sk), net))
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002247 return sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 }
2249
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002250 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2251 ++st->bucket;
2252 return established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253}
2254
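/*
 * Skip 'pos' established sockets starting at st->bucket and return
 * the one reached, or NULL once the ehash is exhausted.  As with
 * established_get_first(), the bucket lock protecting the returned
 * socket is left held for the caller.
 */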
2255static void *established_get_idx(struct seq_file *seq, loff_t pos)
2256{
Tom Herberta8b690f2010-06-07 00:43:42 -07002257 struct tcp_iter_state *st = seq->private;
2258 void *rc;
2259
2260 st->bucket = 0;
2261 rc = established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262
2263 while (rc && pos) {
2264 rc = established_get_next(seq, rc);
2265 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002266 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 return rc;
2268}
2269
2270static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2271{
2272 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002273 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275 st->state = TCP_SEQ_STATE_LISTENING;
2276 rc = listening_get_idx(seq, &pos);
2277
2278 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279 st->state = TCP_SEQ_STATE_ESTABLISHED;
2280 rc = established_get_idx(seq, pos);
2281 }
2282
2283 return rc;
2284}
2285
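/*
 * Best-effort resume: restart the walk at the bucket/offset recorded
 * by the previous read() instead of rescanning from position zero.
 * Entries may have been added or removed since st->offset was saved,
 * so the position reached is approximate; st->num is restored because
 * only the cursor, not the running count, should move here.
 */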
Tom Herberta8b690f2010-06-07 00:43:42 -07002286static void *tcp_seek_last_pos(struct seq_file *seq)
2287{
2288 struct tcp_iter_state *st = seq->private;
2289 int offset = st->offset;
2290 int orig_num = st->num;
2291 void *rc = NULL;
2292
2293 switch (st->state) {
Tom Herberta8b690f2010-06-07 00:43:42 -07002294 case TCP_SEQ_STATE_LISTENING:
2295 if (st->bucket >= INET_LHTABLE_SIZE)
2296 break;
2297 st->state = TCP_SEQ_STATE_LISTENING;
2298 rc = listening_get_next(seq, NULL);
2299 while (offset-- && rc)
2300 rc = listening_get_next(seq, rc);
2301 if (rc)
2302 break;
2303 st->bucket = 0;
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002304 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002305 /* Fallthrough */
2306 case TCP_SEQ_STATE_ESTABLISHED:
Tom Herberta8b690f2010-06-07 00:43:42 -07002307 if (st->bucket > tcp_hashinfo.ehash_mask)
2308 break;
2309 rc = established_get_first(seq);
2310 while (offset-- && rc)
2311 rc = established_get_next(seq, rc);
2312 }
2313
2314 st->num = orig_num;
2315
2316 return rc;
2317}
2318
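/*
 * seq_file ->start() callback, shared by the IPv4 and IPv6 /proc
 * iterators.  For *pos == 0 it returns SEQ_START_TOKEN so ->show()
 * emits the header line; otherwise it seeks to entry *pos - 1,
 * preferring the cheap tcp_seek_last_pos() path when the reader is
 * continuing from where the previous read() stopped.
 */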
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002319void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320{
Jianjun Kong5799de02008-11-03 02:49:10 -08002321 struct tcp_iter_state *st = seq->private;
Tom Herberta8b690f2010-06-07 00:43:42 -07002322 void *rc;
2323
2324 if (*pos && *pos == st->last_pos) {
2325 rc = tcp_seek_last_pos(seq);
2326 if (rc)
2327 goto out;
2328 }
2329
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330 st->state = TCP_SEQ_STATE_LISTENING;
2331 st->num = 0;
Tom Herberta8b690f2010-06-07 00:43:42 -07002332 st->bucket = 0;
2333 st->offset = 0;
2334 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2335
2336out:
2337 st->last_pos = *pos;
2338 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002340EXPORT_SYMBOL(tcp_seq_start);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002342void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343{
Tom Herberta8b690f2010-06-07 00:43:42 -07002344 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345 void *rc = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346
2347 if (v == SEQ_START_TOKEN) {
2348 rc = tcp_get_idx(seq, 0);
2349 goto out;
2350 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351
2352 switch (st->state) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353 case TCP_SEQ_STATE_LISTENING:
2354 rc = listening_get_next(seq, v);
2355 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002357 st->bucket = 0;
2358 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359 rc = established_get_first(seq);
2360 }
2361 break;
2362 case TCP_SEQ_STATE_ESTABLISHED:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 rc = established_get_next(seq, v);
2364 break;
2365 }
2366out:
2367 ++*pos;
Tom Herberta8b690f2010-06-07 00:43:42 -07002368 st->last_pos = *pos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 return rc;
2370}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002371EXPORT_SYMBOL(tcp_seq_next);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002373void tcp_seq_stop(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374{
Jianjun Kong5799de02008-11-03 02:49:10 -08002375 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376
2377 switch (st->state) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 case TCP_SEQ_STATE_LISTENING:
2379 if (v != SEQ_START_TOKEN)
Eric Dumazet9652dc22016-10-19 21:24:58 -07002380 spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382 case TCP_SEQ_STATE_ESTABLISHED:
2383 if (v)
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002384 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385 break;
2386 }
2387}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002388EXPORT_SYMBOL(tcp_seq_stop);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389
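/*
 * Emit one /proc/net/tcp row for a SYN_RECV request socket.  Most
 * columns are fixed for a request: the state is always TCP_SYN_RECV,
 * the single reported timer is the request's rsk_timer (which paces
 * SYN-ACK retransmissions), the uid comes from the listener, and the
 * inode is 0 because no full socket exists yet.
 */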
Eric Dumazetd4f06872015-03-12 16:44:09 -07002390static void get_openreq4(const struct request_sock *req,
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002391 struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002392{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002393 const struct inet_request_sock *ireq = inet_rsk(req);
Eric Dumazetfa76ce732015-03-19 19:04:20 -07002394 long delta = req->rsk_timer.expires - jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002396 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002397 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 i,
Eric Dumazet634fb9792013-10-09 15:21:29 -07002399 ireq->ir_loc_addr,
Eric Dumazetd4f06872015-03-12 16:44:09 -07002400 ireq->ir_num,
Eric Dumazet634fb9792013-10-09 15:21:29 -07002401 ireq->ir_rmt_addr,
2402 ntohs(ireq->ir_rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403 TCP_SYN_RECV,
2404 0, 0, /* could print option size, but that is af dependent. */
2405 1, /* timers active (only the expire timer) */
Eric Dumazeta399a802012-08-08 21:13:53 +00002406 jiffies_delta_to_clock_t(delta),
Eric Dumazete6c022a2012-10-27 23:16:46 +00002407 req->num_timeout,
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002408 from_kuid_munged(seq_user_ns(f),
2409 sock_i_uid(req->rsk_listener)),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410 0, /* non-standard timer */
2411 0, /* open_requests have no inode */
Eric Dumazetd4f06872015-03-12 16:44:09 -07002412 0,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002413 req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414}
2415
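/*
 * Emit one row for a full socket.  The timer_active column encodes
 * which timer is pending: 1 for the retransmit/loss-probe family,
 * 4 for the zero-window probe timer, 2 for the keepalive timer
 * (sk_timer), and 0 when nothing is armed.
 */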
Tetsuo Handa652586d2013-11-14 14:31:57 -08002416static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417{
2418 int timer_active;
2419 unsigned long timer_expires;
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002420 const struct tcp_sock *tp = tcp_sk(sk);
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002421 const struct inet_connection_sock *icsk = inet_csk(sk);
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002422 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet0536fcc2015-09-29 07:42:52 -07002423 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
Eric Dumazetc720c7e82009-10-15 06:30:45 +00002424 __be32 dest = inet->inet_daddr;
2425 __be32 src = inet->inet_rcv_saddr;
2426 __u16 destp = ntohs(inet->inet_dport);
2427 __u16 srcp = ntohs(inet->inet_sport);
Eric Dumazet49d09002009-12-03 16:06:13 -08002428 int rx_queue;
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002429 int state;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002430
Nandita Dukkipati6ba8a3b2013-03-11 10:00:43 +00002431 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
Yuchung Cheng57dde7f2017-01-12 22:11:33 -08002432 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
Nandita Dukkipati6ba8a3b2013-03-11 10:00:43 +00002433 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002435 timer_expires = icsk->icsk_timeout;
2436 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002438 timer_expires = icsk->icsk_timeout;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002439 } else if (timer_pending(&sk->sk_timer)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440 timer_active = 2;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002441 timer_expires = sk->sk_timer.expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442 } else {
2443 timer_active = 0;
2444 timer_expires = jiffies;
2445 }
2446
Yafang Shao986ffdf2017-12-20 11:12:52 +08002447 state = inet_sk_state_load(sk);
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002448 if (state == TCP_LISTEN)
Eric Dumazet49d09002009-12-03 16:06:13 -08002449 rx_queue = sk->sk_ack_backlog;
2450 else
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002451 /* Because we don't lock the socket,
2452 * we might find a transient negative value.
Eric Dumazet49d09002009-12-03 16:06:13 -08002453 */
2454 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2455
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002456 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
Tetsuo Handa652586d2013-11-14 14:31:57 -08002457 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002458 i, src, srcp, dest, destp, state,
Sridhar Samudrala47da8ee2006-06-27 13:29:00 -07002459 tp->write_seq - tp->snd_una,
Eric Dumazet49d09002009-12-03 16:06:13 -08002460 rx_queue,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461 timer_active,
Eric Dumazeta399a802012-08-08 21:13:53 +00002462 jiffies_delta_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002463 icsk->icsk_retransmits,
Eric W. Biedermana7cb5a42012-05-24 01:10:10 -06002464 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002465 icsk->icsk_probes_out,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002466 sock_i_ino(sk),
Reshetova, Elena41c6d652017-06-30 13:08:01 +03002467 refcount_read(&sk->sk_refcnt), sk,
Stephen Hemminger7be87352008-06-27 20:00:19 -07002468 jiffies_to_clock_t(icsk->icsk_rto),
2469 jiffies_to_clock_t(icsk->icsk_ack.ato),
Wei Wang31954cd2019-01-25 10:53:19 -08002470 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471 tp->snd_cwnd,
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002472 state == TCP_LISTEN ?
2473 fastopenq->max_qlen :
Tetsuo Handa652586d2013-11-14 14:31:57 -08002474 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475}
2476
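/*
 * Emit one row for a TIME_WAIT socket.  Only the substate and the
 * remaining TIME_WAIT timer (reported as timer type 3) carry real
 * information; uid, timeout and inode print as 0 since a twsk has no
 * owning full socket.
 */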
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002477static void get_timewait4_sock(const struct inet_timewait_sock *tw,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002478 struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479{
Eric Dumazet789f5582015-04-12 18:51:09 -07002480 long delta = tw->tw_timer.expires - jiffies;
Al Viro23f33c22006-09-27 18:43:50 -07002481 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482 __u16 destp, srcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483
2484 dest = tw->tw_daddr;
2485 src = tw->tw_rcv_saddr;
2486 destp = ntohs(tw->tw_dport);
2487 srcp = ntohs(tw->tw_sport);
2488
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002489 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002490 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
Eric Dumazeta399a802012-08-08 21:13:53 +00002492 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
Reshetova, Elena41c6d652017-06-30 13:08:01 +03002493 refcount_read(&tw->tw_refcnt), tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494}
2495
2496#define TMPSZ 150
2497
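/*
 * Addresses are the raw __be32 values printed as hex (so they appear
 * byte-swapped on little-endian hosts) while ports are converted to
 * host order first.  As an illustration (not captured from a real
 * system), a listener on 127.0.0.1:22 would start a row like:
 *   0: 0100007F:0016 00000000:0000 0A ...
 */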
2498static int tcp4_seq_show(struct seq_file *seq, void *v)
2499{
Jianjun Kong5799de02008-11-03 02:49:10 -08002500 struct tcp_iter_state *st;
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002501 struct sock *sk = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502
Tetsuo Handa652586d2013-11-14 14:31:57 -08002503 seq_setwidth(seq, TMPSZ - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504 if (v == SEQ_START_TOKEN) {
Tetsuo Handa652586d2013-11-14 14:31:57 -08002505 seq_puts(seq, " sl local_address rem_address st tx_queue "
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506 "rx_queue tr tm->when retrnsmt uid timeout "
2507 "inode");
2508 goto out;
2509 }
2510 st = seq->private;
2511
Eric Dumazet079096f2015-10-02 11:43:32 -07002512 if (sk->sk_state == TCP_TIME_WAIT)
2513 get_timewait4_sock(v, seq, st->num);
2514 else if (sk->sk_state == TCP_NEW_SYN_RECV)
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002515 get_openreq4(v, seq, st->num);
Eric Dumazet079096f2015-10-02 11:43:32 -07002516 else
2517 get_tcp4_sock(v, seq, st->num);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518out:
Tetsuo Handa652586d2013-11-14 14:31:57 -08002519 seq_pad(seq, '\n');
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520 return 0;
2521}
2522
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002523static const struct seq_operations tcp4_seq_ops = {
2524 .show = tcp4_seq_show,
2525 .start = tcp_seq_start,
2526 .next = tcp_seq_next,
2527 .stop = tcp_seq_stop,
2528};
2529
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530static struct tcp_seq_afinfo tcp4_seq_afinfo = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531 .family = AF_INET,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532};
2533
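/*
 * Register /proc/net/tcp for this namespace.  The iterator state is
 * carved out of the seq_file private area, and the AF_INET afinfo is
 * attached as PDE data so the shared tcp_seq_* callbacks can recover
 * the address family via PDE_DATA().
 */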
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002534static int __net_init tcp4_proc_init_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002535{
Christoph Hellwigc3506372018-04-10 19:42:55 +02002536 if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
2537 sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002538 return -ENOMEM;
2539 return 0;
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002540}
2541
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002542static void __net_exit tcp4_proc_exit_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002543{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002544 remove_proc_entry("tcp", net->proc_net);
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002545}
2546
2547static struct pernet_operations tcp4_net_ops = {
2548 .init = tcp4_proc_init_net,
2549 .exit = tcp4_proc_exit_net,
2550};
2551
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552int __init tcp4_proc_init(void)
2553{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002554 return register_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555}
2556
2557void tcp4_proc_exit(void)
2558{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002559 unregister_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560}
2561#endif /* CONFIG_PROC_FS */
2562
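/*
 * tcp_prot wires TCP's operations into the generic AF_INET socket
 * layer; af_inet.c selects it through the inetsw protocol table when
 * a SOCK_STREAM/IPPROTO_TCP socket is created.
 */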
2563struct proto tcp_prot = {
2564 .name = "TCP",
2565 .owner = THIS_MODULE,
2566 .close = tcp_close,
Andrey Ignatovd74bad42018-03-30 15:08:05 -07002567 .pre_connect = tcp_v4_pre_connect,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 .connect = tcp_v4_connect,
2569 .disconnect = tcp_disconnect,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002570 .accept = inet_csk_accept,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002571 .ioctl = tcp_ioctl,
2572 .init = tcp_v4_init_sock,
2573 .destroy = tcp_v4_destroy_sock,
2574 .shutdown = tcp_shutdown,
2575 .setsockopt = tcp_setsockopt,
2576 .getsockopt = tcp_getsockopt,
Ursula Braun4b9d07a2017-01-09 16:55:12 +01002577 .keepalive = tcp_set_keepalive,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 .recvmsg = tcp_recvmsg,
Changli Gao7ba42912010-07-10 20:41:55 +00002579 .sendmsg = tcp_sendmsg,
2580 .sendpage = tcp_sendpage,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 .backlog_rcv = tcp_v4_do_rcv,
Eric Dumazet46d3cea2012-07-11 05:50:31 +00002582 .release_cb = tcp_release_cb,
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08002583 .hash = inet_hash,
2584 .unhash = inet_unhash,
2585 .get_port = inet_csk_get_port,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 .enter_memory_pressure = tcp_enter_memory_pressure,
Eric Dumazet06044752017-06-07 13:29:12 -07002587 .leave_memory_pressure = tcp_leave_memory_pressure,
Eric Dumazetc9bee3b72013-07-22 20:27:07 -07002588 .stream_memory_free = tcp_stream_memory_free,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589 .sockets_allocated = &tcp_sockets_allocated,
Arnaldo Carvalho de Melo0a5578c2005-08-09 20:11:41 -07002590 .orphan_count = &tcp_orphan_count,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 .memory_allocated = &tcp_memory_allocated,
2592 .memory_pressure = &tcp_memory_pressure,
Eric W. Biedermana4fe34b2013-10-19 16:25:36 -07002593 .sysctl_mem = sysctl_tcp_mem,
Eric Dumazet356d1832017-11-07 00:29:28 -08002594 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2595 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 .max_header = MAX_TCP_HEADER,
2597 .obj_size = sizeof(struct tcp_sock),
Paul E. McKenney5f0d5a32017-01-18 02:53:44 -08002598 .slab_flags = SLAB_TYPESAFE_BY_RCU,
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002599 .twsk_prot = &tcp_timewait_sock_ops,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002600 .rsk_prot = &tcp_request_sock_ops,
Pavel Emelyanov39d8cda2008-03-22 16:50:58 -07002601 .h.hashinfo = &tcp_hashinfo,
Changli Gao7ba42912010-07-10 20:41:55 +00002602 .no_autobind = true,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002603#ifdef CONFIG_COMPAT
2604 .compat_setsockopt = compat_tcp_setsockopt,
2605 .compat_getsockopt = compat_tcp_getsockopt,
2606#endif
Lorenzo Colittic1e64e22015-12-16 12:30:05 +09002607 .diag_destroy = tcp_abort,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002609EXPORT_SYMBOL(tcp_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610
Denis V. Lunev046ee902008-04-03 14:31:33 -07002611static void __net_exit tcp_sk_exit(struct net *net)
2612{
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002613 int cpu;
2614
Dust Lib506bc92019-04-01 16:04:53 +08002615 if (net->ipv4.tcp_congestion_control)
2616 module_put(net->ipv4.tcp_congestion_control->owner);
Stephen Hemminger6670e152017-11-14 08:25:49 -08002617
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002618 for_each_possible_cpu(cpu)
2619 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2620 free_percpu(net->ipv4.tcp_sk);
2621}
2622
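/*
 * Per-namespace init: create one raw control socket per possible CPU
 * (used to send RSTs and ACKs on behalf of no local socket, e.g. from
 * tcp_v4_send_reset()), then seed every per-netns TCP sysctl with its
 * default value.
 */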
2623static int __net_init tcp_sk_init(struct net *net)
2624{
Haishuang Yanfee83d02016-12-28 17:52:33 +08002625 int res, cpu, cnt;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002626
2627 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2628 if (!net->ipv4.tcp_sk)
2629 return -ENOMEM;
2630
2631 for_each_possible_cpu(cpu) {
2632 struct sock *sk;
2633
2634 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2635 IPPROTO_TCP, net);
2636 if (res)
2637 goto fail;
Eric Dumazeta9d65322016-04-01 08:52:21 -07002638 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
Eric Dumazet431280e2018-08-22 13:30:45 -07002639
2640 /* Enforce IP_DF and IPID==0 for RST and ACK packets
2641 * sent in SYN_RECV and TIME_WAIT state.
2642 */
2643 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
2644
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002645 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2646 }
Daniel Borkmann49213552015-05-19 21:04:22 +02002647
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002648 net->ipv4.sysctl_tcp_ecn = 2;
Daniel Borkmann49213552015-05-19 21:04:22 +02002649 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2650
Fan Dub0f9ca52015-02-10 09:53:16 +08002651 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
Eric Dumazet5f3e2bf002019-06-06 09:15:31 -07002652 net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
Fan Du6b58e0a2015-03-06 11:18:23 +08002653 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
Fan Du05cbc0d2015-03-06 11:18:24 +08002654 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
Josh Huntc04b79b2019-08-07 19:52:29 -04002655 net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002656
Nikolay Borisov13b287e2016-01-07 16:38:43 +02002657 net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
Nikolay Borisov9bd68612016-01-07 16:38:44 +02002658 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
Nikolay Borisovb840d152016-01-07 16:38:45 +02002659 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
Nikolay Borisov13b287e2016-01-07 16:38:43 +02002660
Nikolay Borisov6fa25162016-02-03 09:46:49 +02002661 net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
Nikolay Borisov7c083ec2016-02-03 09:46:50 +02002662 net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
David S. Miller0aca7372016-02-08 04:24:33 -05002663 net->ipv4.sysctl_tcp_syncookies = 1;
Nikolay Borisov1043e252016-02-03 09:46:52 +02002664 net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
Nikolay Borisovae5c3f42016-02-03 09:46:53 +02002665 net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
Nikolay Borisovc6214a92016-02-03 09:46:54 +02002666 net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
Nikolay Borisovc402d9b2016-02-03 09:46:55 +02002667 net->ipv4.sysctl_tcp_orphan_retries = 0;
Nikolay Borisov1e579ca2016-02-03 09:46:56 +02002668 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
Nikolay Borisov4979f2d2016-02-03 09:46:57 +02002669 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
Maciej Żenczykowski79e9fed2018-06-03 10:41:17 -07002670 net->ipv4.sysctl_tcp_tw_reuse = 2;
Nikolay Borisov12ed8242016-02-03 09:46:51 +02002671
Haishuang Yanfee83d02016-12-28 17:52:33 +08002672 cnt = tcp_hashinfo.ehash_mask + 1;
Yafang Shao743e4812018-09-01 20:21:05 +08002673 net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
Haishuang Yan1946e672016-12-28 17:52:32 +08002674 net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
2675
Haishuang Yanfee83d02016-12-28 17:52:33 +08002676 net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
Eric Dumazetf9301032017-06-07 10:34:37 -07002677 net->ipv4.sysctl_tcp_sack = 1;
Eric Dumazet9bb37ef2017-06-07 10:34:38 -07002678 net->ipv4.sysctl_tcp_window_scaling = 1;
Eric Dumazet5d2ed052017-06-07 10:34:39 -07002679 net->ipv4.sysctl_tcp_timestamps = 1;
Eric Dumazet2ae21cf2017-10-26 21:54:56 -07002680 net->ipv4.sysctl_tcp_early_retrans = 3;
Eric Dumazete20223f2017-10-26 21:54:57 -07002681 net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
Eric Dumazetb510f0d2017-10-26 21:54:59 -07002682 net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
Eric Dumazete0a1e5b2017-10-26 21:55:00 -07002683 net->ipv4.sysctl_tcp_retrans_collapse = 1;
Eric Dumazetc6e21802017-10-26 21:55:06 -07002684 net->ipv4.sysctl_tcp_max_reordering = 300;
Eric Dumazet6496f6b2017-10-26 21:55:07 -07002685 net->ipv4.sysctl_tcp_dsack = 1;
Eric Dumazet0c126542017-10-26 21:55:08 -07002686 net->ipv4.sysctl_tcp_app_win = 31;
Eric Dumazet94f08932017-10-26 21:55:09 -07002687 net->ipv4.sysctl_tcp_adv_win_scale = 1;
Eric Dumazetaf9b69a2017-10-26 21:55:10 -07002688 net->ipv4.sysctl_tcp_frto = 2;
Eric Dumazet4540c0c2017-10-27 07:47:22 -07002689 net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
Eric Dumazetd06a9902017-10-27 07:47:23 -07002690 /* This limits the percentage of the congestion window which we
2691 * will allow a single TSO frame to consume. Building TSO frames
2692 * which are too large can cause TCP streams to be bursty.
2693 */
2694 net->ipv4.sysctl_tcp_tso_win_divisor = 3;
Eric Dumazetc73e5802018-11-11 07:34:28 -08002695 /* Default TSQ limit of 16 TSO segments */
2696 net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
Eric Dumazetb530b682017-10-27 07:47:26 -07002697 /* RFC 5961 challenge ACK rate limiting */
2698 net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
Eric Dumazet26e95962017-10-27 07:47:27 -07002699 net->ipv4.sysctl_tcp_min_tso_segs = 2;
Eric Dumazetbd239702017-10-27 07:47:28 -07002700 net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
Eric Dumazet790f00e2017-10-27 07:47:29 -07002701 net->ipv4.sysctl_tcp_autocorking = 1;
Eric Dumazet4170ba62017-10-27 07:47:30 -07002702 net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
Eric Dumazet23a7102a2017-10-27 07:47:31 -07002703 net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
Eric Dumazetc26e91f2017-10-27 07:47:32 -07002704 net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
Eric Dumazet356d1832017-11-07 00:29:28 -08002705 if (net != &init_net) {
2706 memcpy(net->ipv4.sysctl_tcp_rmem,
2707 init_net.ipv4.sysctl_tcp_rmem,
2708 sizeof(init_net.ipv4.sysctl_tcp_rmem));
2709 memcpy(net->ipv4.sysctl_tcp_wmem,
2710 init_net.ipv4.sysctl_tcp_wmem,
2711 sizeof(init_net.ipv4.sysctl_tcp_wmem));
2712 }
Eric Dumazet6d82aa22018-05-17 14:47:28 -07002713 net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
Eric Dumazet9c21d2f2018-05-17 14:47:29 -07002714 net->ipv4.sysctl_tcp_comp_sack_nr = 44;
Haishuang Yane1cfcbe2017-09-27 11:35:40 +08002715 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
Haishuang Yan43713842017-09-27 11:35:42 +08002716 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
Haishuang Yan3733be12017-09-27 11:35:43 +08002717 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
2718 atomic_set(&net->ipv4.tfo_active_disable_times, 0);
Haishuang Yane1cfcbe2017-09-27 11:35:40 +08002719
Stephen Hemminger6670e152017-11-14 08:25:49 -08002720 /* Reno is always built in */
2721 if (!net_eq(net, &init_net) &&
2722 try_module_get(init_net.ipv4.tcp_congestion_control->owner))
2723 net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
2724 else
2725 net->ipv4.tcp_congestion_control = &tcp_reno;
2726
Daniel Borkmann49213552015-05-19 21:04:22 +02002727 return 0;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002728fail:
2729 tcp_sk_exit(net);
2730
2731 return res;
Eric W. Biedermanb099ce22009-12-03 02:29:09 +00002732}
2733
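/*
 * Batched namespace teardown: flush TIME_WAIT sockets that still
 * reference an exiting namespace, then release each namespace's TCP
 * fastopen context.
 */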
2734static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2735{
Haishuang Yan43713842017-09-27 11:35:42 +08002736 struct net *net;
2737
Haishuang Yan1946e672016-12-28 17:52:32 +08002738 inet_twsk_purge(&tcp_hashinfo, AF_INET);
Haishuang Yan43713842017-09-27 11:35:42 +08002739
2740 list_for_each_entry(net, net_exit_list, exit_list)
2741 tcp_fastopen_ctx_destroy(net);
Denis V. Lunev046ee902008-04-03 14:31:33 -07002742}
2743
2744static struct pernet_operations __net_initdata tcp_sk_ops = {
Eric W. Biedermanb099ce22009-12-03 02:29:09 +00002745 .init = tcp_sk_init,
2746 .exit = tcp_sk_exit,
2747 .exit_batch = tcp_sk_exit_batch,
Denis V. Lunev046ee902008-04-03 14:31:33 -07002748};
2749
Denis V. Lunev9b0f9762008-02-29 11:13:15 -08002750void __init tcp_v4_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751{
Eric W. Biederman6a1b3052009-02-22 00:10:18 -08002752 if (register_pernet_subsys(&tcp_sk_ops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002753 panic("Failed to create the TCP control socket.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002754}