// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 */

/*
 * Changes:
 *	David S. Miller		:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *	David S. Miller		:	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *	Andi Kleen		:	Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *	Andi Kleen		:	Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *	Mike McLagan		:	Routing by source
 *	Juan Jose Ciarlante	:	ip_dynaddr bits
 *	Andi Kleen		:	various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

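/* Initial sequence number and TCP timestamp offset generation: the ISN is
 * a keyed hash over the 4-tuple plus a fine-grained clock (RFC 6528, see
 * secure_tcp_seq()), while the timestamp offset is a keyed hash over the
 * address pair, so neither is predictable by an off-path attacker.
 */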
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
	return secure_tcp_seq(ip_hdr(skb)->daddr,
			      ip_hdr(skb)->saddr,
			      tcp_hdr(skb)->dest,
			      tcp_hdr(skb)->source);
}

static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}

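/* Decide whether a TIME-WAIT socket occupying the 4-tuple we want can be
 * reused for a new outgoing connection.  Returns 1 (taking a reference on
 * the TIME-WAIT socket) when reuse is safe; the new socket then inherits
 * the timestamp state, so PAWS keeps stray old segments out.
 */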
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct inet_timewait_sock *tw = inet_twsk(sktw);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);
	int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;

	if (reuse == 2) {
		/* Still does not detect *everything* that goes through
		 * lo, since we require a loopback src or dst address
		 * or direct binding to 'lo' interface.
		 */
		bool loopback = false;
		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
			loopback = true;
#if IS_ENABLED(CONFIG_IPV6)
		if (tw->tw_family == AF_INET6) {
			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
			    ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) ||
			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
			    ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr))
				loopback = true;
		} else
#endif
		{
			if (ipv4_is_loopback(tw->tw_daddr) ||
			    ipv4_is_loopback(tw->tw_rcv_saddr))
				loopback = true;
		}
		if (!loopback)
			reuse = 0;
	}

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (reuse && time_after32(ktime_get_seconds(),
					    tcptw->tw_ts_recent_stamp)))) {
		/* In case of repair and re-using TIME-WAIT sockets we still
		 * want to be sure that it is safe as above but honor the
		 * sequence numbers and time stamps set as part of the repair
		 * process.
		 *
		 * Without this check re-using a TIME-WAIT socket with TCP
		 * repair would accumulate a -1 on the repair assigned
		 * sequence number. The first time it is reused the sequence
		 * is -1, the second time -2, etc. This fixes that issue
		 * without appearing to create any others.
		 */
		if (likely(!tp->repair)) {
			u32 seq = tcptw->tw_snd_nxt + 65535 + 2;

			if (!seq)
				seq = 1;
			WRITE_ONCE(tp->write_seq, seq);
			tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		}
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

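/* connect() prehook: runs the INET4_CONNECT cgroup BPF program, if one is
 * attached, before tcp_v4_connect() does the real work; the program may
 * rewrite or reject the destination address.
 */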
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v4_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that are
	 * outside the bound specified by the user in addr_len.
	 */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
}

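/* Overview of tcp_v4_connect(), in brief:
 *  1. validate the address and resolve a route to the destination,
 *     honouring any source-route option;
 *  2. move the socket to SYN-SENT and bind a source port via
 *     inet_hash_connect();
 *  3. rebuild the route with the final port pair and commit it to the
 *     socket;
 *  4. pick the initial sequence number and timestamp offset;
 *  5. hand off to tcp_connect() to build and send the SYN.
 */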
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			WRITE_ONCE(tp->write_seq, 0);
	}

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	rt = NULL;

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcp_seq(inet->inet_saddr,
						  inet->inet_daddr,
						  inet->inet_sport,
						  usin->sin_port));
		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
						 inet->inet_saddr,
						 inet->inet_daddr);
	}

	inet->inet_id = prandom_u32();

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	err = tcp_connect(sk);

	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if the socket was owned by the
 * user at the time tcp_v4_err() was called to handle the ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember the soft error
	 * in case this connection is not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

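/* Propagate an ICMP redirect to the routing layer so the destination
 * cache entry used by this socket can be updated.
 */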
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		tcp_listendrop(req->rsk_listener);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	u32 seq, snd_una;
	s32 remaining;
	u32 delta_us;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb), 0);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return -ENOENT;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
				     type == ICMP_TIME_EXCEEDED ||
				     (type == ICMP_DEST_UNREACH &&
				      (code == ICMP_NET_UNREACH ||
				       code == ICMP_HOST_UNREACH)));
		return 0;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always < 576 bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if this ICMP message allows reverting the backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		skb = tcp_rtx_queue_head(sk);
		if (WARN_ON_ONCE(!skb))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		tcp_mstamp_refresh(tp);
		delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
		remaining = icsk->icsk_rto -
			    usecs_to_jiffies(delta_us);

		if (remaining > 0) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now. */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows us to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

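/* Prime the TCP checksum for transmission: store the pseudo-header
 * checksum in th->check and record where the device (or
 * skb_checksum_help()) must fold in the checksum over the payload.
 */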
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 * This routine will send an RST to the other tcp.
 *
 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		 for reset.
 * Answer: if a packet caused RST, it is not for a socket
 *	   existing in our system, if it is matched to a socket,
 *	   it is just duplicate segment or bug in other side's TCP.
 *	   So we build the reply based only on parameters
 *	   arrived with the segment.
 * Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	u64 transmit_time = 0;
	struct sock *ctl_sk;
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and the incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		const union tcp_md5_addr *addr;
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
		key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
	} else if (hash_location) {
		const union tcp_md5_addr *addr;
		int sdif = tcp_v4_sdif(skb);
		int dif = inet_iif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not losing security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
					     ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), dif, sdif);
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = sdif ? dif : 0;
		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
		key = tcp_md5_do_lookup(sk1, l3index, addr, AF_INET);
		if (!key)
			goto out;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk) {
		arg.bound_dev_if = sk->sk_bound_dev_if;
		if (sk_fullsock(sk))
			trace_tcp_send_reset(sk, skb);
	}

	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
	if (sk) {
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
		ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_priority : sk->sk_priority;
		transmit_time = tcp_transmit_time(sk);
	}
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len,
			      transmit_time);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
 * outside socket context, is certainly ugly. What can I do?
 */

static void tcp_v4_send_ack(const struct sock *sk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct net *net = sock_net(sk);
	struct ip_reply_arg arg;
	struct sock *ctl_sk;
	u64 transmit_time;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	local_bh_disable();
	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
			   inet_twsk(sk)->tw_mark : sk->sk_mark;
	ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
			   inet_twsk(sk)->tw_priority : sk->sk_priority;
	transmit_time = tcp_transmit_time(sk);
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len,
			      transmit_time);

	ctl_sk->sk_mark = 0;
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	local_bh_enable();
}

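/* Answer a segment that hit a TIME-WAIT socket: ACK with the sequence,
 * window and timestamp state preserved in the timewait bucket, then drop
 * our reference to it.
 */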
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

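/* ACK a segment arriving for a request socket, for both regular SYN-RECV
 * and Fast Open (see the state check below); the ACK is built from the
 * request's own sequence and window state.
 */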
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	const union tcp_md5_addr *addr;
	int l3index;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
						 tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
	l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
	tcp_v4_send_ack(sk, skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 * Send a SYN-ACK after having received a SYN.
 * This still operates on a request_sock only, not on a big
 * socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		rcu_read_lock();
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    rcu_dereference(ireq->ireq_opt));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 * IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);

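/* Longest-prefix-match lookup over the socket's MD5 key list: a key is a
 * candidate when its family matches, its address prefix covers @addr and
 * its L3 index is either zero (wildcard) or equal to @l3index; the
 * candidate with the longest prefix wins.
 */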
/* Find the Key structure for an address. */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
					   const union tcp_md5_addr *addr,
					   int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	const struct tcp_md5sig_info *md5sig;
	__be32 mask;
	struct tcp_md5sig_key *best_match = NULL;
	bool match;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;

	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (key->l3index && key->l3index != l3index)
			continue;
		if (family == AF_INET) {
			mask = inet_make_mask(key->prefixlen);
			match = (key->addr.a4.s_addr & mask) ==
				(addr->a4.s_addr & mask);
#if IS_ENABLED(CONFIG_IPV6)
		} else if (family == AF_INET6) {
			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
						  key->prefixlen);
#endif
		} else {
			match = false;
		}

		if (match && (!best_match ||
			      key->prefixlen > best_match->prefixlen))
			best_match = key;
	}
	return best_match;
}
EXPORT_SYMBOL(__tcp_md5_do_lookup);

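/* Exact-match counterpart of __tcp_md5_do_lookup(): family, address
 * bytes, prefix length and (for non-wildcard keys) L3 index must all
 * match.  Used below when adding or deleting a specific key.
 */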
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
						      const union tcp_md5_addr *addr,
						      int family, u8 prefixlen,
						      int l3index)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       lockdep_sock_is_held(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (key->l3index && key->l3index != l3index)
			continue;
		if (!memcmp(&key->addr, addr, size) &&
		    key->prefixlen == prefixlen)
			return key;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

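/* Install a new MD5 key, or update an exactly matching existing one in
 * place.  The first key added allocates the per-socket md5sig_info and
 * masks out the GSO capabilities via sk_nocaps_add(), since signed
 * segments have to be hashed one by one.
 */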
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001092/* This can be called on a newly created socket, from other files */
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001093int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
David Aherndea53bb2019-12-30 14:14:28 -08001094 int family, u8 prefixlen, int l3index,
1095 const u8 *newkey, u8 newkeylen, gfp_t gfp)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001096{
 1097	/* Add key to the list */
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -07001098 struct tcp_md5sig_key *key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001099 struct tcp_sock *tp = tcp_sk(sk);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001100 struct tcp_md5sig_info *md5sig;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001101
David Aherndea53bb2019-12-30 14:14:28 -08001102 key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001103 if (key) {
1104 /* Pre-existing entry - just update that one. */
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001105 memcpy(key->key, newkey, newkeylen);
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -07001106 key->keylen = newkeylen;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001107 return 0;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001108 }
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001109
Eric Dumazeta8afca02012-01-31 18:45:40 +00001110 md5sig = rcu_dereference_protected(tp->md5sig_info,
Hannes Frederic Sowa1e1d04e2016-04-05 17:10:15 +02001111 lockdep_sock_is_held(sk));
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001112 if (!md5sig) {
1113 md5sig = kmalloc(sizeof(*md5sig), gfp);
1114 if (!md5sig)
1115 return -ENOMEM;
1116
1117 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1118 INIT_HLIST_HEAD(&md5sig->head);
Eric Dumazeta8afca02012-01-31 18:45:40 +00001119 rcu_assign_pointer(tp->md5sig_info, md5sig);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001120 }
1121
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001122 key = sock_kmalloc(sk, sizeof(*key), gfp);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001123 if (!key)
1124 return -ENOMEM;
Eric Dumazet71cea172013-05-20 06:52:26 +00001125 if (!tcp_alloc_md5sig_pool()) {
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001126 sock_kfree_s(sk, key, sizeof(*key));
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001127 return -ENOMEM;
1128 }
1129
1130 memcpy(key->key, newkey, newkeylen);
1131 key->keylen = newkeylen;
1132 key->family = family;
Ivan Delalande67973182017-06-15 18:07:06 -07001133 key->prefixlen = prefixlen;
David Aherndea53bb2019-12-30 14:14:28 -08001134 key->l3index = l3index;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001135 memcpy(&key->addr, addr,
1136 (family == AF_INET6) ? sizeof(struct in6_addr) :
1137 sizeof(struct in_addr));
1138 hlist_add_head_rcu(&key->node, &md5sig->head);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001139 return 0;
1140}
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001141EXPORT_SYMBOL(tcp_md5_do_add);
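
/* A minimal usage sketch (hypothetical values, error handling elided);
 * the in-tree callers visible here are tcp_v4_parse_md5_keys() and
 * tcp_v4_syn_recv_sock() below:
 *
 *	union tcp_md5_addr a = {
 *		.a4.s_addr = htonl(INADDR_LOOPBACK),
 *	};
 *
 *	err = tcp_md5_do_add(sk, &a, AF_INET, 32, 0,
 *			     "secret", 6, GFP_KERNEL);
 */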
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001142
Ivan Delalande67973182017-06-15 18:07:06 -07001143int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
David Aherndea53bb2019-12-30 14:14:28 -08001144 u8 prefixlen, int l3index)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001145{
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001146 struct tcp_md5sig_key *key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001147
David Aherndea53bb2019-12-30 14:14:28 -08001148 key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001149 if (!key)
1150 return -ENOENT;
1151 hlist_del_rcu(&key->node);
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001152 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001153 kfree_rcu(key, rcu);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001154 return 0;
1155}
1156EXPORT_SYMBOL(tcp_md5_do_del);
1157
stephen hemmingere0683e702012-10-26 14:31:40 +00001158static void tcp_clear_md5_list(struct sock *sk)
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001159{
1160 struct tcp_sock *tp = tcp_sk(sk);
1161 struct tcp_md5sig_key *key;
Sasha Levinb67bfe02013-02-27 17:06:00 -08001162 struct hlist_node *n;
Eric Dumazeta8afca02012-01-31 18:45:40 +00001163 struct tcp_md5sig_info *md5sig;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001164
Eric Dumazeta8afca02012-01-31 18:45:40 +00001165 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1166
Sasha Levinb67bfe02013-02-27 17:06:00 -08001167 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001168 hlist_del_rcu(&key->node);
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001169 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001170 kfree_rcu(key, rcu);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001171 }
1172}
1173
Ivan Delalande8917a772017-06-15 18:07:07 -07001174static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
1175 char __user *optval, int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001176{
1177 struct tcp_md5sig cmd;
1178 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
David Aherncea97602019-12-30 14:14:25 -08001179 const union tcp_md5_addr *addr;
Ivan Delalande8917a772017-06-15 18:07:07 -07001180 u8 prefixlen = 32;
David Aherndea53bb2019-12-30 14:14:28 -08001181 int l3index = 0;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001182
1183 if (optlen < sizeof(cmd))
1184 return -EINVAL;
1185
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001186 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001187 return -EFAULT;
1188
1189 if (sin->sin_family != AF_INET)
1190 return -EINVAL;
1191
Ivan Delalande8917a772017-06-15 18:07:07 -07001192 if (optname == TCP_MD5SIG_EXT &&
1193 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
1194 prefixlen = cmd.tcpm_prefixlen;
1195 if (prefixlen > 32)
1196 return -EINVAL;
1197 }
1198
David Ahern6b102db2019-12-30 14:14:29 -08001199 if (optname == TCP_MD5SIG_EXT &&
1200 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
1201 struct net_device *dev;
1202
1203 rcu_read_lock();
1204 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
1205 if (dev && netif_is_l3_master(dev))
1206 l3index = dev->ifindex;
1207
1208 rcu_read_unlock();
1209
 1210		/* ok to test dev and l3index after rcu_read_unlock();
 1211		 * right now the device MUST be an L3 master
1212 */
1213 if (!dev || !l3index)
1214 return -EINVAL;
1215 }
1216
David Aherncea97602019-12-30 14:14:25 -08001217 addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr;
1218
Dmitry Popov64a124e2014-08-03 22:45:19 +04001219 if (!cmd.tcpm_keylen)
David Aherndea53bb2019-12-30 14:14:28 -08001220 return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001221
1222 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1223 return -EINVAL;
1224
David Aherndea53bb2019-12-30 14:14:28 -08001225 return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index,
David Aherncea97602019-12-30 14:14:25 -08001226 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001227}
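
/* Userspace reaches this handler through setsockopt(). A hedged sketch,
 * assuming the UAPI struct tcp_md5sig layout from <linux/tcp.h>
 * (tcpm_addr, tcpm_flags, tcpm_prefixlen, tcpm_ifindex, tcpm_keylen,
 * tcpm_key):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = inet_addr("192.0.2.1");
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * With TCP_MD5SIG_EXT, tcpm_flags may additionally carry
 * TCP_MD5SIG_FLAG_PREFIX and/or TCP_MD5SIG_FLAG_IFINDEX, handled above;
 * a zero tcpm_keylen deletes the matching key via tcp_md5_do_del().
 */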
1228
Eric Dumazet19689e32016-06-27 18:51:53 +02001229static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
1230 __be32 daddr, __be32 saddr,
1231 const struct tcphdr *th, int nbytes)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001232{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001233 struct tcp4_pseudohdr *bp;
Adam Langley49a72df2008-07-19 00:01:42 -07001234 struct scatterlist sg;
Eric Dumazet19689e32016-06-27 18:51:53 +02001235 struct tcphdr *_th;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001236
Eric Dumazet19689e32016-06-27 18:51:53 +02001237 bp = hp->scratch;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001238 bp->saddr = saddr;
1239 bp->daddr = daddr;
1240 bp->pad = 0;
YOSHIFUJI Hideaki076fb722008-04-17 12:48:12 +09001241 bp->protocol = IPPROTO_TCP;
Adam Langley49a72df2008-07-19 00:01:42 -07001242 bp->len = cpu_to_be16(nbytes);
David S. Millerc7da57a2007-10-26 00:41:21 -07001243
Eric Dumazet19689e32016-06-27 18:51:53 +02001244 _th = (struct tcphdr *)(bp + 1);
1245 memcpy(_th, th, sizeof(*th));
1246 _th->check = 0;
1247
1248 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
1249 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
1250 sizeof(*bp) + sizeof(*th));
Herbert Xucf80e0e2016-01-24 21:20:23 +08001251 return crypto_ahash_update(hp->md5_req);
Adam Langley49a72df2008-07-19 00:01:42 -07001252}
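
/* What is fed to MD5 above is the RFC 2385 prefix of the signed data: an
 * IPv4 pseudo-header followed by the base TCP header (sizeof(*th) bytes,
 * options excluded) with its checksum field zeroed. Assumed scratch
 * buffer layout, per struct tcp4_pseudohdr:
 *
 *	__be32 saddr; __be32 daddr;
 *	__u8 pad; __u8 protocol; __be16 len;
 *	struct tcphdr th;   (th.check == 0)
 *
 * Callers then hash any payload and the key itself.
 */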
1253
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001254static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001255 __be32 daddr, __be32 saddr, const struct tcphdr *th)
Adam Langley49a72df2008-07-19 00:01:42 -07001256{
1257 struct tcp_md5sig_pool *hp;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001258 struct ahash_request *req;
Adam Langley49a72df2008-07-19 00:01:42 -07001259
1260 hp = tcp_get_md5sig_pool();
1261 if (!hp)
1262 goto clear_hash_noput;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001263 req = hp->md5_req;
Adam Langley49a72df2008-07-19 00:01:42 -07001264
Herbert Xucf80e0e2016-01-24 21:20:23 +08001265 if (crypto_ahash_init(req))
Adam Langley49a72df2008-07-19 00:01:42 -07001266 goto clear_hash;
Eric Dumazet19689e32016-06-27 18:51:53 +02001267 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
Adam Langley49a72df2008-07-19 00:01:42 -07001268 goto clear_hash;
1269 if (tcp_md5_hash_key(hp, key))
1270 goto clear_hash;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001271 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1272 if (crypto_ahash_final(req))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001273 goto clear_hash;
1274
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001275 tcp_put_md5sig_pool();
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001276 return 0;
Adam Langley49a72df2008-07-19 00:01:42 -07001277
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001278clear_hash:
1279 tcp_put_md5sig_pool();
1280clear_hash_noput:
1281 memset(md5_hash, 0, 16);
Adam Langley49a72df2008-07-19 00:01:42 -07001282 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001283}
1284
Eric Dumazet39f8e582015-03-24 15:58:55 -07001285int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1286 const struct sock *sk,
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001287 const struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001288{
Adam Langley49a72df2008-07-19 00:01:42 -07001289 struct tcp_md5sig_pool *hp;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001290 struct ahash_request *req;
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001291 const struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001292 __be32 saddr, daddr;
1293
Eric Dumazet39f8e582015-03-24 15:58:55 -07001294 if (sk) { /* valid for establish/request sockets */
1295 saddr = sk->sk_rcv_saddr;
1296 daddr = sk->sk_daddr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001297 } else {
Adam Langley49a72df2008-07-19 00:01:42 -07001298 const struct iphdr *iph = ip_hdr(skb);
1299 saddr = iph->saddr;
1300 daddr = iph->daddr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001301 }
Adam Langley49a72df2008-07-19 00:01:42 -07001302
1303 hp = tcp_get_md5sig_pool();
1304 if (!hp)
1305 goto clear_hash_noput;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001306 req = hp->md5_req;
Adam Langley49a72df2008-07-19 00:01:42 -07001307
Herbert Xucf80e0e2016-01-24 21:20:23 +08001308 if (crypto_ahash_init(req))
Adam Langley49a72df2008-07-19 00:01:42 -07001309 goto clear_hash;
1310
Eric Dumazet19689e32016-06-27 18:51:53 +02001311 if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
Adam Langley49a72df2008-07-19 00:01:42 -07001312 goto clear_hash;
1313 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1314 goto clear_hash;
1315 if (tcp_md5_hash_key(hp, key))
1316 goto clear_hash;
Herbert Xucf80e0e2016-01-24 21:20:23 +08001317 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1318 if (crypto_ahash_final(req))
Adam Langley49a72df2008-07-19 00:01:42 -07001319 goto clear_hash;
1320
1321 tcp_put_md5sig_pool();
1322 return 0;
1323
1324clear_hash:
1325 tcp_put_md5sig_pool();
1326clear_hash_noput:
1327 memset(md5_hash, 0, 16);
1328 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001329}
Adam Langley49a72df2008-07-19 00:01:42 -07001330EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001331
Eric Dumazetba8e2752015-10-02 11:43:28 -07001332#endif
1333
Eric Dumazetff74e232015-03-24 15:58:54 -07001334/* Called with rcu_read_lock() */
Eric Dumazetba8e2752015-10-02 11:43:28 -07001335static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
David Ahern534322c2019-12-30 14:14:27 -08001336 const struct sk_buff *skb,
1337 int dif, int sdif)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001338{
Eric Dumazetba8e2752015-10-02 11:43:28 -07001339#ifdef CONFIG_TCP_MD5SIG
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001340 /*
1341 * This gets called for each TCP segment that arrives
1342 * so we want to be efficient.
1343 * We have 3 drop cases:
1344 * o No MD5 hash and one expected.
1345 * o MD5 hash and we're not expecting one.
 1346	 * o MD5 hash and it's wrong.
1347 */
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001348 const __u8 *hash_location = NULL;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001349 struct tcp_md5sig_key *hash_expected;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001350 const struct iphdr *iph = ip_hdr(skb);
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001351 const struct tcphdr *th = tcp_hdr(skb);
David Aherncea97602019-12-30 14:14:25 -08001352 const union tcp_md5_addr *addr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001353 unsigned char newhash[16];
David Aherndea53bb2019-12-30 14:14:28 -08001354 int genhash, l3index;
1355
 1356	/* sdif set means the packet ingressed via a device
1357 * in an L3 domain and dif is set to the l3mdev
1358 */
1359 l3index = sdif ? dif : 0;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001360
David Aherncea97602019-12-30 14:14:25 -08001361 addr = (union tcp_md5_addr *)&iph->saddr;
David Aherndea53bb2019-12-30 14:14:28 -08001362 hash_expected = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
YOSHIFUJI Hideaki7d5d5522008-04-17 12:29:53 +09001363 hash_location = tcp_parse_md5sig_option(th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001364
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001365 /* We've parsed the options - do we have a hash? */
1366 if (!hash_expected && !hash_location)
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001367 return false;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001368
1369 if (hash_expected && !hash_location) {
Eric Dumazetc10d9312016-04-29 14:16:47 -07001370 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001371 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001372 }
1373
1374 if (!hash_expected && hash_location) {
Eric Dumazetc10d9312016-04-29 14:16:47 -07001375 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001376 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001377 }
1378
1379 /* Okay, so this is hash_expected and hash_location -
1380 * so we need to calculate the checksum.
1381 */
Adam Langley49a72df2008-07-19 00:01:42 -07001382 genhash = tcp_v4_md5_hash_skb(newhash,
1383 hash_expected,
Eric Dumazet39f8e582015-03-24 15:58:55 -07001384 NULL, skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001385
1386 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
Eric Dumazet72145a62016-08-24 09:01:23 -07001387 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
David Aherndea53bb2019-12-30 14:14:28 -08001388 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
Joe Perchese87cc472012-05-13 21:56:26 +00001389 &iph->saddr, ntohs(th->source),
1390 &iph->daddr, ntohs(th->dest),
1391 genhash ? " tcp_v4_calc_md5_hash failed"
David Aherndea53bb2019-12-30 14:14:28 -08001392 : "", l3index);
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001393 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001394 }
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001395 return false;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001396#endif
Eric Dumazetba8e2752015-10-02 11:43:28 -07001397 return false;
1398}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001399
Eric Dumazetb40cf182015-09-25 07:39:08 -07001400static void tcp_v4_init_req(struct request_sock *req,
1401 const struct sock *sk_listener,
Octavian Purdila16bea702014-06-25 17:09:53 +03001402 struct sk_buff *skb)
1403{
1404 struct inet_request_sock *ireq = inet_rsk(req);
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001405 struct net *net = sock_net(sk_listener);
Octavian Purdila16bea702014-06-25 17:09:53 +03001406
Eric Dumazet08d2cc3b2015-03-18 14:05:38 -07001407 sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1408 sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001409 RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
Octavian Purdila16bea702014-06-25 17:09:53 +03001410}
1411
Eric Dumazetf9646292015-09-29 07:42:50 -07001412static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1413 struct flowi *fl,
Soheil Hassas Yeganeh4396e462017-03-15 16:30:46 -04001414 const struct request_sock *req)
Octavian Purdilad94e0412014-06-25 17:09:55 +03001415{
Soheil Hassas Yeganeh4396e462017-03-15 16:30:46 -04001416 return inet_csk_route_req(sk, &fl->u.ip4, req);
Octavian Purdilad94e0412014-06-25 17:09:55 +03001417}
1418
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001419struct request_sock_ops tcp_request_sock_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 .family = PF_INET,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001421 .obj_size = sizeof(struct tcp_request_sock),
Octavian Purdila5db92c92014-06-25 17:09:59 +03001422 .rtx_syn_ack = tcp_rtx_synack,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001423 .send_ack = tcp_v4_reqsk_send_ack,
1424 .destructor = tcp_v4_reqsk_destructor,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425 .send_reset = tcp_v4_send_reset,
stephen hemminger688d1942014-08-29 23:32:05 -07001426 .syn_ack_timeout = tcp_syn_ack_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427};
1428
Mat Martineau35b2c322020-01-09 07:59:21 -08001429const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
Octavian Purdila2aec4a22014-06-25 17:10:00 +03001430 .mss_clamp = TCP_MSS_DEFAULT,
Octavian Purdila16bea702014-06-25 17:09:53 +03001431#ifdef CONFIG_TCP_MD5SIG
Eric Dumazetfd3a1542015-03-24 15:58:56 -07001432 .req_md5_lookup = tcp_v4_md5_lookup,
John Dykstrae3afe7b2009-07-16 05:04:51 +00001433 .calc_md5_hash = tcp_v4_md5_hash_skb,
Andrew Mortonb6332e62006-11-30 19:16:28 -08001434#endif
Octavian Purdila16bea702014-06-25 17:09:53 +03001435 .init_req = tcp_v4_init_req,
Octavian Purdilafb7b37a2014-06-25 17:09:54 +03001436#ifdef CONFIG_SYN_COOKIES
1437 .cookie_init_seq = cookie_v4_init_sequence,
1438#endif
Octavian Purdilad94e0412014-06-25 17:09:55 +03001439 .route_req = tcp_v4_route_req,
Eric Dumazet84b114b2017-05-05 06:56:54 -07001440 .init_seq = tcp_v4_init_seq,
1441 .init_ts_off = tcp_v4_init_ts_off,
Octavian Purdilad6274bd2014-06-25 17:09:58 +03001442 .send_synack = tcp_v4_send_synack,
Octavian Purdila16bea702014-06-25 17:09:53 +03001443};
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001444
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1446{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447	/* Never answer SYNs sent to broadcast or multicast */
Eric Dumazet511c3f92009-06-02 05:14:27 +00001448 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 goto drop;
1450
Octavian Purdila1fb6f152014-06-25 17:10:02 +03001451 return tcp_conn_request(&tcp_request_sock_ops,
1452 &tcp_request_sock_ipv4_ops, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454drop:
Eric Dumazet9caad862016-04-01 08:52:20 -07001455 tcp_listendrop(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 return 0;
1457}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001458EXPORT_SYMBOL(tcp_v4_conn_request);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459
1460
1461/*
1462 * The three way handshake has completed - we got a valid synack -
1463 * now create the new socket.
1464 */
Eric Dumazet0c271712015-09-29 07:42:48 -07001465struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001466 struct request_sock *req,
Eric Dumazet5e0724d2015-10-22 08:20:46 -07001467 struct dst_entry *dst,
1468 struct request_sock *req_unhash,
1469 bool *own_req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001471 struct inet_request_sock *ireq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 struct inet_sock *newinet;
1473 struct tcp_sock *newtp;
1474 struct sock *newsk;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001475#ifdef CONFIG_TCP_MD5SIG
David Aherncea97602019-12-30 14:14:25 -08001476 const union tcp_md5_addr *addr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001477 struct tcp_md5sig_key *key;
David Aherndea53bb2019-12-30 14:14:28 -08001478 int l3index;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001479#endif
Eric Dumazetf6d8bd02011-04-21 09:45:37 +00001480 struct ip_options_rcu *inet_opt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481
1482 if (sk_acceptq_is_full(sk))
1483 goto exit_overflow;
1484
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 newsk = tcp_create_openreq_child(sk, req, skb);
1486 if (!newsk)
Balazs Scheidler093d2822010-10-21 13:06:43 +02001487 goto exit_nonewsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488
Herbert Xubcd76112006-06-30 13:36:35 -07001489 newsk->sk_gso_type = SKB_GSO_TCPV4;
Neal Cardwellfae6ef82012-08-19 03:30:38 +00001490 inet_sk_rx_dst_set(newsk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491
1492 newtp = tcp_sk(newsk);
1493 newinet = inet_sk(newsk);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001494 ireq = inet_rsk(req);
Eric Dumazetd1e559d2015-03-18 14:05:35 -07001495 sk_daddr_set(newsk, ireq->ir_rmt_addr);
1496 sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
David Ahern6dd9a142015-12-16 13:20:44 -08001497 newsk->sk_bound_dev_if = ireq->ir_iif;
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001498 newinet->inet_saddr = ireq->ir_loc_addr;
1499 inet_opt = rcu_dereference(ireq->ireq_opt);
1500 RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001501 newinet->mc_index = inet_iif(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001502 newinet->mc_ttl = ip_hdr(skb)->ttl;
Jiri Benc4c507d22012-02-09 09:35:49 +00001503 newinet->rcv_tos = ip_hdr(skb)->tos;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001504 inet_csk(newsk)->icsk_ext_hdr_len = 0;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +00001505 if (inet_opt)
1506 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
Eric Dumazeta904a062019-11-01 10:32:19 -07001507 newinet->inet_id = prandom_u32();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508
Eric Dumazetdfd25ff2012-03-10 09:20:21 +00001509 if (!dst) {
1510 dst = inet_csk_route_child_sock(sk, newsk, req);
1511 if (!dst)
1512 goto put_and_exit;
1513 } else {
1514 /* syncookie case : see end of cookie_v4_check() */
1515 }
David S. Miller0e734412011-05-08 15:28:03 -07001516 sk_setup_caps(newsk, dst);
1517
Daniel Borkmann81164412015-01-05 23:57:48 +01001518 tcp_ca_openreq_child(newsk, dst);
1519
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 tcp_sync_mss(newsk, dst_mtu(dst));
Eric Dumazet3541f9e2017-02-02 08:04:56 -08001521 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
Tom Quetchenbachf5fff5d2008-09-21 00:21:51 -07001522
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 tcp_initialize_rcv_mss(newsk);
1524
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001525#ifdef CONFIG_TCP_MD5SIG
David Aherndea53bb2019-12-30 14:14:28 -08001526 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001527 /* Copy over the MD5 key from the original socket */
David Aherncea97602019-12-30 14:14:25 -08001528 addr = (union tcp_md5_addr *)&newinet->inet_daddr;
David Aherndea53bb2019-12-30 14:14:28 -08001529 key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
Ian Morris00db4122015-04-03 09:17:27 +01001530 if (key) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001531 /*
1532 * We're using one, so create a matching key
1533 * on the newsk structure. If we fail to get
1534 * memory, then we end up not copying the key
1535 * across. Shucks.
1536 */
David Aherndea53bb2019-12-30 14:14:28 -08001537 tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index,
David Aherncea97602019-12-30 14:14:25 -08001538 key->key, key->keylen, GFP_ATOMIC);
Eric Dumazeta4654192010-05-16 00:36:33 -07001539 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001540 }
1541#endif
1542
David S. Miller0e734412011-05-08 15:28:03 -07001543 if (__inet_inherit_port(sk, newsk) < 0)
1544 goto put_and_exit;
Eric Dumazet5e0724d2015-10-22 08:20:46 -07001545 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001546 if (likely(*own_req)) {
Eric Dumazet49a496c2015-11-05 12:50:19 -08001547 tcp_move_syn(newtp, req);
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001548 ireq->ireq_opt = NULL;
1549 } else {
1550 newinet->inet_opt = NULL;
1551 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 return newsk;
1553
1554exit_overflow:
Eric Dumazetc10d9312016-04-29 14:16:47 -07001555 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
Balazs Scheidler093d2822010-10-21 13:06:43 +02001556exit_nonewsk:
1557 dst_release(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558exit:
Eric Dumazet9caad862016-04-01 08:52:20 -07001559 tcp_listendrop(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 return NULL;
David S. Miller0e734412011-05-08 15:28:03 -07001561put_and_exit:
Eric Dumazetc92e8c02017-10-20 09:04:13 -07001562 newinet->inet_opt = NULL;
Christoph Paasche337e242012-12-14 04:07:58 +00001563 inet_csk_prepare_forced_close(newsk);
1564 tcp_done(newsk);
David S. Miller0e734412011-05-08 15:28:03 -07001565 goto exit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001567EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568
Eric Dumazet079096f2015-10-02 11:43:32 -07001569static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571#ifdef CONFIG_SYN_COOKIES
Eric Dumazet079096f2015-10-02 11:43:32 -07001572 const struct tcphdr *th = tcp_hdr(skb);
1573
Florian Westphalaf9b4732010-06-03 00:43:44 +00001574 if (!th->syn)
Cong Wang461b74c2014-10-15 14:33:22 -07001575 sk = cookie_v4_check(sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576#endif
1577 return sk;
1578}
1579
Petar Penkov9349d602019-07-29 09:59:14 -07001580u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
1581 struct tcphdr *th, u32 *cookie)
1582{
1583 u16 mss = 0;
1584#ifdef CONFIG_SYN_COOKIES
1585 mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
1586 &tcp_request_sock_ipv4_ops, sk, th);
1587 if (mss) {
1588 *cookie = __cookie_v4_init_sequence(iph, th, &mss);
1589 tcp_synq_overflow(sk);
1590 }
1591#endif
1592 return mss;
1593}
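
/* Presumably intended for callers outside the normal receive path (the
 * BPF bpf_tcp_gen_syncookie() helper, as far as we can tell): it yields
 * the syncookie ISN and the MSS encoded into it without requiring the
 * listener to be locked.
 */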
1594
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595/* The socket must have its spinlock held when we get
Eric Dumazete994b2f2015-10-02 11:43:39 -07001596 * here, unless it is a TCP_LISTEN socket.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 *
1598 * We have a potential double-lock case here, so even when
1599 * doing backlog processing we use the BH locking scheme.
1600 * This is because we cannot sleep with the original spinlock
1601 * held.
1602 */
1603int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1604{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001605 struct sock *rsk;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001606
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
Eric Dumazet404e0a82012-07-29 23:20:37 +00001608 struct dst_entry *dst = sk->sk_rx_dst;
1609
Tom Herbertbdeab992011-08-14 19:45:55 +00001610 sock_rps_save_rxhash(sk, skb);
Eric Dumazet3d973792014-11-11 05:54:27 -08001611 sk_mark_napi_id(sk, skb);
Eric Dumazet404e0a82012-07-29 23:20:37 +00001612 if (dst) {
Eric Dumazet505fbcf2012-07-27 06:23:40 +00001613 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
Ian Morris51456b22015-04-03 09:17:26 +01001614 !dst->ops->check(dst, 0)) {
David S. Miller92101b32012-07-23 16:29:00 -07001615 dst_release(dst);
1616 sk->sk_rx_dst = NULL;
1617 }
1618 }
Yafang Shao3d97d882018-05-29 23:27:31 +08001619 tcp_rcv_established(sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 return 0;
1621 }
1622
Eric Dumazet12e25e12015-06-03 23:49:21 -07001623 if (tcp_checksum_complete(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624 goto csum_err;
1625
1626 if (sk->sk_state == TCP_LISTEN) {
Eric Dumazet079096f2015-10-02 11:43:32 -07001627 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1628
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629 if (!nsk)
1630 goto discard;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 if (nsk != sk) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001632 if (tcp_child_process(sk, nsk, skb)) {
1633 rsk = nsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001635 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 return 0;
1637 }
Eric Dumazetca551582010-06-03 09:03:58 +00001638 } else
Tom Herbertbdeab992011-08-14 19:45:55 +00001639 sock_rps_save_rxhash(sk, skb);
Eric Dumazetca551582010-06-03 09:03:58 +00001640
Eric Dumazet72ab4a82015-09-29 07:42:41 -07001641 if (tcp_rcv_state_process(sk, skb)) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001642 rsk = sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001644 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645 return 0;
1646
1647reset:
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001648 tcp_v4_send_reset(rsk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649discard:
1650 kfree_skb(skb);
1651 /* Be careful here. If this function gets more complicated and
1652 * gcc suffers from register pressure on the x86, sk (in %ebx)
1653 * might be destroyed here. This current version compiles correctly,
1654 * but you have been warned.
1655 */
1656 return 0;
1657
1658csum_err:
Eric Dumazetc10d9312016-04-29 14:16:47 -07001659 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1660 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 goto discard;
1662}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001663EXPORT_SYMBOL(tcp_v4_do_rcv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664
Paolo Abeni74874492017-09-28 15:51:36 +02001665int tcp_v4_early_demux(struct sk_buff *skb)
David S. Miller41063e92012-06-19 21:22:05 -07001666{
David S. Miller41063e92012-06-19 21:22:05 -07001667 const struct iphdr *iph;
1668 const struct tcphdr *th;
1669 struct sock *sk;
David S. Miller41063e92012-06-19 21:22:05 -07001670
David S. Miller41063e92012-06-19 21:22:05 -07001671 if (skb->pkt_type != PACKET_HOST)
Paolo Abeni74874492017-09-28 15:51:36 +02001672 return 0;
David S. Miller41063e92012-06-19 21:22:05 -07001673
Eric Dumazet45f00f92012-10-22 21:42:47 +00001674 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
Paolo Abeni74874492017-09-28 15:51:36 +02001675 return 0;
David S. Miller41063e92012-06-19 21:22:05 -07001676
1677 iph = ip_hdr(skb);
Eric Dumazet45f00f92012-10-22 21:42:47 +00001678 th = tcp_hdr(skb);
David S. Miller41063e92012-06-19 21:22:05 -07001679
1680 if (th->doff < sizeof(struct tcphdr) / 4)
Paolo Abeni74874492017-09-28 15:51:36 +02001681 return 0;
David S. Miller41063e92012-06-19 21:22:05 -07001682
Eric Dumazet45f00f92012-10-22 21:42:47 +00001683 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
David S. Miller41063e92012-06-19 21:22:05 -07001684 iph->saddr, th->source,
Vijay Subramanian7011d082012-06-23 17:38:10 +00001685 iph->daddr, ntohs(th->dest),
David Ahern3fa6f612017-08-07 08:44:17 -07001686 skb->skb_iif, inet_sdif(skb));
David S. Miller41063e92012-06-19 21:22:05 -07001687 if (sk) {
1688 skb->sk = sk;
1689 skb->destructor = sock_edemux;
Eric Dumazetf7e4eb02015-03-15 21:12:13 -07001690 if (sk_fullsock(sk)) {
Michal Kubečekd0c294c2015-03-23 15:14:00 +01001691 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
Eric Dumazet505fbcf2012-07-27 06:23:40 +00001692
David S. Miller41063e92012-06-19 21:22:05 -07001693 if (dst)
1694 dst = dst_check(dst, 0);
David S. Miller92101b32012-07-23 16:29:00 -07001695 if (dst &&
Eric Dumazet505fbcf2012-07-27 06:23:40 +00001696 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
David S. Miller92101b32012-07-23 16:29:00 -07001697 skb_dst_set_noref(skb, dst);
David S. Miller41063e92012-06-19 21:22:05 -07001698 }
1699 }
Paolo Abeni74874492017-09-28 15:51:36 +02001700 return 0;
David S. Miller41063e92012-06-19 21:22:05 -07001701}
1702
Eric Dumazetc9c33212016-08-27 07:37:54 -07001703bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
1704{
Eric Dumazet82657922019-10-09 15:21:13 -07001705 u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
Eric Dumazet4f693b52018-11-27 14:42:03 -08001706 struct skb_shared_info *shinfo;
1707 const struct tcphdr *th;
1708 struct tcphdr *thtail;
1709 struct sk_buff *tail;
1710 unsigned int hdrlen;
1711 bool fragstolen;
1712 u32 gso_segs;
1713 int delta;
Eric Dumazetc9c33212016-08-27 07:37:54 -07001714
1715 /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
1716 * we can fix skb->truesize to its real value to avoid future drops.
1717 * This is valid because skb is not yet charged to the socket.
 1718	 * It has been noticed that pure SACK packets were sometimes dropped
 1719	 * (when cooked by drivers without the copybreak feature).
1720 */
Eric Dumazet60b1af32017-01-24 14:57:36 -08001721 skb_condense(skb);
Eric Dumazetc9c33212016-08-27 07:37:54 -07001722
Eric Dumazetade96282018-11-19 17:45:55 -08001723 skb_dst_drop(skb);
1724
Eric Dumazet4f693b52018-11-27 14:42:03 -08001725 if (unlikely(tcp_checksum_complete(skb))) {
1726 bh_unlock_sock(sk);
1727 __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1728 __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1729 return true;
1730 }
1731
1732 /* Attempt coalescing to last skb in backlog, even if we are
1733 * above the limits.
1734 * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
1735 */
1736 th = (const struct tcphdr *)skb->data;
1737 hdrlen = th->doff * 4;
1738 shinfo = skb_shinfo(skb);
1739
1740 if (!shinfo->gso_size)
1741 shinfo->gso_size = skb->len - hdrlen;
1742
1743 if (!shinfo->gso_segs)
1744 shinfo->gso_segs = 1;
1745
1746 tail = sk->sk_backlog.tail;
1747 if (!tail)
1748 goto no_coalesce;
1749 thtail = (struct tcphdr *)tail->data;
1750
1751 if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
1752 TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
1753 ((TCP_SKB_CB(tail)->tcp_flags |
Eric Dumazetca2fe292019-04-26 10:10:05 -07001754 TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
1755 !((TCP_SKB_CB(tail)->tcp_flags &
1756 TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
Eric Dumazet4f693b52018-11-27 14:42:03 -08001757 ((TCP_SKB_CB(tail)->tcp_flags ^
1758 TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
1759#ifdef CONFIG_TLS_DEVICE
1760 tail->decrypted != skb->decrypted ||
1761#endif
1762 thtail->doff != th->doff ||
1763 memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
1764 goto no_coalesce;
1765
1766 __skb_pull(skb, hdrlen);
1767 if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
1768 thtail->window = th->window;
1769
1770 TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
1771
1772 if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
1773 TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
1774
Eric Dumazetca2fe292019-04-26 10:10:05 -07001775 /* We have to update both TCP_SKB_CB(tail)->tcp_flags and
1776 * thtail->fin, so that the fast path in tcp_rcv_established()
1777 * is not entered if we append a packet with a FIN.
1778 * SYN, RST, URG are not present.
1779 * ACK is set on both packets.
1780 * PSH : we do not really care in TCP stack,
1781 * at least for 'GRO' packets.
1782 */
1783 thtail->fin |= th->fin;
Eric Dumazet4f693b52018-11-27 14:42:03 -08001784 TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1785
1786 if (TCP_SKB_CB(skb)->has_rxtstamp) {
1787 TCP_SKB_CB(tail)->has_rxtstamp = true;
1788 tail->tstamp = skb->tstamp;
1789 skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
1790 }
1791
1792 /* Not as strict as GRO. We only need to carry mss max value */
1793 skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
1794 skb_shinfo(tail)->gso_size);
1795
1796 gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
1797 skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
1798
1799 sk->sk_backlog.len += delta;
1800 __NET_INC_STATS(sock_net(sk),
1801 LINUX_MIB_TCPBACKLOGCOALESCE);
1802 kfree_skb_partial(skb, fragstolen);
1803 return false;
1804 }
1805 __skb_push(skb, hdrlen);
1806
1807no_coalesce:
1808 /* Only socket owner can try to collapse/prune rx queues
1809 * to reduce memory overhead, so add a little headroom here.
1810 * Few sockets backlog are possibly concurrently non empty.
1811 */
1812 limit += 64*1024;
1813
Eric Dumazetc9c33212016-08-27 07:37:54 -07001814 if (unlikely(sk_add_backlog(sk, skb, limit))) {
1815 bh_unlock_sock(sk);
1816 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
1817 return true;
1818 }
1819 return false;
1820}
1821EXPORT_SYMBOL(tcp_add_backlog);
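
/* Rough sizing sketch with illustrative numbers only: given
 * sk_rcvbuf == sk_sndbuf == 4 MB, the limit computed above admits about
 * 8 MB + 64 KB of skb truesize in the backlog before segments are dropped
 * and LINUX_MIB_TCPBACKLOGDROP is incremented.
 */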
1822
Eric Dumazetac6e7802016-11-10 13:12:35 -08001823int tcp_filter(struct sock *sk, struct sk_buff *skb)
1824{
1825 struct tcphdr *th = (struct tcphdr *)skb->data;
Eric Dumazetac6e7802016-11-10 13:12:35 -08001826
Christoph Paaschf2feaef2019-03-11 11:41:05 -07001827 return sk_filter_trim_cap(sk, skb, th->doff * 4);
Eric Dumazetac6e7802016-11-10 13:12:35 -08001828}
1829EXPORT_SYMBOL(tcp_filter);
1830
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001831static void tcp_v4_restore_cb(struct sk_buff *skb)
1832{
1833 memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
1834 sizeof(struct inet_skb_parm));
1835}
1836
1837static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
1838 const struct tcphdr *th)
1839{
 1840	/* This is tricky : We move IPCB to its correct location inside TCP_SKB_CB();
 1841	 * barrier() makes sure the compiler won't play fool^Waliasing games.
1842 */
1843 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1844 sizeof(struct inet_skb_parm));
1845 barrier();
1846
1847 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1848 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1849 skb->len - th->doff * 4);
1850 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1851 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1852 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1853 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1854 TCP_SKB_CB(skb)->sacked = 0;
1855 TCP_SKB_CB(skb)->has_rxtstamp =
1856 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1857}
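
/* TCP reuses skb->cb[] as struct tcp_skb_cb once a segment enters the TCP
 * layer, so the IP control block is parked in TCP_SKB_CB(skb)->header.h4
 * here and moved back by tcp_v4_restore_cb() whenever the skb has to be
 * re-run from the socket lookup step.
 */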
1858
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859/*
1860 * From tcp_input.c
1861 */
1862
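/* Main receive path for IPv4 TCP segments: validate the header and
 * checksum, look up the owning socket, unwind the TCP_NEW_SYN_RECV and
 * TCP_TIME_WAIT special cases, then deliver via tcp_v4_do_rcv() directly
 * or through the socket backlog when the socket is owned by user context.
 */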
1863int tcp_v4_rcv(struct sk_buff *skb)
1864{
Eric Dumazet3b24d852016-04-01 08:52:17 -07001865 struct net *net = dev_net(skb->dev);
Eric Dumazet8b27dae2019-03-22 08:56:40 -07001866 struct sk_buff *skb_to_free;
David Ahern3fa6f612017-08-07 08:44:17 -07001867 int sdif = inet_sdif(skb);
David Ahern534322c2019-12-30 14:14:27 -08001868 int dif = inet_iif(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001869 const struct iphdr *iph;
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001870 const struct tcphdr *th;
Eric Dumazet3b24d852016-04-01 08:52:17 -07001871 bool refcounted;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872 struct sock *sk;
1873 int ret;
1874
1875 if (skb->pkt_type != PACKET_HOST)
1876 goto discard_it;
1877
1878 /* Count it even if it's bad */
Eric Dumazet90bbcc62016-04-27 16:44:32 -07001879 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880
1881 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1882 goto discard_it;
1883
Eric Dumazetea1627c2016-05-13 09:16:40 -07001884 th = (const struct tcphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885
Eric Dumazetea1627c2016-05-13 09:16:40 -07001886 if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 goto bad_packet;
1888 if (!pskb_may_pull(skb, th->doff * 4))
1889 goto discard_it;
1890
1891 /* An explanation is required here, I think.
1892 * Packet length and doff are validated by header prediction,
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -08001893	 * provided the case of th->doff == 0 is eliminated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 * So, we defer the checks. */
Tom Herberted70fcf2014-05-02 16:29:38 -07001895
1896 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00001897 goto csum_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898
Eric Dumazetea1627c2016-05-13 09:16:40 -07001899 th = (const struct tcphdr *)skb->data;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001900 iph = ip_hdr(skb);
Eric Dumazet4bdc3d62015-10-13 17:12:54 -07001901lookup:
Craig Galleka5836362016-02-10 11:50:38 -05001902 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
David Ahern3fa6f612017-08-07 08:44:17 -07001903 th->dest, sdif, &refcounted);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 if (!sk)
1905 goto no_tcp_socket;
1906
Eric Dumazetbb134d52010-03-09 05:55:56 +00001907process:
1908 if (sk->sk_state == TCP_TIME_WAIT)
1909 goto do_time_wait;
1910
Eric Dumazet079096f2015-10-02 11:43:32 -07001911 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1912 struct request_sock *req = inet_reqsk(sk);
Eric Dumazete0f97592018-02-13 06:14:12 -08001913 bool req_stolen = false;
Eric Dumazet77166822016-02-18 05:39:18 -08001914 struct sock *nsk;
Eric Dumazet079096f2015-10-02 11:43:32 -07001915
1916 sk = req->rsk_listener;
David Ahern534322c2019-12-30 14:14:27 -08001917 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
Eric Dumazete65c3322016-08-24 08:50:24 -07001918 sk_drops_add(sk, skb);
Eric Dumazet72923552016-02-11 22:50:29 -08001919 reqsk_put(req);
1920 goto discard_it;
1921 }
Frank van der Linden4fd44a92018-06-12 23:09:37 +00001922 if (tcp_checksum_complete(skb)) {
1923 reqsk_put(req);
1924 goto csum_error;
1925 }
Eric Dumazet77166822016-02-18 05:39:18 -08001926 if (unlikely(sk->sk_state != TCP_LISTEN)) {
Eric Dumazetf03f2e12015-10-14 11:16:27 -07001927 inet_csk_reqsk_queue_drop_and_put(sk, req);
Eric Dumazet4bdc3d62015-10-13 17:12:54 -07001928 goto lookup;
1929 }
Eric Dumazet3b24d852016-04-01 08:52:17 -07001930 /* We own a reference on the listener, increase it again
1931 * as we might lose it too soon.
1932 */
Eric Dumazet77166822016-02-18 05:39:18 -08001933 sock_hold(sk);
Eric Dumazet3b24d852016-04-01 08:52:17 -07001934 refcounted = true;
Eric Dumazet1f3b3592017-09-08 12:44:47 -07001935 nsk = NULL;
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001936 if (!tcp_filter(sk, skb)) {
1937 th = (const struct tcphdr *)skb->data;
1938 iph = ip_hdr(skb);
1939 tcp_v4_fill_cb(skb, iph, th);
Eric Dumazete0f97592018-02-13 06:14:12 -08001940 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001941 }
Eric Dumazet079096f2015-10-02 11:43:32 -07001942 if (!nsk) {
1943 reqsk_put(req);
Eric Dumazete0f97592018-02-13 06:14:12 -08001944 if (req_stolen) {
1945 /* Another cpu got exclusive access to req
1946 * and created a full blown socket.
1947 * Try to feed this packet to this socket
1948 * instead of discarding it.
1949 */
1950 tcp_v4_restore_cb(skb);
1951 sock_put(sk);
1952 goto lookup;
1953 }
Eric Dumazet77166822016-02-18 05:39:18 -08001954 goto discard_and_relse;
Eric Dumazet079096f2015-10-02 11:43:32 -07001955 }
1956 if (nsk == sk) {
Eric Dumazet079096f2015-10-02 11:43:32 -07001957 reqsk_put(req);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001958 tcp_v4_restore_cb(skb);
Eric Dumazet079096f2015-10-02 11:43:32 -07001959 } else if (tcp_child_process(sk, nsk, skb)) {
1960 tcp_v4_send_reset(nsk, skb);
Eric Dumazet77166822016-02-18 05:39:18 -08001961 goto discard_and_relse;
Eric Dumazet079096f2015-10-02 11:43:32 -07001962 } else {
Eric Dumazet77166822016-02-18 05:39:18 -08001963 sock_put(sk);
Eric Dumazet079096f2015-10-02 11:43:32 -07001964 return 0;
1965 }
1966 }
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001967 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
Eric Dumazet02a1d6e2016-04-27 16:44:39 -07001968 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
Stephen Hemmingerd218d112010-01-11 16:28:01 -08001969 goto discard_and_relse;
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001970 }
Stephen Hemmingerd218d112010-01-11 16:28:01 -08001971
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1973 goto discard_and_relse;
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001974
David Ahern534322c2019-12-30 14:14:27 -08001975 if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001976 goto discard_and_relse;
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001977
Florian Westphal895b5c92019-09-29 20:54:03 +02001978 nf_reset_ct(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979
Eric Dumazetac6e7802016-11-10 13:12:35 -08001980 if (tcp_filter(sk, skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 goto discard_and_relse;
Eric Dumazetac6e7802016-11-10 13:12:35 -08001982 th = (const struct tcphdr *)skb->data;
1983 iph = ip_hdr(skb);
Eric Dumazeteeea10b2017-12-03 09:32:59 -08001984 tcp_v4_fill_cb(skb, iph, th);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985
1986 skb->dev = NULL;
1987
Eric Dumazete994b2f2015-10-02 11:43:39 -07001988 if (sk->sk_state == TCP_LISTEN) {
1989 ret = tcp_v4_do_rcv(sk, skb);
1990 goto put_and_return;
1991 }
1992
1993 sk_incoming_cpu_update(sk);
1994
Ingo Molnarc6366182006-07-03 00:25:13 -07001995 bh_lock_sock_nested(sk);
Martin KaFai Laua44d6ea2016-03-14 10:52:15 -07001996 tcp_segs_in(tcp_sk(sk), skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 ret = 0;
1998 if (!sock_owned_by_user(sk)) {
Eric Dumazet8b27dae2019-03-22 08:56:40 -07001999 skb_to_free = sk->sk_rx_skb_cache;
2000 sk->sk_rx_skb_cache = NULL;
Florian Westphale7942d02017-07-30 03:57:18 +02002001 ret = tcp_v4_do_rcv(sk, skb);
Eric Dumazet8b27dae2019-03-22 08:56:40 -07002002 } else {
2003 if (tcp_add_backlog(sk, skb))
2004 goto discard_and_relse;
2005 skb_to_free = NULL;
Zhu Yi6b03a532010-03-04 18:01:41 +00002006 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007 bh_unlock_sock(sk);
Eric Dumazet8b27dae2019-03-22 08:56:40 -07002008 if (skb_to_free)
2009 __kfree_skb(skb_to_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010
Eric Dumazete994b2f2015-10-02 11:43:39 -07002011put_and_return:
Eric Dumazet3b24d852016-04-01 08:52:17 -07002012 if (refcounted)
2013 sock_put(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014
2015 return ret;
2016
2017no_tcp_socket:
2018 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2019 goto discard_it;
2020
Eric Dumazeteeea10b2017-12-03 09:32:59 -08002021 tcp_v4_fill_cb(skb, iph, th);
2022
Eric Dumazet12e25e12015-06-03 23:49:21 -07002023 if (tcp_checksum_complete(skb)) {
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00002024csum_error:
Eric Dumazet90bbcc62016-04-27 16:44:32 -07002025 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002026bad_packet:
Eric Dumazet90bbcc62016-04-27 16:44:32 -07002027 __TCP_INC_STATS(net, TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002029 tcp_v4_send_reset(NULL, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 }
2031
2032discard_it:
2033 /* Discard frame. */
2034 kfree_skb(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002035 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036
2037discard_and_relse:
Eric Dumazet532182c2016-04-01 08:52:19 -07002038 sk_drops_add(sk, skb);
Eric Dumazet3b24d852016-04-01 08:52:17 -07002039 if (refcounted)
2040 sock_put(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041 goto discard_it;
2042
2043do_time_wait:
2044 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07002045 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046 goto discard_it;
2047 }
2048
Eric Dumazeteeea10b2017-12-03 09:32:59 -08002049 tcp_v4_fill_cb(skb, iph, th);
2050
Eric Dumazet6a5dc9e2013-04-29 08:39:56 +00002051 if (tcp_checksum_complete(skb)) {
2052 inet_twsk_put(inet_twsk(sk));
2053 goto csum_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07002055 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056 case TCP_TW_SYN: {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002057 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
Craig Galleka5836362016-02-10 11:50:38 -05002058 &tcp_hashinfo, skb,
2059 __tcp_hdrlen(th),
Tom Herbertda5e3632013-01-22 09:50:24 +00002060 iph->saddr, th->source,
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07002061 iph->daddr, th->dest,
David Ahern3fa6f612017-08-07 08:44:17 -07002062 inet_iif(skb),
2063 sdif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 if (sk2) {
Eric Dumazetdbe7faa2015-07-08 14:28:30 -07002065 inet_twsk_deschedule_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066 sk = sk2;
Eric Dumazeteeea10b2017-12-03 09:32:59 -08002067 tcp_v4_restore_cb(skb);
Eric Dumazet3b24d852016-04-01 08:52:17 -07002068 refcounted = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069 goto process;
2070 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071 }
Gustavo A. R. Silvafcfd6df2017-10-16 15:48:55 -05002072 /* to ACK */
2073 /* fall through */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 case TCP_TW_ACK:
2075 tcp_v4_timewait_ack(sk, skb);
2076 break;
2077 case TCP_TW_RST:
Florian Westphal271c3b92015-12-21 21:29:26 +01002078 tcp_v4_send_reset(sk, skb);
2079 inet_twsk_deschedule_put(inet_twsk(sk));
2080 goto discard_it;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081 case TCP_TW_SUCCESS:;
2082 }
2083 goto discard_it;
2084}
2085
David S. Millerccb7c412010-12-01 18:09:13 -08002086static struct timewait_sock_ops tcp_timewait_sock_ops = {
2087 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2088 .twsk_unique = tcp_twsk_unique,
2089 .twsk_destructor= tcp_twsk_destructor,
David S. Millerccb7c412010-12-01 18:09:13 -08002090};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091
Eric Dumazet63d02d12012-08-09 14:11:00 +00002092void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
Eric Dumazet5d299f32012-08-06 05:09:33 +00002093{
2094 struct dst_entry *dst = skb_dst(skb);
2095
Eric Dumazet5037e9e2015-12-14 14:08:53 -08002096 if (dst && dst_hold_safe(dst)) {
Eric Dumazetca777ef2014-09-08 08:06:07 -07002097 sk->sk_rx_dst = dst;
2098 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2099 }
Eric Dumazet5d299f32012-08-06 05:09:33 +00002100}
Eric Dumazet63d02d12012-08-09 14:11:00 +00002101EXPORT_SYMBOL(inet_sk_rx_dst_set);
Eric Dumazet5d299f32012-08-06 05:09:33 +00002102
Stephen Hemminger3b401a82009-09-01 19:25:04 +00002103const struct inet_connection_sock_af_ops ipv4_specific = {
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002104 .queue_xmit = ip_queue_xmit,
2105 .send_check = tcp_v4_send_check,
2106 .rebuild_header = inet_sk_rebuild_header,
Eric Dumazet5d299f32012-08-06 05:09:33 +00002107 .sk_rx_dst_set = inet_sk_rx_dst_set,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002108 .conn_request = tcp_v4_conn_request,
2109 .syn_recv_sock = tcp_v4_syn_recv_sock,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002110 .net_header_len = sizeof(struct iphdr),
2111 .setsockopt = ip_setsockopt,
2112 .getsockopt = ip_getsockopt,
2113 .addr2sockaddr = inet_csk_addr2sockaddr,
2114 .sockaddr_len = sizeof(struct sockaddr_in),
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08002115#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002116 .compat_setsockopt = compat_ip_setsockopt,
2117 .compat_getsockopt = compat_ip_getsockopt,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08002118#endif
Neal Cardwell4fab9072014-08-14 12:40:05 -04002119 .mtu_reduced = tcp_v4_mtu_reduced,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002121EXPORT_SYMBOL(ipv4_specific);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002123#ifdef CONFIG_TCP_MD5SIG
Stephen Hemmingerb2e4b3de2009-09-01 19:25:03 +00002124static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002125 .md5_lookup = tcp_v4_md5_lookup,
Adam Langley49a72df2008-07-19 00:01:42 -07002126 .calc_md5_hash = tcp_v4_md5_hash_skb,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002127 .md5_parse = tcp_v4_parse_md5_keys,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002128};
Andrew Mortonb6332e62006-11-30 19:16:28 -08002129#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002130
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131/* NOTE: A lot of things are set to zero explicitly by the call to
 2132 * sk_alloc(), so they need not be done here.
2133 */
2134static int tcp_v4_init_sock(struct sock *sk)
2135{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002136 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137
Neal Cardwell900f65d2012-04-19 09:55:21 +00002138 tcp_init_sock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08002140 icsk->icsk_af_ops = &ipv4_specific;
Neal Cardwell900f65d2012-04-19 09:55:21 +00002141
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002142#ifdef CONFIG_TCP_MD5SIG
David S. Millerac807fa2012-04-23 03:21:58 -04002143 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002144#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 return 0;
2147}
2148
Brian Haley7d06b2e2008-06-14 17:04:49 -07002149void tcp_v4_destroy_sock(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150{
2151 struct tcp_sock *tp = tcp_sk(sk);
2152
Song Liue1a4aa52017-10-23 09:20:26 -07002153 trace_tcp_destroy_sock(sk);
2154
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 tcp_clear_xmit_timers(sk);
2156
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002157 tcp_cleanup_congestion_control(sk);
Stephen Hemminger317a76f2005-06-23 12:19:55 -07002158
Dave Watson734942c2017-06-14 11:37:14 -07002159 tcp_cleanup_ulp(sk);
2160
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 /* Cleanup up the write buffer. */
David S. Millerfe067e82007-03-07 12:12:44 -08002162 tcp_write_queue_purge(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002163
Wei Wangcf1ef3f2017-04-20 14:45:46 -07002164 /* Check if we want to disable active TFO */
2165 tcp_fastopen_active_disable_ofo_check(sk);
2166
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167 /* Cleans up our, hopefully empty, out_of_order_queue. */
Yaogong Wang9f5afea2016-09-07 14:49:28 -07002168 skb_rbtree_purge(&tp->out_of_order_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002170#ifdef CONFIG_TCP_MD5SIG
2171 /* Clean up the MD5 key list, if any */
2172 if (tp->md5sig_info) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +00002173 tcp_clear_md5_list(sk);
Mat Martineaufb7df5e2017-12-21 10:29:10 -08002174 kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08002175 tp->md5sig_info = NULL;
2176 }
2177#endif
2178
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 /* Clean up a referenced TCP bind bucket. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002180 if (inet_csk(sk)->icsk_bind_hash)
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08002181 inet_put_port(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182
Eric Dumazetd983ea62019-10-10 20:17:38 -07002183 BUG_ON(rcu_access_pointer(tp->fastopen_rsk));
William Allen Simpson435cf552009-12-02 18:17:05 +00002184
Yuchung Chengcf60af02012-07-19 06:43:09 +00002185 /* If socket is aborted during connect operation */
2186 tcp_free_fastopen_req(tp);
Yuchung Cheng1fba70e2017-10-18 11:22:51 -07002187 tcp_fastopen_destroy_cipher(sk);
Eric Dumazetcd8ae852015-05-03 21:34:46 -07002188 tcp_saved_syn_free(tp);
Yuchung Chengcf60af02012-07-19 06:43:09 +00002189
Glauber Costa180d8cd2011-12-11 21:47:02 +00002190 sk_sockets_allocated_dec(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192EXPORT_SYMBOL(tcp_v4_destroy_sock);
2193
2194#ifdef CONFIG_PROC_FS
2195/* Proc filesystem TCP sock list dumping. */
2196
Tom Herberta8b690f2010-06-07 00:43:42 -07002197/*
 2198 * Get the next listener socket after cur. If cur is NULL, get the first
 2199 * socket, starting from the bucket given in st->bucket; when st->bucket is
 2200 * zero the very first socket in the hash table is returned.
2201 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202static void *listening_get_next(struct seq_file *seq, void *cur)
2203{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002204 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
Jianjun Kong5799de02008-11-03 02:49:10 -08002205 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002206 struct net *net = seq_file_net(seq);
Eric Dumazet3b24d852016-04-01 08:52:17 -07002207 struct inet_listen_hashbucket *ilb;
Eric Dumazet8dbd76e2019-12-13 18:20:41 -08002208 struct hlist_nulls_node *node;
Eric Dumazet3b24d852016-04-01 08:52:17 -07002209 struct sock *sk = cur;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210
2211 if (!sk) {
Eric Dumazet3b24d852016-04-01 08:52:17 -07002212get_head:
Tom Herberta8b690f2010-06-07 00:43:42 -07002213 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Eric Dumazet9652dc22016-10-19 21:24:58 -07002214 spin_lock(&ilb->lock);
Eric Dumazet8dbd76e2019-12-13 18:20:41 -08002215 sk = sk_nulls_head(&ilb->nulls_head);
Tom Herberta8b690f2010-06-07 00:43:42 -07002216 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217 goto get_sk;
2218 }
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002219 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07002221 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222
Eric Dumazet8dbd76e2019-12-13 18:20:41 -08002223 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224get_sk:
Eric Dumazet8dbd76e2019-12-13 18:20:41 -08002225 sk_nulls_for_each_from(sk, node) {
Pavel Emelyanov8475ef92010-11-22 03:26:12 +00002226 if (!net_eq(sock_net(sk), net))
2227 continue;
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002228 if (sk->sk_family == afinfo->family)
Eric Dumazet3b24d852016-04-01 08:52:17 -07002229 return sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 }
Eric Dumazet9652dc22016-10-19 21:24:58 -07002231 spin_unlock(&ilb->lock);
Tom Herberta8b690f2010-06-07 00:43:42 -07002232 st->offset = 0;
Eric Dumazet3b24d852016-04-01 08:52:17 -07002233 if (++st->bucket < INET_LHTABLE_SIZE)
2234 goto get_head;
2235 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236}
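/*
 * Note: the listener chains walked above are hlist_nulls lists. Instead of
 * ending in NULL they end in a marker value derived from the bucket, which
 * lets lockless (RCU) walkers detect that a socket was moved to another
 * chain mid-walk. Under ilb->lock, as here, the traversal behaves like an
 * ordinary list walk.
 */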
2237
2238static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2239{
Tom Herberta8b690f2010-06-07 00:43:42 -07002240 struct tcp_iter_state *st = seq->private;
2241 void *rc;
2242
2243 st->bucket = 0;
2244 st->offset = 0;
2245 rc = listening_get_next(seq, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246
2247 while (rc && *pos) {
2248 rc = listening_get_next(seq, rc);
2249 --*pos;
2250 }
2251 return rc;
2252}
2253
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002254static inline bool empty_bucket(const struct tcp_iter_state *st)
Andi Kleen6eac5602008-08-28 01:08:02 -07002255{
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002256 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
Andi Kleen6eac5602008-08-28 01:08:02 -07002257}
2258
Tom Herberta8b690f2010-06-07 00:43:42 -07002259/*
 2260 * Get the first established socket, starting from the bucket given in st->bucket.
 2261 * If st->bucket is zero, the very first socket in the hash is returned.
2262 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263static void *established_get_first(struct seq_file *seq)
2264{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002265 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
Jianjun Kong5799de02008-11-03 02:49:10 -08002266 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002267 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 void *rc = NULL;
2269
Tom Herberta8b690f2010-06-07 00:43:42 -07002270 st->offset = 0;
2271 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272 struct sock *sk;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002273 struct hlist_nulls_node *node;
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002274 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275
Andi Kleen6eac5602008-08-28 01:08:02 -07002276 /* Lockless fast path for the common case of empty buckets */
2277 if (empty_bucket(st))
2278 continue;
2279
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002280 spin_lock_bh(lock);
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002281 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002282 if (sk->sk_family != afinfo->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002283 !net_eq(sock_net(sk), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284 continue;
2285 }
2286 rc = sk;
2287 goto out;
2288 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002289 spin_unlock_bh(lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 }
2291out:
2292 return rc;
2293}
2294
2295static void *established_get_next(struct seq_file *seq, void *cur)
2296{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002297 struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 struct sock *sk = cur;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002299 struct hlist_nulls_node *node;
Jianjun Kong5799de02008-11-03 02:49:10 -08002300 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002301 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302
2303 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07002304 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002306 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002308 sk_nulls_for_each_from(sk, node) {
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002309 if (sk->sk_family == afinfo->family &&
2310 net_eq(sock_net(sk), net))
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002311 return sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 }
2313
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002314 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2315 ++st->bucket;
2316 return established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317}
2318
2319static void *established_get_idx(struct seq_file *seq, loff_t pos)
2320{
Tom Herberta8b690f2010-06-07 00:43:42 -07002321 struct tcp_iter_state *st = seq->private;
2322 void *rc;
2323
2324 st->bucket = 0;
2325 rc = established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326
2327 while (rc && pos) {
2328 rc = established_get_next(seq, rc);
2329 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002330 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331 return rc;
2332}
2333
2334static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2335{
2336 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002337 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339 st->state = TCP_SEQ_STATE_LISTENING;
2340 rc = listening_get_idx(seq, &pos);
2341
2342 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343 st->state = TCP_SEQ_STATE_ESTABLISHED;
2344 rc = established_get_idx(seq, pos);
2345 }
2346
2347 return rc;
2348}
2349
Tom Herberta8b690f2010-06-07 00:43:42 -07002350static void *tcp_seek_last_pos(struct seq_file *seq)
2351{
2352 struct tcp_iter_state *st = seq->private;
2353 int offset = st->offset;
2354 int orig_num = st->num;
2355 void *rc = NULL;
2356
2357 switch (st->state) {
Tom Herberta8b690f2010-06-07 00:43:42 -07002358 case TCP_SEQ_STATE_LISTENING:
2359 if (st->bucket >= INET_LHTABLE_SIZE)
2360 break;
2361 st->state = TCP_SEQ_STATE_LISTENING;
2362 rc = listening_get_next(seq, NULL);
2363 while (offset-- && rc)
2364 rc = listening_get_next(seq, rc);
2365 if (rc)
2366 break;
2367 st->bucket = 0;
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002368 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002369 /* Fallthrough */
2370 case TCP_SEQ_STATE_ESTABLISHED:
Tom Herberta8b690f2010-06-07 00:43:42 -07002371 if (st->bucket > tcp_hashinfo.ehash_mask)
2372 break;
2373 rc = established_get_first(seq);
2374 while (offset-- && rc)
2375 rc = established_get_next(seq, rc);
2376 }
2377
2378 st->num = orig_num;
2379
2380 return rc;
2381}
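/*
 * Why the resume logic above matters, with a hedged userspace sketch:
 * /proc/net/tcp is normally consumed in several read() calls, and each one
 * re-enters tcp_seq_start() with an advancing *pos. Matching *pos against
 * st->last_pos lets the iterator continue from the saved (state, bucket,
 * offset) instead of rescanning every bucket from zero. A reader that
 * exercises this path:
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[512];	// small buffer => several sequential reads
 *		ssize_t n;
 *		int fd = open("/proc/net/tcp", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */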
2382
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002383void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384{
Jianjun Kong5799de02008-11-03 02:49:10 -08002385 struct tcp_iter_state *st = seq->private;
Tom Herberta8b690f2010-06-07 00:43:42 -07002386 void *rc;
2387
2388 if (*pos && *pos == st->last_pos) {
2389 rc = tcp_seek_last_pos(seq);
2390 if (rc)
2391 goto out;
2392 }
2393
Linus Torvalds1da177e2005-04-16 15:20:36 -07002394 st->state = TCP_SEQ_STATE_LISTENING;
2395 st->num = 0;
Tom Herberta8b690f2010-06-07 00:43:42 -07002396 st->bucket = 0;
2397 st->offset = 0;
2398 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2399
2400out:
2401 st->last_pos = *pos;
2402 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002404EXPORT_SYMBOL(tcp_seq_start);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002406void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407{
Tom Herberta8b690f2010-06-07 00:43:42 -07002408 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 void *rc = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410
2411 if (v == SEQ_START_TOKEN) {
2412 rc = tcp_get_idx(seq, 0);
2413 goto out;
2414 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415
2416 switch (st->state) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417 case TCP_SEQ_STATE_LISTENING:
2418 rc = listening_get_next(seq, v);
2419 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002420 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002421 st->bucket = 0;
2422 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 rc = established_get_first(seq);
2424 }
2425 break;
2426 case TCP_SEQ_STATE_ESTABLISHED:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002427 rc = established_get_next(seq, v);
2428 break;
2429 }
2430out:
2431 ++*pos;
Tom Herberta8b690f2010-06-07 00:43:42 -07002432 st->last_pos = *pos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002433 return rc;
2434}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002435EXPORT_SYMBOL(tcp_seq_next);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002437void tcp_seq_stop(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438{
Jianjun Kong5799de02008-11-03 02:49:10 -08002439 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440
2441 switch (st->state) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442 case TCP_SEQ_STATE_LISTENING:
2443 if (v != SEQ_START_TOKEN)
Eric Dumazet9652dc22016-10-19 21:24:58 -07002444 spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002445 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446 case TCP_SEQ_STATE_ESTABLISHED:
2447 if (v)
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002448 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449 break;
2450 }
2451}
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002452EXPORT_SYMBOL(tcp_seq_stop);
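/*
 * For reference, a simplified sketch (error handling omitted) of how the
 * seq_file core drives the three callbacks exported above on each read:
 *
 *	p = ops->start(seq, &pos);
 *	while (p) {
 *		ops->show(seq, p);
 *		p = ops->next(seq, p, &pos);
 *	}
 *	ops->stop(seq, p);
 *
 * start() may also return SEQ_START_TOKEN to ask for a header line, which
 * is why tcp4_seq_show() below special-cases that value.
 */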
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453
Eric Dumazetd4f06872015-03-12 16:44:09 -07002454static void get_openreq4(const struct request_sock *req,
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002455 struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002457 const struct inet_request_sock *ireq = inet_rsk(req);
Eric Dumazetfa76ce732015-03-19 19:04:20 -07002458 long delta = req->rsk_timer.expires - jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002460 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002461 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462 i,
Eric Dumazet634fb9792013-10-09 15:21:29 -07002463 ireq->ir_loc_addr,
Eric Dumazetd4f06872015-03-12 16:44:09 -07002464 ireq->ir_num,
Eric Dumazet634fb9792013-10-09 15:21:29 -07002465 ireq->ir_rmt_addr,
2466 ntohs(ireq->ir_rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 TCP_SYN_RECV,
2468 0, 0, /* could print option size, but that is af dependent. */
2469 1, /* timers active (only the expire timer) */
Eric Dumazeta399a802012-08-08 21:13:53 +00002470 jiffies_delta_to_clock_t(delta),
Eric Dumazete6c022a2012-10-27 23:16:46 +00002471 req->num_timeout,
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002472 from_kuid_munged(seq_user_ns(f),
2473 sock_i_uid(req->rsk_listener)),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474	0, /* non-standard timer */
2475 0, /* open_requests have no inode */
Eric Dumazetd4f06872015-03-12 16:44:09 -07002476 0,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002477 req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478}
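/*
 * Format note for the line above: the 32-bit addresses are printed with
 * %08X straight from their network-byte-order storage, so the hex digits
 * come out byte-swapped on little-endian hosts (127.0.0.1:80 reads as
 * "0100007F:0050" on x86). Ports are printed in host byte order and read
 * naturally.
 */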
2479
Tetsuo Handa652586d2013-11-14 14:31:57 -08002480static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481{
2482 int timer_active;
2483 unsigned long timer_expires;
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002484 const struct tcp_sock *tp = tcp_sk(sk);
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002485 const struct inet_connection_sock *icsk = inet_csk(sk);
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002486 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet0536fcc2015-09-29 07:42:52 -07002487 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
Eric Dumazetc720c7e82009-10-15 06:30:45 +00002488 __be32 dest = inet->inet_daddr;
2489 __be32 src = inet->inet_rcv_saddr;
2490 __u16 destp = ntohs(inet->inet_dport);
2491 __u16 srcp = ntohs(inet->inet_sport);
Eric Dumazet49d09002009-12-03 16:06:13 -08002492 int rx_queue;
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002493 int state;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494
Nandita Dukkipati6ba8a3b2013-03-11 10:00:43 +00002495 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
Yuchung Cheng57dde7f2017-01-12 22:11:33 -08002496 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
Nandita Dukkipati6ba8a3b2013-03-11 10:00:43 +00002497 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002499 timer_expires = icsk->icsk_timeout;
2500 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002502 timer_expires = icsk->icsk_timeout;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002503 } else if (timer_pending(&sk->sk_timer)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504 timer_active = 2;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002505 timer_expires = sk->sk_timer.expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506 } else {
2507 timer_active = 0;
2508 timer_expires = jiffies;
2509 }
2510
Yafang Shao986ffdf2017-12-20 11:12:52 +08002511 state = inet_sk_state_load(sk);
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002512 if (state == TCP_LISTEN)
Eric Dumazet288efe82019-11-05 14:11:53 -08002513 rx_queue = READ_ONCE(sk->sk_ack_backlog);
Eric Dumazet49d09002009-12-03 16:06:13 -08002514 else
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002515 /* Because we don't lock the socket,
2516 * we might find a transient negative value.
Eric Dumazet49d09002009-12-03 16:06:13 -08002517 */
Eric Dumazetdba7d9b2019-10-10 20:17:39 -07002518 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
Eric Dumazet7db48e92019-10-10 20:17:40 -07002519 READ_ONCE(tp->copied_seq), 0);
Eric Dumazet49d09002009-12-03 16:06:13 -08002520
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002521 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
Tetsuo Handa652586d2013-11-14 14:31:57 -08002522 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002523 i, src, srcp, dest, destp, state,
Eric Dumazet0f317462019-10-10 20:17:41 -07002524 READ_ONCE(tp->write_seq) - tp->snd_una,
Eric Dumazet49d09002009-12-03 16:06:13 -08002525 rx_queue,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 timer_active,
Eric Dumazeta399a802012-08-08 21:13:53 +00002527 jiffies_delta_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002528 icsk->icsk_retransmits,
Eric W. Biedermana7cb5a42012-05-24 01:10:10 -06002529 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002530 icsk->icsk_probes_out,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002531 sock_i_ino(sk),
Reshetova, Elena41c6d652017-06-30 13:08:01 +03002532 refcount_read(&sk->sk_refcnt), sk,
Stephen Hemminger7be87352008-06-27 20:00:19 -07002533 jiffies_to_clock_t(icsk->icsk_rto),
2534 jiffies_to_clock_t(icsk->icsk_ack.ato),
Wei Wang31954cd2019-01-25 10:53:19 -08002535 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 tp->snd_cwnd,
Eric Dumazet00fd38d2015-11-12 08:43:18 -08002537 state == TCP_LISTEN ?
2538 fastopenq->max_qlen :
Tetsuo Handa652586d2013-11-14 14:31:57 -08002539 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540}
2541
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002542static void get_timewait4_sock(const struct inet_timewait_sock *tw,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002543 struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544{
Eric Dumazet789f5582015-04-12 18:51:09 -07002545 long delta = tw->tw_timer.expires - jiffies;
Al Viro23f33c22006-09-27 18:43:50 -07002546 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002547 __u16 destp, srcp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548
2549 dest = tw->tw_daddr;
2550 src = tw->tw_rcv_saddr;
2551 destp = ntohs(tw->tw_dport);
2552 srcp = ntohs(tw->tw_sport);
2553
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002554 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002555 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
Eric Dumazeta399a802012-08-08 21:13:53 +00002557 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
Reshetova, Elena41c6d652017-06-30 13:08:01 +03002558 refcount_read(&tw->tw_refcnt), tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559}
2560
2561#define TMPSZ 150
2562
2563static int tcp4_seq_show(struct seq_file *seq, void *v)
2564{
Jianjun Kong5799de02008-11-03 02:49:10 -08002565 struct tcp_iter_state *st;
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002566 struct sock *sk = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567
Tetsuo Handa652586d2013-11-14 14:31:57 -08002568 seq_setwidth(seq, TMPSZ - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 if (v == SEQ_START_TOKEN) {
Tetsuo Handa652586d2013-11-14 14:31:57 -08002570 seq_puts(seq, " sl local_address rem_address st tx_queue "
Linus Torvalds1da177e2005-04-16 15:20:36 -07002571 "rx_queue tr tm->when retrnsmt uid timeout "
2572 "inode");
2573 goto out;
2574 }
2575 st = seq->private;
2576
Eric Dumazet079096f2015-10-02 11:43:32 -07002577 if (sk->sk_state == TCP_TIME_WAIT)
2578 get_timewait4_sock(v, seq, st->num);
2579 else if (sk->sk_state == TCP_NEW_SYN_RECV)
Eric Dumazetaa3a0c82015-10-02 11:43:30 -07002580 get_openreq4(v, seq, st->num);
Eric Dumazet079096f2015-10-02 11:43:32 -07002581 else
2582 get_tcp4_sock(v, seq, st->num);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002583out:
Tetsuo Handa652586d2013-11-14 14:31:57 -08002584 seq_pad(seq, '\n');
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585 return 0;
2586}
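/*
 * A hedged userspace sketch of consuming the records emitted above; it
 * relies only on the header produced for SEQ_START_TOKEN and the
 * "%08X:%04X" address:port encoding. Assigning the parsed address back to
 * s_addr recovers the dotted quad because both the kernel's %08X and the
 * parse below use the host's native byte order:
 *
 *	#include <arpa/inet.h>
 *	#include <netinet/in.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[512];
 *		FILE *f = fopen("/proc/net/tcp", "r");
 *
 *		if (!f)
 *			return 1;
 *		fgets(line, sizeof(line), f);	// skip the header line
 *		while (fgets(line, sizeof(line), f)) {
 *			unsigned int laddr, lport, raddr, rport, state;
 *			struct in_addr a;
 *
 *			if (sscanf(line, " %*d: %8X:%4X %8X:%4X %2X",
 *				   &laddr, &lport, &raddr, &rport, &state) != 5)
 *				continue;
 *			a.s_addr = laddr;	// raw value is network order here
 *			printf("%s:%u st %02X\n", inet_ntoa(a), lport, state);
 *		}
 *		fclose(f);
 *		return 0;
 *	}
 */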
2587
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002588static const struct seq_operations tcp4_seq_ops = {
2589 .show = tcp4_seq_show,
2590 .start = tcp_seq_start,
2591 .next = tcp_seq_next,
2592 .stop = tcp_seq_stop,
2593};
2594
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595static struct tcp_seq_afinfo tcp4_seq_afinfo = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 .family = AF_INET,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597};
2598
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002599static int __net_init tcp4_proc_init_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002600{
Christoph Hellwigc3506372018-04-10 19:42:55 +02002601 if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
2602 sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002603 return -ENOMEM;
2604 return 0;
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002605}
2606
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002607static void __net_exit tcp4_proc_exit_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002608{
Christoph Hellwig37d849b2018-04-11 09:31:28 +02002609 remove_proc_entry("tcp", net->proc_net);
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002610}
2611
2612static struct pernet_operations tcp4_net_ops = {
2613 .init = tcp4_proc_init_net,
2614 .exit = tcp4_proc_exit_net,
2615};
2616
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617int __init tcp4_proc_init(void)
2618{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002619 return register_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620}
2621
2622void tcp4_proc_exit(void)
2623{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002624 unregister_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625}
2626#endif /* CONFIG_PROC_FS */
2627
2628struct proto tcp_prot = {
2629 .name = "TCP",
2630 .owner = THIS_MODULE,
2631 .close = tcp_close,
Andrey Ignatovd74bad42018-03-30 15:08:05 -07002632 .pre_connect = tcp_v4_pre_connect,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633 .connect = tcp_v4_connect,
2634 .disconnect = tcp_disconnect,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002635 .accept = inet_csk_accept,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636 .ioctl = tcp_ioctl,
2637 .init = tcp_v4_init_sock,
2638 .destroy = tcp_v4_destroy_sock,
2639 .shutdown = tcp_shutdown,
2640 .setsockopt = tcp_setsockopt,
2641 .getsockopt = tcp_getsockopt,
Ursula Braun4b9d07a2017-01-09 16:55:12 +01002642 .keepalive = tcp_set_keepalive,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002643 .recvmsg = tcp_recvmsg,
Changli Gao7ba42912010-07-10 20:41:55 +00002644 .sendmsg = tcp_sendmsg,
2645 .sendpage = tcp_sendpage,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002646 .backlog_rcv = tcp_v4_do_rcv,
Eric Dumazet46d3cea2012-07-11 05:50:31 +00002647 .release_cb = tcp_release_cb,
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08002648 .hash = inet_hash,
2649 .unhash = inet_unhash,
2650 .get_port = inet_csk_get_port,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002651 .enter_memory_pressure = tcp_enter_memory_pressure,
Eric Dumazet06044752017-06-07 13:29:12 -07002652 .leave_memory_pressure = tcp_leave_memory_pressure,
Eric Dumazetc9bee3b72013-07-22 20:27:07 -07002653 .stream_memory_free = tcp_stream_memory_free,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002654 .sockets_allocated = &tcp_sockets_allocated,
Arnaldo Carvalho de Melo0a5578c2005-08-09 20:11:41 -07002655 .orphan_count = &tcp_orphan_count,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656 .memory_allocated = &tcp_memory_allocated,
2657 .memory_pressure = &tcp_memory_pressure,
Eric W. Biedermana4fe34b2013-10-19 16:25:36 -07002658 .sysctl_mem = sysctl_tcp_mem,
Eric Dumazet356d1832017-11-07 00:29:28 -08002659 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2660 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002661 .max_header = MAX_TCP_HEADER,
2662 .obj_size = sizeof(struct tcp_sock),
Paul E. McKenney5f0d5a32017-01-18 02:53:44 -08002663 .slab_flags = SLAB_TYPESAFE_BY_RCU,
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002664 .twsk_prot = &tcp_timewait_sock_ops,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002665 .rsk_prot = &tcp_request_sock_ops,
Pavel Emelyanov39d8cda2008-03-22 16:50:58 -07002666 .h.hashinfo = &tcp_hashinfo,
Changli Gao7ba42912010-07-10 20:41:55 +00002667 .no_autobind = true,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002668#ifdef CONFIG_COMPAT
2669 .compat_setsockopt = compat_tcp_setsockopt,
2670 .compat_getsockopt = compat_tcp_getsockopt,
2671#endif
Lorenzo Colittic1e64e22015-12-16 12:30:05 +09002672 .diag_destroy = tcp_abort,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002673};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002674EXPORT_SYMBOL(tcp_prot);
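/*
 * These function pointers are how the generic socket layer reaches TCP:
 * e.g. connect(2) on an AF_INET stream socket goes sys_connect ->
 * inet_stream_connect -> sk->sk_prot->connect, which resolves to
 * tcp_v4_connect() through the table above.
 */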
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675
Denis V. Lunev046ee902008-04-03 14:31:33 -07002676static void __net_exit tcp_sk_exit(struct net *net)
2677{
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002678 int cpu;
2679
Dust Lib506bc92019-04-01 16:04:53 +08002680 if (net->ipv4.tcp_congestion_control)
Martin KaFai Lau0baf26b2020-01-08 16:35:08 -08002681 bpf_module_put(net->ipv4.tcp_congestion_control,
2682 net->ipv4.tcp_congestion_control->owner);
Stephen Hemminger6670e152017-11-14 08:25:49 -08002683
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002684 for_each_possible_cpu(cpu)
2685 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2686 free_percpu(net->ipv4.tcp_sk);
2687}
2688
2689static int __net_init tcp_sk_init(struct net *net)
2690{
Haishuang Yanfee83d02016-12-28 17:52:33 +08002691 int res, cpu, cnt;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002692
2693 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2694 if (!net->ipv4.tcp_sk)
2695 return -ENOMEM;
2696
2697 for_each_possible_cpu(cpu) {
2698 struct sock *sk;
2699
2700 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2701 IPPROTO_TCP, net);
2702 if (res)
2703 goto fail;
Eric Dumazeta9d65322016-04-01 08:52:21 -07002704 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
Eric Dumazet431280e2018-08-22 13:30:45 -07002705
2706 /* Please enforce IP_DF and IPID==0 for RST and
2707 * ACK sent in SYN-RECV and TIME-WAIT state.
2708 */
2709 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
2710
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002711 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2712 }
Daniel Borkmann49213552015-05-19 21:04:22 +02002713
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002714 net->ipv4.sysctl_tcp_ecn = 2;
Daniel Borkmann49213552015-05-19 21:04:22 +02002715 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2716
Fan Dub0f9ca52015-02-10 09:53:16 +08002717 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
Eric Dumazet5f3e2bf002019-06-06 09:15:31 -07002718 net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
Fan Du6b58e0a2015-03-06 11:18:23 +08002719 net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
Fan Du05cbc0d2015-03-06 11:18:24 +08002720 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
Josh Huntc04b79b2019-08-07 19:52:29 -04002721 net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002722
Nikolay Borisov13b287e2016-01-07 16:38:43 +02002723 net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
Nikolay Borisov9bd68612016-01-07 16:38:44 +02002724 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
Nikolay Borisovb840d152016-01-07 16:38:45 +02002725 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
Nikolay Borisov13b287e2016-01-07 16:38:43 +02002726
Nikolay Borisov6fa25162016-02-03 09:46:49 +02002727 net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
Nikolay Borisov7c083ec2016-02-03 09:46:50 +02002728 net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
David S. Miller0aca7372016-02-08 04:24:33 -05002729 net->ipv4.sysctl_tcp_syncookies = 1;
Nikolay Borisov1043e252016-02-03 09:46:52 +02002730 net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
Nikolay Borisovae5c3f42016-02-03 09:46:53 +02002731 net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
Nikolay Borisovc6214a92016-02-03 09:46:54 +02002732 net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
Nikolay Borisovc402d9b2016-02-03 09:46:55 +02002733 net->ipv4.sysctl_tcp_orphan_retries = 0;
Nikolay Borisov1e579ca2016-02-03 09:46:56 +02002734 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
Nikolay Borisov4979f2d2016-02-03 09:46:57 +02002735 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
Maciej Żenczykowski79e9fed2018-06-03 10:41:17 -07002736 net->ipv4.sysctl_tcp_tw_reuse = 2;
Kevin(Yudong) Yang65e6d902019-12-09 14:19:59 -05002737 net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
Nikolay Borisov12ed8242016-02-03 09:46:51 +02002738
Haishuang Yanfee83d02016-12-28 17:52:33 +08002739 cnt = tcp_hashinfo.ehash_mask + 1;
Yafang Shao743e4812018-09-01 20:21:05 +08002740 net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
Haishuang Yan1946e672016-12-28 17:52:32 +08002741 net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
2742
Eric Dumazet623d0c22019-10-30 10:05:46 -07002743 net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
Eric Dumazetf9301032017-06-07 10:34:37 -07002744 net->ipv4.sysctl_tcp_sack = 1;
Eric Dumazet9bb37ef2017-06-07 10:34:38 -07002745 net->ipv4.sysctl_tcp_window_scaling = 1;
Eric Dumazet5d2ed052017-06-07 10:34:39 -07002746 net->ipv4.sysctl_tcp_timestamps = 1;
Eric Dumazet2ae21cf2017-10-26 21:54:56 -07002747 net->ipv4.sysctl_tcp_early_retrans = 3;
Eric Dumazete20223f2017-10-26 21:54:57 -07002748 net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
Eric Dumazetb510f0d2017-10-26 21:54:59 -07002749 net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
Eric Dumazete0a1e5b2017-10-26 21:55:00 -07002750 net->ipv4.sysctl_tcp_retrans_collapse = 1;
Eric Dumazetc6e21802017-10-26 21:55:06 -07002751 net->ipv4.sysctl_tcp_max_reordering = 300;
Eric Dumazet6496f6b2017-10-26 21:55:07 -07002752 net->ipv4.sysctl_tcp_dsack = 1;
Eric Dumazet0c126542017-10-26 21:55:08 -07002753 net->ipv4.sysctl_tcp_app_win = 31;
Eric Dumazet94f08932017-10-26 21:55:09 -07002754 net->ipv4.sysctl_tcp_adv_win_scale = 1;
Eric Dumazetaf9b69a2017-10-26 21:55:10 -07002755 net->ipv4.sysctl_tcp_frto = 2;
Eric Dumazet4540c0c2017-10-27 07:47:22 -07002756 net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
Eric Dumazetd06a9902017-10-27 07:47:23 -07002757 /* This limits the percentage of the congestion window which we
2758 * will allow a single TSO frame to consume. Building TSO frames
2759 * which are too large can cause TCP streams to be bursty.
2760 */
2761 net->ipv4.sysctl_tcp_tso_win_divisor = 3;
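	/* Example: with the divisor of 3 set above and a congestion window
	 * of 30 packets, a single TSO frame may carry at most ~10 packets.
	 */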
Eric Dumazetc73e5802018-11-11 07:34:28 -08002762 /* Default TSQ limit of 16 TSO segments */
2763 net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
Eric Dumazetb530b682017-10-27 07:47:26 -07002764 /* rfc5961 challenge ack rate limiting */
2765 net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
Eric Dumazet26e95962017-10-27 07:47:27 -07002766 net->ipv4.sysctl_tcp_min_tso_segs = 2;
Eric Dumazetbd239702017-10-27 07:47:28 -07002767 net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
Eric Dumazet790f00e2017-10-27 07:47:29 -07002768 net->ipv4.sysctl_tcp_autocorking = 1;
Eric Dumazet4170ba62017-10-27 07:47:30 -07002769 net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
Eric Dumazet23a7102a2017-10-27 07:47:31 -07002770 net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
Eric Dumazetc26e91f2017-10-27 07:47:32 -07002771 net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
Eric Dumazet356d1832017-11-07 00:29:28 -08002772 if (net != &init_net) {
2773 memcpy(net->ipv4.sysctl_tcp_rmem,
2774 init_net.ipv4.sysctl_tcp_rmem,
2775 sizeof(init_net.ipv4.sysctl_tcp_rmem));
2776 memcpy(net->ipv4.sysctl_tcp_wmem,
2777 init_net.ipv4.sysctl_tcp_wmem,
2778 sizeof(init_net.ipv4.sysctl_tcp_wmem));
2779 }
Eric Dumazet6d82aa22018-05-17 14:47:28 -07002780 net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
Eric Dumazet9c21d2f2018-05-17 14:47:29 -07002781 net->ipv4.sysctl_tcp_comp_sack_nr = 44;
Haishuang Yane1cfcbe2017-09-27 11:35:40 +08002782 net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
Haishuang Yan43713842017-09-27 11:35:42 +08002783 spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
Haishuang Yan3733be12017-09-27 11:35:43 +08002784 net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
2785 atomic_set(&net->ipv4.tfo_active_disable_times, 0);
Haishuang Yane1cfcbe2017-09-27 11:35:40 +08002786
Stephen Hemminger6670e152017-11-14 08:25:49 -08002787 /* Reno is always built in */
2788 if (!net_eq(net, &init_net) &&
Martin KaFai Lau0baf26b2020-01-08 16:35:08 -08002789 bpf_try_module_get(init_net.ipv4.tcp_congestion_control,
2790 init_net.ipv4.tcp_congestion_control->owner))
Stephen Hemminger6670e152017-11-14 08:25:49 -08002791 net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
2792 else
2793 net->ipv4.tcp_congestion_control = &tcp_reno;
2794
Daniel Borkmann49213552015-05-19 21:04:22 +02002795 return 0;
Eric Dumazetbdbbb852015-01-29 21:35:05 -08002796fail:
2797 tcp_sk_exit(net);
2798
2799 return res;
Eric W. Biedermanb099ce22009-12-03 02:29:09 +00002800}
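/*
 * The defaults installed above surface as per-namespace files under
 * /proc/sys/net/ipv4/. A small sketch reading one of them back (the "6"
 * below assumes TCP_SYN_RETRIES still defaults to 6):
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char val[32];
 *		FILE *f = fopen("/proc/sys/net/ipv4/tcp_syn_retries", "r");
 *
 *		if (!f)
 *			return 1;
 *		if (fgets(val, sizeof(val), f))
 *			printf("tcp_syn_retries = %s", val);	// "6" by default
 *		fclose(f);
 *		return 0;
 *	}
 */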
2801
2802static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2803{
Haishuang Yan43713842017-09-27 11:35:42 +08002804 struct net *net;
2805
Haishuang Yan1946e672016-12-28 17:52:32 +08002806 inet_twsk_purge(&tcp_hashinfo, AF_INET);
Haishuang Yan43713842017-09-27 11:35:42 +08002807
2808 list_for_each_entry(net, net_exit_list, exit_list)
2809 tcp_fastopen_ctx_destroy(net);
Denis V. Lunev046ee902008-04-03 14:31:33 -07002810}
2811
2812static struct pernet_operations __net_initdata tcp_sk_ops = {
Eric W. Biedermanb099ce22009-12-03 02:29:09 +00002813 .init = tcp_sk_init,
2814 .exit = tcp_sk_exit,
2815 .exit_batch = tcp_sk_exit_batch,
Denis V. Lunev046ee902008-04-03 14:31:33 -07002816};
2817
Denis V. Lunev9b0f9762008-02-29 11:13:15 -08002818void __init tcp_v4_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002819{
Eric W. Biederman6a1b3052009-02-22 00:10:18 -08002820 if (register_pernet_subsys(&tcp_sk_ops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002821 panic("Failed to create the TCP control socket.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002822}