blob: dadf802724131d199117bd1dbf7c6ee0b12a2079 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
25
/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */
54
Linus Torvalds1da177e2005-04-16 15:20:36 -070055
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
64
65#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070066#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030068#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <net/ipv6.h>
70#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080071#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <net/xfrm.h>
Chris Leech1a2449a2006-05-23 18:05:53 -070073#include <net/netdma.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070074
75#include <linux/inet.h>
76#include <linux/ipv6.h>
77#include <linux/stddef.h>
78#include <linux/proc_fs.h>
79#include <linux/seq_file.h>
80
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080081#include <linux/crypto.h>
82#include <linux/scatterlist.h>
83
Brian Haleyab32ea52006-09-22 14:15:41 -070084int sysctl_tcp_tw_reuse __read_mostly;
85int sysctl_tcp_low_latency __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
87/* Check TCP sequence numbers in ICMP packets. */
88#define ICMP_MIN_LENGTH 8
89
90/* Socket used for sending RSTs */
91static struct socket *tcp_socket;
92
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -080093void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -070094
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080095#ifdef CONFIG_TCP_MD5SIG
96static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr);
97static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
98 __be32 saddr, __be32 daddr, struct tcphdr *th,
99 int protocol, int tcplen);
100#endif
101
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -0700102struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
Ingo Molnare4d91912006-07-03 00:24:34 -0700103 .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -0700104 .lhash_users = ATOMIC_INIT(0),
105 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106};
107
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700108static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
109{
Arnaldo Carvalho de Melo971af182005-12-13 23:14:47 -0800110 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
111 inet_csk_bind_conflict);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700112}
113
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114static void tcp_v4_hash(struct sock *sk)
115{
Arnaldo Carvalho de Melo81849d12005-08-09 20:08:50 -0700116 inet_hash(&tcp_hashinfo, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117}
118
119void tcp_unhash(struct sock *sk)
120{
Arnaldo Carvalho de Melo81849d12005-08-09 20:08:50 -0700121 inet_unhash(&tcp_hashinfo, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122}
123
Gerrit Renkera94f7232006-11-10 14:06:49 -0800124static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125{
126 return secure_tcp_sequence_number(skb->nh.iph->daddr,
127 skb->nh.iph->saddr,
128 skb->h.th->dest,
129 skb->h.th->source);
130}
131
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800132int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
133{
134 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
135 struct tcp_sock *tp = tcp_sk(sk);
136
137 /* With PAWS, it is safe from the viewpoint
138 of data integrity. Even without PAWS it is safe provided sequence
139 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
140
141 Actually, the idea is close to VJ's one, only timestamp cache is
142 held not per host, but per port pair and TW bucket is used as state
143 holder.
144
145 If TW bucket has been already destroyed we fall back to VJ's scheme
146 and use initial timestamp retrieved from peer table.
147 */
148 if (tcptw->tw_ts_recent_stamp &&
149 (twp == NULL || (sysctl_tcp_tw_reuse &&
150 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
151 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
152 if (tp->write_seq == 0)
153 tp->write_seq = 1;
154 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
155 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
156 sock_hold(sktw);
157 return 1;
158 }
159
160 return 0;
161}
162
163EXPORT_SYMBOL_GPL(tcp_twsk_unique);
164
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165/* This will initiate an outgoing connection. */
166int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
167{
168 struct inet_sock *inet = inet_sk(sk);
169 struct tcp_sock *tp = tcp_sk(sk);
170 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
171 struct rtable *rt;
Al Virobada8ad2006-09-26 21:27:15 -0700172 __be32 daddr, nexthop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 int tmp;
174 int err;
175
176 if (addr_len < sizeof(struct sockaddr_in))
177 return -EINVAL;
178
179 if (usin->sin_family != AF_INET)
180 return -EAFNOSUPPORT;
181
182 nexthop = daddr = usin->sin_addr.s_addr;
183 if (inet->opt && inet->opt->srr) {
184 if (!daddr)
185 return -EINVAL;
186 nexthop = inet->opt->faddr;
187 }
188
189 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
190 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
191 IPPROTO_TCP,
192 inet->sport, usin->sin_port, sk);
193 if (tmp < 0)
194 return tmp;
195
196 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
197 ip_rt_put(rt);
198 return -ENETUNREACH;
199 }
200
201 if (!inet->opt || !inet->opt->srr)
202 daddr = rt->rt_dst;
203
204 if (!inet->saddr)
205 inet->saddr = rt->rt_src;
206 inet->rcv_saddr = inet->saddr;
207
208 if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
209 /* Reset inherited state */
210 tp->rx_opt.ts_recent = 0;
211 tp->rx_opt.ts_recent_stamp = 0;
212 tp->write_seq = 0;
213 }
214
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700215 if (tcp_death_row.sysctl_tw_recycle &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
217 struct inet_peer *peer = rt_get_peer(rt);
218
219 /* VJ's idea. We save last timestamp seen from
220 * the destination in peer table, when entering state TIME-WAIT
221 * and initialize rx_opt.ts_recent from it, when trying new connection.
222 */
223
224 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
225 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
226 tp->rx_opt.ts_recent = peer->tcp_ts;
227 }
228 }
229
230 inet->dport = usin->sin_port;
231 inet->daddr = daddr;
232
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800233 inet_csk(sk)->icsk_ext_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 if (inet->opt)
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800235 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236
237 tp->rx_opt.mss_clamp = 536;
238
239 /* Socket identity is still unknown (sport may be zero).
240 * However we set state to SYN-SENT and not releasing socket
241 * lock select source port, enter ourselves into the hash tables and
242 * complete initialization after this.
243 */
244 tcp_set_state(sk, TCP_SYN_SENT);
Arnaldo Carvalho de Meloa7f5e7f2005-12-13 23:25:31 -0800245 err = inet_hash_connect(&tcp_death_row, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 if (err)
247 goto failure;
248
Patrick McHardy5d39a792006-01-31 17:35:35 -0800249 err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 if (err)
251 goto failure;
252
253 /* OK, now commit destination to socket. */
Herbert Xubcd76112006-06-30 13:36:35 -0700254 sk->sk_gso_type = SKB_GSO_TCPV4;
Arnaldo Carvalho de Melo6cbb0df2005-08-09 19:49:02 -0700255 sk_setup_caps(sk, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256
257 if (!tp->write_seq)
258 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
259 inet->daddr,
260 inet->sport,
261 usin->sin_port);
262
263 inet->id = tp->write_seq ^ jiffies;
264
265 err = tcp_connect(sk);
266 rt = NULL;
267 if (err)
268 goto failure;
269
270 return 0;
271
272failure:
273 /* This unhashes the socket and releases the local port, if necessary. */
274 tcp_set_state(sk, TCP_CLOSE);
275 ip_rt_put(rt);
276 sk->sk_route_caps = 0;
277 inet->dport = 0;
278 return err;
279}
280
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281/*
282 * This routine does path mtu discovery as defined in RFC1191.
283 */
Stephen Hemminger40efc6f2006-01-03 16:03:49 -0800284static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285{
286 struct dst_entry *dst;
287 struct inet_sock *inet = inet_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288
289 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
290 * send out by Linux are always <576bytes so they should go through
291 * unfragmented).
292 */
293 if (sk->sk_state == TCP_LISTEN)
294 return;
295
296 /* We don't check in the destentry if pmtu discovery is forbidden
297 * on this route. We just assume that no packet_to_big packets
298 * are send back when pmtu discovery is not active.
299 * There is a small race when the user changes this flag in the
300 * route, but I think that's acceptable.
301 */
302 if ((dst = __sk_dst_check(sk, 0)) == NULL)
303 return;
304
305 dst->ops->update_pmtu(dst, mtu);
306
307 /* Something is about to be wrong... Remember soft error
308 * for the case, if this connection will not able to recover.
309 */
310 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
311 sk->sk_err_soft = EMSGSIZE;
312
313 mtu = dst_mtu(dst);
314
315 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800316 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 tcp_sync_mss(sk, mtu);
318
319 /* Resend the TCP packet because it's
320 * clear that the old packet has been
321 * dropped. This is the new "fast" path mtu
322 * discovery.
323 */
324 tcp_simple_retransmit(sk);
325 } /* else let the usual retransmit timer handle it */
326}
327
328/*
329 * This routine is called by the ICMP module when it gets some
330 * sort of error condition. If err < 0 then the socket should
331 * be closed and the error returned to the user. If err > 0
332 * it's just the icmp type << 8 | icmp code. After adjustment
333 * header points to the first 8 bytes of the tcp header. We need
334 * to find the appropriate port.
335 *
336 * The locking strategy used here is very "optimistic". When
337 * someone else accesses the socket the ICMP is just dropped
338 * and for some paths there is no check at all.
339 * A more general error queue to queue errors for later handling
340 * is probably better.
341 *
342 */
343
344void tcp_v4_err(struct sk_buff *skb, u32 info)
345{
346 struct iphdr *iph = (struct iphdr *)skb->data;
347 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
348 struct tcp_sock *tp;
349 struct inet_sock *inet;
350 int type = skb->h.icmph->type;
351 int code = skb->h.icmph->code;
352 struct sock *sk;
353 __u32 seq;
354 int err;
355
356 if (skb->len < (iph->ihl << 2) + 8) {
357 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
358 return;
359 }
360
Arnaldo Carvalho de Meloe48c4142005-08-09 20:09:46 -0700361 sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700362 th->source, inet_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 if (!sk) {
364 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
365 return;
366 }
367 if (sk->sk_state == TCP_TIME_WAIT) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -0700368 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 return;
370 }
371
372 bh_lock_sock(sk);
373 /* If too many ICMPs get dropped on busy
374 * servers this needs to be solved differently.
375 */
376 if (sock_owned_by_user(sk))
377 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
378
379 if (sk->sk_state == TCP_CLOSE)
380 goto out;
381
382 tp = tcp_sk(sk);
383 seq = ntohl(th->seq);
384 if (sk->sk_state != TCP_LISTEN &&
385 !between(seq, tp->snd_una, tp->snd_nxt)) {
Eric Dumazet06ca7192006-10-20 00:22:25 -0700386 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 goto out;
388 }
389
390 switch (type) {
391 case ICMP_SOURCE_QUENCH:
392 /* Just silently ignore these. */
393 goto out;
394 case ICMP_PARAMETERPROB:
395 err = EPROTO;
396 break;
397 case ICMP_DEST_UNREACH:
398 if (code > NR_ICMP_UNREACH)
399 goto out;
400
401 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
402 if (!sock_owned_by_user(sk))
403 do_pmtu_discovery(sk, iph, info);
404 goto out;
405 }
406
407 err = icmp_err_convert[code].errno;
408 break;
409 case ICMP_TIME_EXCEEDED:
410 err = EHOSTUNREACH;
411 break;
412 default:
413 goto out;
414 }
415
416 switch (sk->sk_state) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700417 struct request_sock *req, **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 case TCP_LISTEN:
419 if (sock_owned_by_user(sk))
420 goto out;
421
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700422 req = inet_csk_search_req(sk, &prev, th->dest,
423 iph->daddr, iph->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 if (!req)
425 goto out;
426
427 /* ICMPs are not backlogged, hence we cannot get
428 an established socket here.
429 */
430 BUG_TRAP(!req->sk);
431
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700432 if (seq != tcp_rsk(req)->snt_isn) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
434 goto out;
435 }
436
437 /*
438 * Still in SYN_RECV, just remove it silently.
439 * There is no good way to pass the error to the newly
440 * created socket, and POSIX does not want network
441 * errors returned from accept().
442 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700443 inet_csk_reqsk_queue_drop(sk, req, prev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 goto out;
445
446 case TCP_SYN_SENT:
447 case TCP_SYN_RECV: /* Cannot happen.
448 It can f.e. if SYNs crossed.
449 */
450 if (!sock_owned_by_user(sk)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 sk->sk_err = err;
452
453 sk->sk_error_report(sk);
454
455 tcp_done(sk);
456 } else {
457 sk->sk_err_soft = err;
458 }
459 goto out;
460 }
461
462 /* If we've already connected we will keep trying
463 * until we time out, or the user gives up.
464 *
465 * rfc1122 4.2.3.9 allows to consider as hard errors
466 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
467 * but it is obsoleted by pmtu discovery).
468 *
469 * Note, that in modern internet, where routing is unreliable
470 * and in each dark corner broken firewalls sit, sending random
471 * errors ordered by their masters even this two messages finally lose
472 * their original sense (even Linux sends invalid PORT_UNREACHs)
473 *
474 * Now we are in compliance with RFCs.
475 * --ANK (980905)
476 */
477
478 inet = inet_sk(sk);
479 if (!sock_owned_by_user(sk) && inet->recverr) {
480 sk->sk_err = err;
481 sk->sk_error_report(sk);
482 } else { /* Only an error on timeout */
483 sk->sk_err_soft = err;
484 }
485
486out:
487 bh_unlock_sock(sk);
488 sock_put(sk);
489}
490
491/* This routine computes an IPv4 TCP checksum. */
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -0800492void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493{
494 struct inet_sock *inet = inet_sk(sk);
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -0800495 struct tcphdr *th = skb->h.th;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496
Patrick McHardy84fa7932006-08-29 16:44:56 -0700497 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
499 skb->csum = offsetof(struct tcphdr, check);
500 } else {
501 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
502 csum_partial((char *)th,
503 th->doff << 2,
504 skb->csum));
505 }
506}
507
Herbert Xua430a432006-07-08 13:34:56 -0700508int tcp_v4_gso_send_check(struct sk_buff *skb)
509{
510 struct iphdr *iph;
511 struct tcphdr *th;
512
513 if (!pskb_may_pull(skb, sizeof(*th)))
514 return -EINVAL;
515
516 iph = skb->nh.iph;
517 th = skb->h.th;
518
519 th->check = 0;
520 th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
521 skb->csum = offsetof(struct tcphdr, check);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700522 skb->ip_summed = CHECKSUM_PARTIAL;
Herbert Xua430a432006-07-08 13:34:56 -0700523 return 0;
524}
525
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526/*
527 * This routine will send an RST to the other tcp.
528 *
529 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
530 * for reset.
531 * Answer: if a packet caused RST, it is not for a socket
532 * existing in our system, if it is matched to a socket,
533 * it is just duplicate segment or bug in other side's TCP.
534 * So that we build reply only basing on parameters
535 * arrived with segment.
536 * Exception: precedence violation. We do not implement it in any case.
537 */
538
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800539static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540{
541 struct tcphdr *th = skb->h.th;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800542 struct {
543 struct tcphdr th;
544#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800545 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800546#endif
547 } rep;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 struct ip_reply_arg arg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800549#ifdef CONFIG_TCP_MD5SIG
550 struct tcp_md5sig_key *key;
551#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552
553 /* Never send a reset in response to a reset. */
554 if (th->rst)
555 return;
556
557 if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
558 return;
559
560 /* Swap the send and the receive. */
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800561 memset(&rep, 0, sizeof(rep));
562 rep.th.dest = th->source;
563 rep.th.source = th->dest;
564 rep.th.doff = sizeof(struct tcphdr) / 4;
565 rep.th.rst = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566
567 if (th->ack) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800568 rep.th.seq = th->ack_seq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800570 rep.th.ack = 1;
571 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
572 skb->len - (th->doff << 2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 }
574
575 memset(&arg, 0, sizeof arg);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800576 arg.iov[0].iov_base = (unsigned char *)&rep;
577 arg.iov[0].iov_len = sizeof(rep.th);
578
579#ifdef CONFIG_TCP_MD5SIG
580 key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
581 if (key) {
582 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
583 (TCPOPT_NOP << 16) |
584 (TCPOPT_MD5SIG << 8) |
585 TCPOLEN_MD5SIG);
586 /* Update length and the length the header thinks exists */
587 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
588 rep.th.doff = arg.iov[0].iov_len / 4;
589
590 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
591 key,
592 skb->nh.iph->daddr,
593 skb->nh.iph->saddr,
594 &rep.th, IPPROTO_TCP,
595 arg.iov[0].iov_len);
596 }
597#endif
598
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
600 skb->nh.iph->saddr, /*XXX*/
601 sizeof(struct tcphdr), IPPROTO_TCP, 0);
602 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
603
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800604 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605
606 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
607 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
608}
609
610/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
611 outside socket context is ugly, certainly. What can I do?
612 */
613
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800614static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
615 struct sk_buff *skb, u32 seq, u32 ack,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 u32 win, u32 ts)
617{
618 struct tcphdr *th = skb->h.th;
619 struct {
620 struct tcphdr th;
Al Viro714e85b2006-11-14 20:51:49 -0800621 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800622#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800623 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800624#endif
625 ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 } rep;
627 struct ip_reply_arg arg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800628#ifdef CONFIG_TCP_MD5SIG
629 struct tcp_md5sig_key *key;
630 struct tcp_md5sig_key tw_key;
631#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
633 memset(&rep.th, 0, sizeof(struct tcphdr));
634 memset(&arg, 0, sizeof arg);
635
636 arg.iov[0].iov_base = (unsigned char *)&rep;
637 arg.iov[0].iov_len = sizeof(rep.th);
638 if (ts) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800639 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
640 (TCPOPT_TIMESTAMP << 8) |
641 TCPOLEN_TIMESTAMP);
642 rep.opt[1] = htonl(tcp_time_stamp);
643 rep.opt[2] = htonl(ts);
644 arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 }
646
647 /* Swap the send and the receive. */
648 rep.th.dest = th->source;
649 rep.th.source = th->dest;
650 rep.th.doff = arg.iov[0].iov_len / 4;
651 rep.th.seq = htonl(seq);
652 rep.th.ack_seq = htonl(ack);
653 rep.th.ack = 1;
654 rep.th.window = htons(win);
655
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800656#ifdef CONFIG_TCP_MD5SIG
657 /*
658 * The SKB holds an imcoming packet, but may not have a valid ->sk
659 * pointer. This is especially the case when we're dealing with a
660 * TIME_WAIT ack, because the sk structure is long gone, and only
661 * the tcp_timewait_sock remains. So the md5 key is stashed in that
662 * structure, and we use it in preference. I believe that (twsk ||
663 * skb->sk) holds true, but we program defensively.
664 */
665 if (!twsk && skb->sk) {
666 key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
667 } else if (twsk && twsk->tw_md5_keylen) {
668 tw_key.key = twsk->tw_md5_key;
669 tw_key.keylen = twsk->tw_md5_keylen;
670 key = &tw_key;
671 } else {
672 key = NULL;
673 }
674
675 if (key) {
676 int offset = (ts) ? 3 : 0;
677
678 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
679 (TCPOPT_NOP << 16) |
680 (TCPOPT_MD5SIG << 8) |
681 TCPOLEN_MD5SIG);
682 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
683 rep.th.doff = arg.iov[0].iov_len/4;
684
685 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
686 key,
687 skb->nh.iph->daddr,
688 skb->nh.iph->saddr,
689 &rep.th, IPPROTO_TCP,
690 arg.iov[0].iov_len);
691 }
692#endif
693
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
695 skb->nh.iph->saddr, /*XXX*/
696 arg.iov[0].iov_len, IPPROTO_TCP, 0);
697 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
698
699 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
700
701 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
702}
703
704static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
705{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700706 struct inet_timewait_sock *tw = inet_twsk(sk);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800707 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800709 tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700710 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700712 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713}
714
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700715static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800717 tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
718 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 req->ts_recent);
720}
721
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722/*
723 * Send a SYN-ACK after having received an ACK.
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700724 * This still operates on a request_sock only, not on a big
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 * socket.
726 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700727static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728 struct dst_entry *dst)
729{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700730 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 int err = -1;
732 struct sk_buff * skb;
733
734 /* First, grab a route. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700735 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 goto out;
737
738 skb = tcp_make_synack(sk, dst, req);
739
740 if (skb) {
741 struct tcphdr *th = skb->h.th;
742
743 th->check = tcp_v4_check(th, skb->len,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700744 ireq->loc_addr,
745 ireq->rmt_addr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 csum_partial((char *)th, skb->len,
747 skb->csum));
748
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700749 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
750 ireq->rmt_addr,
751 ireq->opt);
Gerrit Renkerb9df3cb2006-11-14 11:21:36 -0200752 err = net_xmit_eval(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753 }
754
755out:
756 dst_release(dst);
757 return err;
758}
759
760/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700761 * IPv4 request_sock destructor.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700763static void tcp_v4_reqsk_destructor(struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764{
Jesper Juhla51482b2005-11-08 09:41:34 -0800765 kfree(inet_rsk(req)->opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766}
767
#ifdef CONFIG_SYN_COOKIES
/*
 * Log a "possible SYN flooding" notice for the destination port of the
 * segment in @skb.  The message is emitted only when more than 60 seconds
 * (HZ * 60 jiffies) have passed since the last one.
 */
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (!time_after(jiffies, (warntime + HZ * 60)))
		return;

	warntime = jiffies;
	printk(KERN_INFO
	       "possible SYN flooding on port %d. Sending cookies.\n",
	       ntohs(skb->h.th->dest));
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781
782/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700783 * Save and compile IPv4 options into the request_sock if needed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 */
Stephen Hemminger40efc6f2006-01-03 16:03:49 -0800785static struct ip_options *tcp_v4_save_options(struct sock *sk,
786 struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787{
788 struct ip_options *opt = &(IPCB(skb)->opt);
789 struct ip_options *dopt = NULL;
790
791 if (opt && opt->optlen) {
792 int opt_size = optlength(opt);
793 dopt = kmalloc(opt_size, GFP_ATOMIC);
794 if (dopt) {
795 if (ip_options_echo(dopt, skb)) {
796 kfree(dopt);
797 dopt = NULL;
798 }
799 }
800 }
801 return dopt;
802}
803
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800804#ifdef CONFIG_TCP_MD5SIG
805/*
806 * RFC2385 MD5 checksumming requires a mapping of
807 * IP address->MD5 Key.
808 * We need to maintain these in the sk structure.
809 */
810
811/* Find the Key structure for an address. */
812static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
813{
814 struct tcp_sock *tp = tcp_sk(sk);
815 int i;
816
817 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
818 return NULL;
819 for (i = 0; i < tp->md5sig_info->entries4; i++) {
820 if (tp->md5sig_info->keys4[i].addr == addr)
821 return (struct tcp_md5sig_key *)&tp->md5sig_info->keys4[i];
822 }
823 return NULL;
824}
825
826struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
827 struct sock *addr_sk)
828{
829 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
830}
831
832EXPORT_SYMBOL(tcp_v4_md5_lookup);
833
834struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
835 struct request_sock *req)
836{
837 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
838}
839
840/* This can be called on a newly created socket, from other files */
841int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
842 u8 *newkey, u8 newkeylen)
843{
844 /* Add Key to the list */
845 struct tcp4_md5sig_key *key;
846 struct tcp_sock *tp = tcp_sk(sk);
847 struct tcp4_md5sig_key *keys;
848
849 key = (struct tcp4_md5sig_key *) tcp_v4_md5_do_lookup(sk, addr);
850 if (key) {
851 /* Pre-existing entry - just update that one. */
852 kfree (key->key);
853 key->key = newkey;
854 key->keylen = newkeylen;
855 } else {
856 if (!tp->md5sig_info) {
857 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
858 if (!tp->md5sig_info) {
859 kfree(newkey);
860 return -ENOMEM;
861 }
862 }
863 if (tcp_alloc_md5sig_pool() == NULL) {
864 kfree(newkey);
865 return -ENOMEM;
866 }
867 if (tp->md5sig_info->alloced4 == tp->md5sig_info->entries4) {
868 keys = kmalloc((sizeof(struct tcp4_md5sig_key) *
869 (tp->md5sig_info->entries4 + 1)), GFP_ATOMIC);
870 if (!keys) {
871 kfree(newkey);
872 tcp_free_md5sig_pool();
873 return -ENOMEM;
874 }
875
876 if (tp->md5sig_info->entries4)
877 memcpy(keys, tp->md5sig_info->keys4,
878 (sizeof (struct tcp4_md5sig_key) *
879 tp->md5sig_info->entries4));
880
881 /* Free old key list, and reference new one */
882 if (tp->md5sig_info->keys4)
883 kfree(tp->md5sig_info->keys4);
884 tp->md5sig_info->keys4 = keys;
885 tp->md5sig_info->alloced4++;
886 }
887 tp->md5sig_info->entries4++;
888 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].addr = addr;
889 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].key = newkey;
890 tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].keylen = newkeylen;
891 }
892 return 0;
893}
894
895EXPORT_SYMBOL(tcp_v4_md5_do_add);
896
897static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
898 u8 *newkey, u8 newkeylen)
899{
900 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
901 newkey, newkeylen);
902}
903
904int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
905{
906 struct tcp_sock *tp = tcp_sk(sk);
907 int i;
908
909 for (i = 0; i < tp->md5sig_info->entries4; i++) {
910 if (tp->md5sig_info->keys4[i].addr == addr) {
911 /* Free the key */
912 kfree(tp->md5sig_info->keys4[i].key);
913 tp->md5sig_info->entries4--;
914
915 if (tp->md5sig_info->entries4 == 0) {
916 kfree(tp->md5sig_info->keys4);
917 tp->md5sig_info->keys4 = NULL;
918 } else {
919 /* Need to do some manipulation */
920 if (tp->md5sig_info->entries4 != i)
921 memcpy(&tp->md5sig_info->keys4[i],
922 &tp->md5sig_info->keys4[i+1],
923 (tp->md5sig_info->entries4 - i)
924 * sizeof (struct tcp4_md5sig_key));
925 }
926 tcp_free_md5sig_pool();
927 return 0;
928 }
929 }
930 return -ENOENT;
931}
932
933EXPORT_SYMBOL(tcp_v4_md5_do_del);
934
935static void tcp_v4_clear_md5_list (struct sock *sk)
936{
937 struct tcp_sock *tp = tcp_sk(sk);
938
939 /* Free each key, then the set of key keys,
940 * the crypto element, and then decrement our
941 * hold on the last resort crypto.
942 */
943 if (tp->md5sig_info->entries4) {
944 int i;
945 for (i = 0; i < tp->md5sig_info->entries4; i++)
946 kfree(tp->md5sig_info->keys4[i].key);
947 tp->md5sig_info->entries4 = 0;
948 tcp_free_md5sig_pool();
949 }
950 if (tp->md5sig_info->keys4) {
951 kfree(tp->md5sig_info->keys4);
952 tp->md5sig_info->keys4 = NULL;
953 tp->md5sig_info->alloced4 = 0;
954 }
955}
956
957static int tcp_v4_parse_md5_keys (struct sock *sk, char __user *optval,
958 int optlen)
959{
960 struct tcp_md5sig cmd;
961 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
962 u8 *newkey;
963
964 if (optlen < sizeof(cmd))
965 return -EINVAL;
966
967 if (copy_from_user (&cmd, optval, sizeof(cmd)))
968 return -EFAULT;
969
970 if (sin->sin_family != AF_INET)
971 return -EINVAL;
972
973 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
974 if (!tcp_sk(sk)->md5sig_info)
975 return -ENOENT;
976 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
977 }
978
979 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
980 return -EINVAL;
981
982 if (!tcp_sk(sk)->md5sig_info) {
983 struct tcp_sock *tp = tcp_sk(sk);
984 struct tcp_md5sig_info *p;
985
986 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
987 if (!p)
988 return -EINVAL;
989
990 tp->md5sig_info = p;
991
992 }
993
994 newkey = kmalloc(cmd.tcpm_keylen, GFP_KERNEL);
995 if (!newkey)
996 return -ENOMEM;
997 memcpy(newkey, cmd.tcpm_key, cmd.tcpm_keylen);
998 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
999 newkey, cmd.tcpm_keylen);
1000}
1001
1002static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1003 __be32 saddr, __be32 daddr,
1004 struct tcphdr *th, int protocol,
1005 int tcplen)
1006{
1007 struct scatterlist sg[4];
1008 __u16 data_len;
1009 int block = 0;
1010#ifdef CONFIG_TCP_MD5SIG_DEBUG
1011 int i;
1012#endif
1013 __u16 old_checksum;
1014 struct tcp_md5sig_pool *hp;
1015 struct tcp4_pseudohdr *bp;
1016 struct hash_desc *desc;
1017 int err;
1018 unsigned int nbytes = 0;
1019
1020 /*
1021 * Okay, so RFC2385 is turned on for this connection,
1022 * so we need to generate the MD5 hash for the packet now.
1023 */
1024
1025 hp = tcp_get_md5sig_pool();
1026 if (!hp)
1027 goto clear_hash_noput;
1028
1029 bp = &hp->md5_blk.ip4;
1030 desc = &hp->md5_desc;
1031
1032 /*
1033 * 1. the TCP pseudo-header (in the order: source IP address,
1034 * destination IP address, zero-padded protocol number, and
1035 * segment length)
1036 */
1037 bp->saddr = saddr;
1038 bp->daddr = daddr;
1039 bp->pad = 0;
1040 bp->protocol = protocol;
1041 bp->len = htons(tcplen);
1042 sg_set_buf(&sg[block++], bp, sizeof(*bp));
1043 nbytes += sizeof(*bp);
1044
1045#ifdef CONFIG_TCP_MD5SIG_DEBUG
1046 printk("Calcuating hash for: ");
1047 for (i = 0; i < sizeof (*bp); i++)
1048 printk ("%02x ", (unsigned int)((unsigned char *)bp)[i]);
1049 printk(" ");
1050#endif
1051
1052 /* 2. the TCP header, excluding options, and assuming a
1053 * checksum of zero/
1054 */
1055 old_checksum = th->check;
1056 th->check = 0;
1057 sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
1058 nbytes += sizeof(struct tcphdr);
1059#ifdef CONFIG_TCP_MD5SIG_DEBUG
1060 for (i = 0; i < sizeof (struct tcphdr); i++)
1061 printk (" %02x", (unsigned int)((unsigned char *)th)[i]);
1062#endif
1063 /* 3. the TCP segment data (if any) */
1064 data_len = tcplen - (th->doff << 2);
1065 if (data_len > 0) {
1066 unsigned char *data = (unsigned char *)th + (th->doff << 2);
1067 sg_set_buf(&sg[block++], data, data_len);
1068 nbytes += data_len;
1069 }
1070
1071 /* 4. an independently-specified key or password, known to both
1072 * TCPs and presumably connection-specific
1073 */
1074 sg_set_buf(&sg[block++], key->key, key->keylen);
1075 nbytes += key->keylen;
1076
1077#ifdef CONFIG_TCP_MD5SIG_DEBUG
1078 printk (" and password: ");
1079 for (i = 0; i < key->keylen; i++)
1080 printk ("%02x ", (unsigned int)key->key[i]);
1081#endif
1082
1083 /* Now store the Hash into the packet */
1084 err = crypto_hash_init(desc);
1085 if (err)
1086 goto clear_hash;
1087 err = crypto_hash_update(desc, sg, nbytes);
1088 if (err)
1089 goto clear_hash;
1090 err = crypto_hash_final(desc, md5_hash);
1091 if (err)
1092 goto clear_hash;
1093
1094 /* Reset header, and free up the crypto */
1095 tcp_put_md5sig_pool();
1096 th->check = old_checksum;
1097
1098out:
1099#ifdef CONFIG_TCP_MD5SIG_DEBUG
1100 printk(" result:");
1101 for (i = 0; i < 16; i++)
1102 printk (" %02x", (unsigned int)(((u8*)md5_hash)[i]));
1103 printk("\n");
1104#endif
1105 return 0;
1106clear_hash:
1107 tcp_put_md5sig_pool();
1108clear_hash_noput:
1109 memset(md5_hash, 0, 16);
1110 goto out;
1111}
1112
1113int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1114 struct sock *sk,
1115 struct dst_entry *dst,
1116 struct request_sock *req,
1117 struct tcphdr *th, int protocol,
1118 int tcplen)
1119{
1120 __be32 saddr, daddr;
1121
1122 if (sk) {
1123 saddr = inet_sk(sk)->saddr;
1124 daddr = inet_sk(sk)->daddr;
1125 } else {
1126 struct rtable *rt = (struct rtable *)dst;
1127 BUG_ON(!rt);
1128 saddr = rt->rt_src;
1129 daddr = rt->rt_dst;
1130 }
1131 return tcp_v4_do_calc_md5_hash(md5_hash, key,
1132 saddr, daddr,
1133 th, protocol, tcplen);
1134}
1135
1136EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
1137
1138static int tcp_v4_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
1139{
1140 /*
1141 * This gets called for each TCP segment that arrives
1142 * so we want to be efficient.
1143 * We have 3 drop cases:
1144 * o No MD5 hash and one expected.
1145 * o MD5 hash and we're not expecting one.
1146 * o MD5 hash and its wrong.
1147 */
1148 __u8 *hash_location = NULL;
1149 struct tcp_md5sig_key *hash_expected;
1150 struct iphdr *iph = skb->nh.iph;
1151 struct tcphdr *th = skb->h.th;
1152 int length = (th->doff << 2) - sizeof (struct tcphdr);
1153 int genhash;
1154 unsigned char *ptr;
1155 unsigned char newhash[16];
1156
1157 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1158
1159 /*
1160 * If the TCP option length is less than the TCP_MD5SIG
1161 * option length, then we can shortcut
1162 */
1163 if (length < TCPOLEN_MD5SIG) {
1164 if (hash_expected)
1165 return 1;
1166 else
1167 return 0;
1168 }
1169
1170 /* Okay, we can't shortcut - we have to grub through the options */
1171 ptr = (unsigned char *)(th + 1);
1172 while (length > 0) {
1173 int opcode = *ptr++;
1174 int opsize;
1175
1176 switch (opcode) {
1177 case TCPOPT_EOL:
1178 goto done_opts;
1179 case TCPOPT_NOP:
1180 length--;
1181 continue;
1182 default:
1183 opsize = *ptr++;
1184 if (opsize < 2)
1185 goto done_opts;
1186 if (opsize > length)
1187 goto done_opts;
1188
1189 if (opcode == TCPOPT_MD5SIG) {
1190 hash_location = ptr;
1191 goto done_opts;
1192 }
1193 }
1194 ptr += opsize-2;
1195 length -= opsize;
1196 }
1197done_opts:
1198 /* We've parsed the options - do we have a hash? */
1199 if (!hash_expected && !hash_location)
1200 return 0;
1201
1202 if (hash_expected && !hash_location) {
1203 if (net_ratelimit()) {
1204 printk(KERN_INFO "MD5 Hash NOT expected but found "
1205 "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1206 NIPQUAD (iph->saddr), ntohs(th->source),
1207 NIPQUAD (iph->daddr), ntohs(th->dest));
1208 }
1209 return 1;
1210 }
1211
1212 if (!hash_expected && hash_location) {
1213 if (net_ratelimit()) {
1214 printk(KERN_INFO "MD5 Hash NOT expected but found "
1215 "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1216 NIPQUAD (iph->saddr), ntohs(th->source),
1217 NIPQUAD (iph->daddr), ntohs(th->dest));
1218 }
1219 return 1;
1220 }
1221
1222 /* Okay, so this is hash_expected and hash_location -
1223 * so we need to calculate the checksum.
1224 */
1225 genhash = tcp_v4_do_calc_md5_hash(newhash,
1226 hash_expected,
1227 iph->saddr, iph->daddr,
1228 th, sk->sk_protocol,
1229 skb->len);
1230
1231 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1232 if (net_ratelimit()) {
1233 printk(KERN_INFO "MD5 Hash failed for "
1234 "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
1235 NIPQUAD (iph->saddr), ntohs(th->source),
1236 NIPQUAD (iph->daddr), ntohs(th->dest),
1237 genhash ? " tcp_v4_calc_md5_hash failed" : "");
1238#ifdef CONFIG_TCP_MD5SIG_DEBUG
1239 do {
1240 int i;
1241 printk("Received: ");
1242 for (i = 0; i < 16; i++)
1243 printk("%02x ", 0xff & (int)hash_location[i]);
1244 printk("\n");
1245 printk("Calculated: ");
1246 for (i = 0; i < 16; i++)
1247 printk("%02x ", 0xff & (int)newhash[i]);
1248 printk("\n");
1249 } while(0);
1250#endif
1251 }
1252 return 1;
1253 }
1254 return 0;
1255}
1256
1257#endif
1258
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001259struct request_sock_ops tcp_request_sock_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 .family = PF_INET,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001261 .obj_size = sizeof(struct tcp_request_sock),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 .rtx_syn_ack = tcp_v4_send_synack,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001263 .send_ack = tcp_v4_reqsk_send_ack,
1264 .destructor = tcp_v4_reqsk_destructor,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 .send_reset = tcp_v4_send_reset,
1266};
1267
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001268struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1269#ifdef CONFIG_TCP_MD5SIG
1270 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1271#endif
1272};
1273
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001274static struct timewait_sock_ops tcp_timewait_sock_ops = {
1275 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1276 .twsk_unique = tcp_twsk_unique,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001277 .twsk_destructor= tcp_twsk_destructor,
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001278};
1279
/*
 * Process a connection request carried by @skb that arrived on @sk.
 *
 * Allocates a request_sock, parses the TCP options, records addresses and
 * saved IP options, picks the initial sequence number (from a syncookie,
 * from the value left in TCP_SKB_CB(skb)->when, or freshly generated),
 * sends the SYN-ACK and, unless a syncookie was used, hashes the request
 * into the socket's request queue.
 *
 * Every path returns 0, including the ones that drop the request.
 */
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = skb->nh.iph->saddr;
	__be32 daddr = skb->nh.iph->daddr;
	/* Non-zero when the request hit a live timewait bucket (see below). */
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer SYNs sent to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		/* Also the body of the "else" above when syncookies are built in. */
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;
	tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	/* With a syncookie the parsed options are discarded again. */
	if (want_cookie) {
		tcp_clear_options(&tmp_opt);
		tmp_opt.saw_tstamp = 0;
	}

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web server, which
		 * contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is easy case.
		 * We simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, skb->h.th);

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
				dst_release(dst);
				goto drop_and_free;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
				       "request from %u.%u.%u.%u/%u\n",
				       NIPQUAD(saddr),
				       ntohs(skb->h.th->source));
			dst_release(dst);
			goto drop_and_free;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	/* dst may still be NULL here if no route was looked up above. */
	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	if (want_cookie) {
		/* Syncookie mode: the request is freed instead of queued. */
		reqsk_free(req);
	} else {
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
1433
1434
1435/*
1436 * The three way handshake has completed - we got a valid synack -
1437 * now create the new socket.
1438 */
1439struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001440 struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441 struct dst_entry *dst)
1442{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001443 struct inet_request_sock *ireq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 struct inet_sock *newinet;
1445 struct tcp_sock *newtp;
1446 struct sock *newsk;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001447#ifdef CONFIG_TCP_MD5SIG
1448 struct tcp_md5sig_key *key;
1449#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450
1451 if (sk_acceptq_is_full(sk))
1452 goto exit_overflow;
1453
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001454 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 goto exit;
1456
1457 newsk = tcp_create_openreq_child(sk, req, skb);
1458 if (!newsk)
1459 goto exit;
1460
Herbert Xubcd76112006-06-30 13:36:35 -07001461 newsk->sk_gso_type = SKB_GSO_TCPV4;
Arnaldo Carvalho de Melo6cbb0df2005-08-09 19:49:02 -07001462 sk_setup_caps(newsk, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463
1464 newtp = tcp_sk(newsk);
1465 newinet = inet_sk(newsk);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001466 ireq = inet_rsk(req);
1467 newinet->daddr = ireq->rmt_addr;
1468 newinet->rcv_saddr = ireq->loc_addr;
1469 newinet->saddr = ireq->loc_addr;
1470 newinet->opt = ireq->opt;
1471 ireq->opt = NULL;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001472 newinet->mc_index = inet_iif(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 newinet->mc_ttl = skb->nh.iph->ttl;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001474 inet_csk(newsk)->icsk_ext_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 if (newinet->opt)
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001476 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 newinet->id = newtp->write_seq ^ jiffies;
1478
John Heffner5d424d52006-03-20 17:53:41 -08001479 tcp_mtup_init(newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 tcp_sync_mss(newsk, dst_mtu(dst));
1481 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1482 tcp_initialize_rcv_mss(newsk);
1483
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001484#ifdef CONFIG_TCP_MD5SIG
1485 /* Copy over the MD5 key from the original socket */
1486 if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1487 /*
1488 * We're using one, so create a matching key
1489 * on the newsk structure. If we fail to get
1490 * memory, then we end up not copying the key
1491 * across. Shucks.
1492 */
1493 char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
1494 if (newkey) {
1495 memcpy(newkey, key->key, key->keylen);
1496 tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1497 newkey, key->keylen);
1498 }
1499 }
1500#endif
1501
Arnaldo Carvalho de Melof3f05f72005-08-09 20:08:09 -07001502 __inet_hash(&tcp_hashinfo, newsk, 0);
Arnaldo Carvalho de Melo2d8c4ce2005-08-09 20:07:13 -07001503 __inet_inherit_port(&tcp_hashinfo, sk, newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504
1505 return newsk;
1506
1507exit_overflow:
1508 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1509exit:
1510 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1511 dst_release(dst);
1512 return NULL;
1513}
1514
1515static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1516{
1517 struct tcphdr *th = skb->h.th;
1518 struct iphdr *iph = skb->nh.iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 struct sock *nsk;
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001520 struct request_sock **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 /* Find possible connection requests. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001522 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1523 iph->saddr, iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524 if (req)
1525 return tcp_check_req(sk, skb, req, prev);
1526
Herbert Xu8f4910692006-08-09 15:47:12 -07001527 nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
1528 th->source, skb->nh.iph->daddr,
1529 th->dest, inet_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530
1531 if (nsk) {
1532 if (nsk->sk_state != TCP_TIME_WAIT) {
1533 bh_lock_sock(nsk);
1534 return nsk;
1535 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001536 inet_twsk_put(inet_twsk(nsk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 return NULL;
1538 }
1539
1540#ifdef CONFIG_SYN_COOKIES
1541 if (!th->rst && !th->syn && th->ack)
1542 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1543#endif
1544 return sk;
1545}
1546
Al Virob51655b2006-11-14 21:40:42 -08001547static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548{
Patrick McHardy84fa7932006-08-29 16:44:56 -07001549 if (skb->ip_summed == CHECKSUM_COMPLETE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
Herbert Xufb286bb2005-11-10 13:01:24 -08001551 skb->nh.iph->daddr, skb->csum)) {
1552 skb->ip_summed = CHECKSUM_UNNECESSARY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553 return 0;
Herbert Xufb286bb2005-11-10 13:01:24 -08001554 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555 }
Herbert Xufb286bb2005-11-10 13:01:24 -08001556
1557 skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
1558 skb->len, IPPROTO_TCP, 0);
1559
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 if (skb->len <= 76) {
Herbert Xufb286bb2005-11-10 13:01:24 -08001561 return __skb_checksum_complete(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 }
1563 return 0;
1564}
1565
1566
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Dispatches @skb according to the state of @sk: the established fast
 * path, connection-request handling when @sk is in TCP_LISTEN, or the
 * generic state machine.  When a handler returns non-zero a reset is
 * sent and the skb is freed here.  Always returns 0.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* Socket passed to tcp_v4_send_reset() on the reset path. */
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash (sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	/* Header length and checksum are verified here for all other states. */
	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/* A different socket was selected: let it process the skb. */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
}
1639
1640/*
1641 * From tcp_input.c
1642 */
1643
/*
 * Receive one IPv4 TCP segment.
 *
 * Validates the TCP header, fills in the TCP control block of @skb, looks
 * up the owning socket and then either processes the segment directly,
 * prequeues it, or puts it on the socket backlog when the socket is
 * owned by a user context.  Segments with no socket and segments that hit
 * a TIME_WAIT socket are handled at the labels at the end.
 *
 * Returns the result of tcp_v4_do_rcv() when it was called for a found
 * socket, and 0 in every other case.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	/* doff counts 32-bit words and must cover at least the fixed header. */
	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v4_checksum_init(skb)))
		goto bad_packet;

	/* Reload the header pointer after the pull/checksum calls above. */
	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
			   skb->nh.iph->daddr, th->dest,
			   inet_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			/* Process now unless the prequeue accepted the skb. */
			if (!tcp_prequeue(sk, skb))
			ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	/* Drop the socket reference that discard_and_relse would otherwise drop. */
	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		/* Also entered by goto from the header checks above. */
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		/*
		 * If a listener exists for the destination, retire the
		 * timewait socket and restart processing on the listener.
		 */
		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
							skb->nh.iph->daddr,
							th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1780
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781/* VJ's idea. Save last timestamp seen from this destination
1782 * and hold it at least for normal timewait interval to use for duplicate
1783 * segment detection in subsequent connections, before they enter synchronized
1784 * state.
1785 */
1786
1787int tcp_v4_remember_stamp(struct sock *sk)
1788{
1789 struct inet_sock *inet = inet_sk(sk);
1790 struct tcp_sock *tp = tcp_sk(sk);
1791 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1792 struct inet_peer *peer = NULL;
1793 int release_it = 0;
1794
1795 if (!rt || rt->rt_dst != inet->daddr) {
1796 peer = inet_getpeer(inet->daddr, 1);
1797 release_it = 1;
1798 } else {
1799 if (!rt->peer)
1800 rt_bind_peer(rt, 1);
1801 peer = rt->peer;
1802 }
1803
1804 if (peer) {
1805 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1806 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1807 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1808 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1809 peer->tcp_ts = tp->rx_opt.ts_recent;
1810 }
1811 if (release_it)
1812 inet_putpeer(peer);
1813 return 1;
1814 }
1815
1816 return 0;
1817}
1818
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001819int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001821 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822
1823 if (peer) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001824 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1825
1826 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001828 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1829 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1830 peer->tcp_ts = tcptw->tw_ts_recent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 }
1832 inet_putpeer(peer);
1833 return 1;
1834 }
1835
1836 return 0;
1837}
1838
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08001839struct inet_connection_sock_af_ops ipv4_specific = {
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001840 .queue_xmit = ip_queue_xmit,
1841 .send_check = tcp_v4_send_check,
1842 .rebuild_header = inet_sk_rebuild_header,
1843 .conn_request = tcp_v4_conn_request,
1844 .syn_recv_sock = tcp_v4_syn_recv_sock,
1845 .remember_stamp = tcp_v4_remember_stamp,
1846 .net_header_len = sizeof(struct iphdr),
1847 .setsockopt = ip_setsockopt,
1848 .getsockopt = ip_getsockopt,
1849 .addr2sockaddr = inet_csk_addr2sockaddr,
1850 .sockaddr_len = sizeof(struct sockaddr_in),
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001851#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001852 .compat_setsockopt = compat_ip_setsockopt,
1853 .compat_getsockopt = compat_ip_getsockopt,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001854#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855};
1856
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001857struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1858#ifdef CONFIG_TCP_MD5SIG
1859 .md5_lookup = tcp_v4_md5_lookup,
1860 .calc_md5_hash = tcp_v4_calc_md5_hash,
1861 .md5_add = tcp_v4_md5_add_func,
1862 .md5_parse = tcp_v4_parse_md5_keys,
1863#endif
1864};
1865
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866/* NOTE: A lot of things set to zero explicitly by call to
1867 * sk_alloc() so need not be done here.
1868 */
1869static int tcp_v4_init_sock(struct sock *sk)
1870{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001871 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872 struct tcp_sock *tp = tcp_sk(sk);
1873
1874 skb_queue_head_init(&tp->out_of_order_queue);
1875 tcp_init_xmit_timers(sk);
1876 tcp_prequeue_init(tp);
1877
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001878 icsk->icsk_rto = TCP_TIMEOUT_INIT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 tp->mdev = TCP_TIMEOUT_INIT;
1880
1881 /* So many TCP implementations out there (incorrectly) count the
1882 * initial SYN frame in their delayed-ACK and congestion control
1883 * algorithms that we must have the following bandaid to talk
1884 * efficiently to them. -DaveM
1885 */
1886 tp->snd_cwnd = 2;
1887
1888 /* See draft-stevens-tcpca-spec-01 for discussion of the
1889 * initialization of these values.
1890 */
1891 tp->snd_ssthresh = 0x7fffffff; /* Infinity */
1892 tp->snd_cwnd_clamp = ~0;
David S. Millerc1b4a7e2005-07-05 15:24:38 -07001893 tp->mss_cache = 536;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894
1895 tp->reordering = sysctl_tcp_reordering;
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001896 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897
1898 sk->sk_state = TCP_CLOSE;
1899
1900 sk->sk_write_space = sk_stream_write_space;
1901 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1902
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08001903 icsk->icsk_af_ops = &ipv4_specific;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001904 icsk->icsk_sync_mss = tcp_sync_mss;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001905#ifdef CONFIG_TCP_MD5SIG
1906 tp->af_specific = &tcp_sock_ipv4_specific;
1907#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908
1909 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1910 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1911
1912 atomic_inc(&tcp_sockets_allocated);
1913
1914 return 0;
1915}
1916
1917int tcp_v4_destroy_sock(struct sock *sk)
1918{
1919 struct tcp_sock *tp = tcp_sk(sk);
1920
1921 tcp_clear_xmit_timers(sk);
1922
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001923 tcp_cleanup_congestion_control(sk);
Stephen Hemminger317a76f2005-06-23 12:19:55 -07001924
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925 /* Cleanup up the write buffer. */
1926 sk_stream_writequeue_purge(sk);
1927
1928 /* Cleans up our, hopefully empty, out_of_order_queue. */
1929 __skb_queue_purge(&tp->out_of_order_queue);
1930
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001931#ifdef CONFIG_TCP_MD5SIG
1932 /* Clean up the MD5 key list, if any */
1933 if (tp->md5sig_info) {
1934 tcp_v4_clear_md5_list(sk);
1935 kfree(tp->md5sig_info);
1936 tp->md5sig_info = NULL;
1937 }
1938#endif
1939
Chris Leech1a2449a2006-05-23 18:05:53 -07001940#ifdef CONFIG_NET_DMA
1941 /* Cleans up our sk_async_wait_queue */
1942 __skb_queue_purge(&sk->sk_async_wait_queue);
1943#endif
1944
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 /* Clean prequeue, it must be empty really */
1946 __skb_queue_purge(&tp->ucopy.prequeue);
1947
1948 /* Clean up a referenced TCP bind bucket. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001949 if (inet_csk(sk)->icsk_bind_hash)
Arnaldo Carvalho de Melo2d8c4ce2005-08-09 20:07:13 -07001950 inet_put_port(&tcp_hashinfo, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951
1952 /*
1953 * If sendmsg cached page exists, toss it.
1954 */
1955 if (sk->sk_sndmsg_page) {
1956 __free_page(sk->sk_sndmsg_page);
1957 sk->sk_sndmsg_page = NULL;
1958 }
1959
1960 atomic_dec(&tcp_sockets_allocated);
1961
1962 return 0;
1963}
1964
1965EXPORT_SYMBOL(tcp_v4_destroy_sock);
1966
1967#ifdef CONFIG_PROC_FS
1968/* Proc filesystem TCP sock list dumping. */
1969
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001970static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971{
1972 return hlist_empty(head) ? NULL :
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001973 list_entry(head->first, struct inet_timewait_sock, tw_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974}
1975
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001976static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977{
1978 return tw->tw_node.next ?
1979 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1980}
1981
1982static void *listening_get_next(struct seq_file *seq, void *cur)
1983{
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001984 struct inet_connection_sock *icsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 struct hlist_node *node;
1986 struct sock *sk = cur;
1987 struct tcp_iter_state* st = seq->private;
1988
1989 if (!sk) {
1990 st->bucket = 0;
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07001991 sk = sk_head(&tcp_hashinfo.listening_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992 goto get_sk;
1993 }
1994
1995 ++st->num;
1996
1997 if (st->state == TCP_SEQ_STATE_OPENREQ) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001998 struct request_sock *req = cur;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999
Eric Dumazet72a3eff2006-11-16 02:30:37 -08002000 icsk = inet_csk(st->syn_wait_sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001 req = req->dl_next;
2002 while (1) {
2003 while (req) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002004 if (req->rsk_ops->family == st->family) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002005 cur = req;
2006 goto out;
2007 }
2008 req = req->dl_next;
2009 }
Eric Dumazet72a3eff2006-11-16 02:30:37 -08002010 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011 break;
2012get_req:
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002013 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 }
2015 sk = sk_next(st->syn_wait_sk);
2016 st->state = TCP_SEQ_STATE_LISTENING;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002017 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018 } else {
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002019 icsk = inet_csk(sk);
2020 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2021 if (reqsk_queue_len(&icsk->icsk_accept_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022 goto start_req;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002023 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024 sk = sk_next(sk);
2025 }
2026get_sk:
2027 sk_for_each_from(sk, node) {
2028 if (sk->sk_family == st->family) {
2029 cur = sk;
2030 goto out;
2031 }
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002032 icsk = inet_csk(sk);
2033 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2034 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035start_req:
2036 st->uid = sock_i_uid(sk);
2037 st->syn_wait_sk = sk;
2038 st->state = TCP_SEQ_STATE_OPENREQ;
2039 st->sbucket = 0;
2040 goto get_req;
2041 }
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002042 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043 }
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -07002044 if (++st->bucket < INET_LHTABLE_SIZE) {
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07002045 sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046 goto get_sk;
2047 }
2048 cur = NULL;
2049out:
2050 return cur;
2051}
2052
2053static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2054{
2055 void *rc = listening_get_next(seq, NULL);
2056
2057 while (rc && *pos) {
2058 rc = listening_get_next(seq, rc);
2059 --*pos;
2060 }
2061 return rc;
2062}
2063
2064static void *established_get_first(struct seq_file *seq)
2065{
2066 struct tcp_iter_state* st = seq->private;
2067 void *rc = NULL;
2068
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07002069 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070 struct sock *sk;
2071 struct hlist_node *node;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002072 struct inet_timewait_sock *tw;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073
2074 /* We can reschedule _before_ having picked the target: */
2075 cond_resched_softirq();
2076
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07002077 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2078 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079 if (sk->sk_family != st->family) {
2080 continue;
2081 }
2082 rc = sk;
2083 goto out;
2084 }
2085 st->state = TCP_SEQ_STATE_TIME_WAIT;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002086 inet_twsk_for_each(tw, node,
2087 &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 if (tw->tw_family != st->family) {
2089 continue;
2090 }
2091 rc = tw;
2092 goto out;
2093 }
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07002094 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 st->state = TCP_SEQ_STATE_ESTABLISHED;
2096 }
2097out:
2098 return rc;
2099}
2100
2101static void *established_get_next(struct seq_file *seq, void *cur)
2102{
2103 struct sock *sk = cur;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002104 struct inet_timewait_sock *tw;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105 struct hlist_node *node;
2106 struct tcp_iter_state* st = seq->private;
2107
2108 ++st->num;
2109
2110 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2111 tw = cur;
2112 tw = tw_next(tw);
2113get_tw:
2114 while (tw && tw->tw_family != st->family) {
2115 tw = tw_next(tw);
2116 }
2117 if (tw) {
2118 cur = tw;
2119 goto out;
2120 }
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07002121 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122 st->state = TCP_SEQ_STATE_ESTABLISHED;
2123
2124 /* We can reschedule between buckets: */
2125 cond_resched_softirq();
2126
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07002127 if (++st->bucket < tcp_hashinfo.ehash_size) {
2128 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2129 sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130 } else {
2131 cur = NULL;
2132 goto out;
2133 }
2134 } else
2135 sk = sk_next(sk);
2136
2137 sk_for_each_from(sk, node) {
2138 if (sk->sk_family == st->family)
2139 goto found;
2140 }
2141
2142 st->state = TCP_SEQ_STATE_TIME_WAIT;
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07002143 tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144 goto get_tw;
2145found:
2146 cur = sk;
2147out:
2148 return cur;
2149}
2150
2151static void *established_get_idx(struct seq_file *seq, loff_t pos)
2152{
2153 void *rc = established_get_first(seq);
2154
2155 while (rc && pos) {
2156 rc = established_get_next(seq, rc);
2157 --pos;
2158 }
2159 return rc;
2160}
2161
2162static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2163{
2164 void *rc;
2165 struct tcp_iter_state* st = seq->private;
2166
Arnaldo Carvalho de Melof3f05f72005-08-09 20:08:09 -07002167 inet_listen_lock(&tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168 st->state = TCP_SEQ_STATE_LISTENING;
2169 rc = listening_get_idx(seq, &pos);
2170
2171 if (!rc) {
Arnaldo Carvalho de Melof3f05f72005-08-09 20:08:09 -07002172 inet_listen_unlock(&tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 local_bh_disable();
2174 st->state = TCP_SEQ_STATE_ESTABLISHED;
2175 rc = established_get_idx(seq, pos);
2176 }
2177
2178 return rc;
2179}
2180
2181static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2182{
2183 struct tcp_iter_state* st = seq->private;
2184 st->state = TCP_SEQ_STATE_LISTENING;
2185 st->num = 0;
2186 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2187}
2188
2189static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2190{
2191 void *rc = NULL;
2192 struct tcp_iter_state* st;
2193
2194 if (v == SEQ_START_TOKEN) {
2195 rc = tcp_get_idx(seq, 0);
2196 goto out;
2197 }
2198 st = seq->private;
2199
2200 switch (st->state) {
2201 case TCP_SEQ_STATE_OPENREQ:
2202 case TCP_SEQ_STATE_LISTENING:
2203 rc = listening_get_next(seq, v);
2204 if (!rc) {
Arnaldo Carvalho de Melof3f05f72005-08-09 20:08:09 -07002205 inet_listen_unlock(&tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 local_bh_disable();
2207 st->state = TCP_SEQ_STATE_ESTABLISHED;
2208 rc = established_get_first(seq);
2209 }
2210 break;
2211 case TCP_SEQ_STATE_ESTABLISHED:
2212 case TCP_SEQ_STATE_TIME_WAIT:
2213 rc = established_get_next(seq, v);
2214 break;
2215 }
2216out:
2217 ++*pos;
2218 return rc;
2219}
2220
2221static void tcp_seq_stop(struct seq_file *seq, void *v)
2222{
2223 struct tcp_iter_state* st = seq->private;
2224
2225 switch (st->state) {
2226 case TCP_SEQ_STATE_OPENREQ:
2227 if (v) {
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002228 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2229 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 }
2231 case TCP_SEQ_STATE_LISTENING:
2232 if (v != SEQ_START_TOKEN)
Arnaldo Carvalho de Melof3f05f72005-08-09 20:08:09 -07002233 inet_listen_unlock(&tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 break;
2235 case TCP_SEQ_STATE_TIME_WAIT:
2236 case TCP_SEQ_STATE_ESTABLISHED:
2237 if (v)
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -07002238 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239 local_bh_enable();
2240 break;
2241 }
2242}
2243
2244static int tcp_seq_open(struct inode *inode, struct file *file)
2245{
2246 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2247 struct seq_file *seq;
2248 struct tcp_iter_state *s;
2249 int rc;
2250
2251 if (unlikely(afinfo == NULL))
2252 return -EINVAL;
2253
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07002254 s = kzalloc(sizeof(*s), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255 if (!s)
2256 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257 s->family = afinfo->family;
2258 s->seq_ops.start = tcp_seq_start;
2259 s->seq_ops.next = tcp_seq_next;
2260 s->seq_ops.show = afinfo->seq_show;
2261 s->seq_ops.stop = tcp_seq_stop;
2262
2263 rc = seq_open(file, &s->seq_ops);
2264 if (rc)
2265 goto out_kfree;
2266 seq = file->private_data;
2267 seq->private = s;
2268out:
2269 return rc;
2270out_kfree:
2271 kfree(s);
2272 goto out;
2273}
2274
2275int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2276{
2277 int rc = 0;
2278 struct proc_dir_entry *p;
2279
2280 if (!afinfo)
2281 return -EINVAL;
2282 afinfo->seq_fops->owner = afinfo->owner;
2283 afinfo->seq_fops->open = tcp_seq_open;
2284 afinfo->seq_fops->read = seq_read;
2285 afinfo->seq_fops->llseek = seq_lseek;
2286 afinfo->seq_fops->release = seq_release_private;
2287
2288 p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2289 if (p)
2290 p->data = afinfo;
2291 else
2292 rc = -ENOMEM;
2293 return rc;
2294}
2295
2296void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2297{
2298 if (!afinfo)
2299 return;
2300 proc_net_remove(afinfo->name);
2301 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2302}
2303
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002304static void get_openreq4(struct sock *sk, struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 char *tmpbuf, int i, int uid)
2306{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002307 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308 int ttd = req->expires - jiffies;
2309
2310 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2311 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2312 i,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002313 ireq->loc_addr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314 ntohs(inet_sk(sk)->sport),
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002315 ireq->rmt_addr,
2316 ntohs(ireq->rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317 TCP_SYN_RECV,
2318 0, 0, /* could print option size, but that is af dependent. */
2319 1, /* timers active (only the expire timer) */
2320 jiffies_to_clock_t(ttd),
2321 req->retrans,
2322 uid,
2323 0, /* non standard timer */
2324 0, /* open_requests have no inode */
2325 atomic_read(&sk->sk_refcnt),
2326 req);
2327}
2328
2329static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2330{
2331 int timer_active;
2332 unsigned long timer_expires;
2333 struct tcp_sock *tp = tcp_sk(sp);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002334 const struct inet_connection_sock *icsk = inet_csk(sp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 struct inet_sock *inet = inet_sk(sp);
Al Viro714e85b2006-11-14 20:51:49 -08002336 __be32 dest = inet->daddr;
2337 __be32 src = inet->rcv_saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 __u16 destp = ntohs(inet->dport);
2339 __u16 srcp = ntohs(inet->sport);
2340
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002341 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002343 timer_expires = icsk->icsk_timeout;
2344 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002346 timer_expires = icsk->icsk_timeout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347 } else if (timer_pending(&sp->sk_timer)) {
2348 timer_active = 2;
2349 timer_expires = sp->sk_timer.expires;
2350 } else {
2351 timer_active = 0;
2352 timer_expires = jiffies;
2353 }
2354
2355 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2356 "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2357 i, src, srcp, dest, destp, sp->sk_state,
Sridhar Samudrala47da8ee2006-06-27 13:29:00 -07002358 tp->write_seq - tp->snd_una,
2359 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360 timer_active,
2361 jiffies_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002362 icsk->icsk_retransmits,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 sock_i_uid(sp),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002364 icsk->icsk_probes_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 sock_i_ino(sp),
2366 atomic_read(&sp->sk_refcnt), sp,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002367 icsk->icsk_rto,
2368 icsk->icsk_ack.ato,
2369 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370 tp->snd_cwnd,
2371 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2372}
2373
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002374static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375{
Al Viro23f33c22006-09-27 18:43:50 -07002376 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377 __u16 destp, srcp;
2378 int ttd = tw->tw_ttd - jiffies;
2379
2380 if (ttd < 0)
2381 ttd = 0;
2382
2383 dest = tw->tw_daddr;
2384 src = tw->tw_rcv_saddr;
2385 destp = ntohs(tw->tw_dport);
2386 srcp = ntohs(tw->tw_sport);
2387
2388 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2389 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2390 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2391 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2392 atomic_read(&tw->tw_refcnt), tw);
2393}
2394
2395#define TMPSZ 150
2396
2397static int tcp4_seq_show(struct seq_file *seq, void *v)
2398{
2399 struct tcp_iter_state* st;
2400 char tmpbuf[TMPSZ + 1];
2401
2402 if (v == SEQ_START_TOKEN) {
2403 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2404 " sl local_address rem_address st tx_queue "
2405 "rx_queue tr tm->when retrnsmt uid timeout "
2406 "inode");
2407 goto out;
2408 }
2409 st = seq->private;
2410
2411 switch (st->state) {
2412 case TCP_SEQ_STATE_LISTENING:
2413 case TCP_SEQ_STATE_ESTABLISHED:
2414 get_tcp4_sock(v, tmpbuf, st->num);
2415 break;
2416 case TCP_SEQ_STATE_OPENREQ:
2417 get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2418 break;
2419 case TCP_SEQ_STATE_TIME_WAIT:
2420 get_timewait4_sock(v, tmpbuf, st->num);
2421 break;
2422 }
2423 seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2424out:
2425 return 0;
2426}
2427
2428static struct file_operations tcp4_seq_fops;
2429static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2430 .owner = THIS_MODULE,
2431 .name = "tcp",
2432 .family = AF_INET,
2433 .seq_show = tcp4_seq_show,
2434 .seq_fops = &tcp4_seq_fops,
2435};
2436
2437int __init tcp4_proc_init(void)
2438{
2439 return tcp_proc_register(&tcp4_seq_afinfo);
2440}
2441
2442void tcp4_proc_exit(void)
2443{
2444 tcp_proc_unregister(&tcp4_seq_afinfo);
2445}
2446#endif /* CONFIG_PROC_FS */
2447
2448struct proto tcp_prot = {
2449 .name = "TCP",
2450 .owner = THIS_MODULE,
2451 .close = tcp_close,
2452 .connect = tcp_v4_connect,
2453 .disconnect = tcp_disconnect,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002454 .accept = inet_csk_accept,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455 .ioctl = tcp_ioctl,
2456 .init = tcp_v4_init_sock,
2457 .destroy = tcp_v4_destroy_sock,
2458 .shutdown = tcp_shutdown,
2459 .setsockopt = tcp_setsockopt,
2460 .getsockopt = tcp_getsockopt,
2461 .sendmsg = tcp_sendmsg,
2462 .recvmsg = tcp_recvmsg,
2463 .backlog_rcv = tcp_v4_do_rcv,
2464 .hash = tcp_v4_hash,
2465 .unhash = tcp_unhash,
2466 .get_port = tcp_v4_get_port,
2467 .enter_memory_pressure = tcp_enter_memory_pressure,
2468 .sockets_allocated = &tcp_sockets_allocated,
Arnaldo Carvalho de Melo0a5578c2005-08-09 20:11:41 -07002469 .orphan_count = &tcp_orphan_count,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 .memory_allocated = &tcp_memory_allocated,
2471 .memory_pressure = &tcp_memory_pressure,
2472 .sysctl_mem = sysctl_tcp_mem,
2473 .sysctl_wmem = sysctl_tcp_wmem,
2474 .sysctl_rmem = sysctl_tcp_rmem,
2475 .max_header = MAX_TCP_HEADER,
2476 .obj_size = sizeof(struct tcp_sock),
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002477 .twsk_prot = &tcp_timewait_sock_ops,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002478 .rsk_prot = &tcp_request_sock_ops,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002479#ifdef CONFIG_COMPAT
2480 .compat_setsockopt = compat_tcp_setsockopt,
2481 .compat_getsockopt = compat_tcp_getsockopt,
2482#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483};
2484
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485void __init tcp_v4_init(struct net_proto_family *ops)
2486{
Arnaldo Carvalho de Meloc4d93902006-03-20 22:01:03 -08002487 if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 panic("Failed to create the TCP control socket.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489}
2490
/* Symbols exported from this file. */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

/* The proc registration helpers only exist with CONFIG_PROC_FS. */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);

Linus Torvalds1da177e2005-04-16 15:20:36 -07002508