Blame - net/ipv4/tcp_timer.c - SHIFTPHONES/kernel/common

blob: 799ebe061e2ca51c1fdd0831e4f227211c2b71e3 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* INET An implementation of the TCP/IP protocol suite for the LINUX
				3	* operating system. INET is implemented using the BSD Socket
				4	* interface as the means of communication with the user level.
				5	*
				6	* Implementation of the Transmission Control Protocol(TCP).
				7	*
				8	* Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
				9	*
Jesper Juhl	02c30a8	2005-05-05 16:16:16 -0700	[diff] [blame^]	10	* Authors: Ross Biro
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	11	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
				12	* Mark Evans, <evansmp@uhura.aston.ac.uk>
				13	* Corey Minyard <wf-rch!minyard@relay.EU.net>
				14	* Florian La Roche, <flla@stud.uni-sb.de>
				15	* Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
				16	* Linus Torvalds, <torvalds@cs.helsinki.fi>
				17	* Alan Cox, <gw4pts@gw4pts.ampr.org>
				18	* Matthew Dillon, <dillon@apollo.west.oic.com>
				19	* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
				20	* Jorge Cwik, <jorge@laser.satlink.net>
				21	*/
				22
				23	#include <linux/module.h>
				24	#include <net/tcp.h>
				25
				26	int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
				27	int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
				28	int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
				29	int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
				30	int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
				31	int sysctl_tcp_retries1 = TCP_RETR1;
				32	int sysctl_tcp_retries2 = TCP_RETR2;
				33	int sysctl_tcp_orphan_retries;
				34
				35	static void tcp_write_timer(unsigned long);
				36	static void tcp_delack_timer(unsigned long);
				37	static void tcp_keepalive_timer (unsigned long data);
				38
				39	#ifdef TCP_DEBUG
				40	const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
				41	EXPORT_SYMBOL(tcp_timer_bug_msg);
				42	#endif
				43
				44	/*
				45	* Using different timers for retransmit, delayed acks and probes
				46	* We may wish use just one timer maintaining a list of expire jiffies
				47	* to optimize.
				48	*/
				49
				50	void tcp_init_xmit_timers(struct sock *sk)
				51	{
				52	struct tcp_sock *tp = tcp_sk(sk);
				53
				54	init_timer(&tp->retransmit_timer);
				55	tp->retransmit_timer.function=&tcp_write_timer;
				56	tp->retransmit_timer.data = (unsigned long) sk;
				57	tp->pending = 0;
				58
				59	init_timer(&tp->delack_timer);
				60	tp->delack_timer.function=&tcp_delack_timer;
				61	tp->delack_timer.data = (unsigned long) sk;
				62	tp->ack.pending = 0;
				63
				64	init_timer(&sk->sk_timer);
				65	sk->sk_timer.function = &tcp_keepalive_timer;
				66	sk->sk_timer.data = (unsigned long)sk;
				67	}
				68
				69	void tcp_clear_xmit_timers(struct sock *sk)
				70	{
				71	struct tcp_sock *tp = tcp_sk(sk);
				72
				73	tp->pending = 0;
				74	sk_stop_timer(sk, &tp->retransmit_timer);
				75
				76	tp->ack.pending = 0;
				77	tp->ack.blocked = 0;
				78	sk_stop_timer(sk, &tp->delack_timer);
				79
				80	sk_stop_timer(sk, &sk->sk_timer);
				81	}
				82
				83	static void tcp_write_err(struct sock *sk)
				84	{
				85	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
				86	sk->sk_error_report(sk);
				87
				88	tcp_done(sk);
				89	NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
				90	}
				91
				92	/* Do not allow orphaned sockets to eat all our resources.
				93	* This is direct violation of TCP specs, but it is required
				94	* to prevent DoS attacks. It is called when a retransmission timeout
				95	* or zero probe timeout occurs on orphaned socket.
				96	*
				97	* Criterium is still not confirmed experimentally and may change.
				98	* We kill the socket, if:
				99	* 1. If number of orphaned sockets exceeds an administratively configured
				100	* limit.
				101	* 2. If we have strong memory pressure.
				102	*/
				103	static int tcp_out_of_resources(struct sock *sk, int do_reset)
				104	{
				105	struct tcp_sock *tp = tcp_sk(sk);
				106	int orphans = atomic_read(&tcp_orphan_count);
				107
				108	/* If peer does not open window for long time, or did not transmit
				109	* anything for long time, penalize it. */
				110	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX \|\| !do_reset)
				111	orphans <<= 1;
				112
				113	/* If some dubious ICMP arrived, penalize even more. */
				114	if (sk->sk_err_soft)
				115	orphans <<= 1;
				116
				117	if (orphans >= sysctl_tcp_max_orphans \|\|
				118	(sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
				119	atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
				120	if (net_ratelimit())
				121	printk(KERN_INFO "Out of socket memory\n");
				122
				123	/* Catch exceptional cases, when connection requires reset.
				124	* 1. Last segment was sent recently. */
				125	if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN \|\|
				126	/* 2. Window is closed. */
				127	(!tp->snd_wnd && !tp->packets_out))
				128	do_reset = 1;
				129	if (do_reset)
				130	tcp_send_active_reset(sk, GFP_ATOMIC);
				131	tcp_done(sk);
				132	NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
				133	return 1;
				134	}
				135	return 0;
				136	}
				137
				138	/* Calculate maximal number or retries on an orphaned socket. */
				139	static int tcp_orphan_retries(struct sock *sk, int alive)
				140	{
				141	int retries = sysctl_tcp_orphan_retries; /* May be zero. */
				142
				143	/* We know from an ICMP that something is wrong. */
				144	if (sk->sk_err_soft && !alive)
				145	retries = 0;
				146
				147	/* However, if socket sent something recently, select some safe
				148	* number of retries. 8 corresponds to >100 seconds with minimal
				149	* RTO of 200msec. */
				150	if (retries == 0 && alive)
				151	retries = 8;
				152	return retries;
				153	}
				154
				155	/* A write timeout has occurred. Process the after effects. */
				156	static int tcp_write_timeout(struct sock *sk)
				157	{
				158	struct tcp_sock *tp = tcp_sk(sk);
				159	int retry_until;
				160
				161	if ((1 << sk->sk_state) & (TCPF_SYN_SENT \| TCPF_SYN_RECV)) {
				162	if (tp->retransmits)
				163	dst_negative_advice(&sk->sk_dst_cache);
				164	retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
				165	} else {
				166	if (tp->retransmits >= sysctl_tcp_retries1) {
				167	/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
				168	hole detection. :-(
				169
				170	It is place to make it. It is not made. I do not want
				171	to make it. It is disguisting. It does not work in any
				172	case. Let me to cite the same draft, which requires for
				173	us to implement this:
				174
				175	"The one security concern raised by this memo is that ICMP black holes
				176	are often caused by over-zealous security administrators who block
				177	all ICMP messages. It is vitally important that those who design and
				178	deploy security systems understand the impact of strict filtering on
				179	upper-layer protocols. The safest web site in the world is worthless
				180	if most TCP implementations cannot transfer data from it. It would
				181	be far nicer to have all of the black holes fixed rather than fixing
				182	all of the TCP implementations."
				183
				184	Golden words :-).
				185	*/
				186
				187	dst_negative_advice(&sk->sk_dst_cache);
				188	}
				189
				190	retry_until = sysctl_tcp_retries2;
				191	if (sock_flag(sk, SOCK_DEAD)) {
				192	int alive = (tp->rto < TCP_RTO_MAX);
				193
				194	retry_until = tcp_orphan_retries(sk, alive);
				195
				196	if (tcp_out_of_resources(sk, alive \|\| tp->retransmits < retry_until))
				197	return 1;
				198	}
				199	}
				200
				201	if (tp->retransmits >= retry_until) {
				202	/* Has it gone just too far? */
				203	tcp_write_err(sk);
				204	return 1;
				205	}
				206	return 0;
				207	}
				208
				209	static void tcp_delack_timer(unsigned long data)
				210	{
				211	struct sock sk = (struct sock)data;
				212	struct tcp_sock *tp = tcp_sk(sk);
				213
				214	bh_lock_sock(sk);
				215	if (sock_owned_by_user(sk)) {
				216	/* Try again later. */
				217	tp->ack.blocked = 1;
				218	NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
				219	sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
				220	goto out_unlock;
				221	}
				222
				223	sk_stream_mem_reclaim(sk);
				224
				225	if (sk->sk_state == TCP_CLOSE \|\| !(tp->ack.pending & TCP_ACK_TIMER))
				226	goto out;
				227
				228	if (time_after(tp->ack.timeout, jiffies)) {
				229	sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
				230	goto out;
				231	}
				232	tp->ack.pending &= ~TCP_ACK_TIMER;
				233
				234	if (skb_queue_len(&tp->ucopy.prequeue)) {
				235	struct sk_buff *skb;
				236
				237	NET_ADD_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED,
				238	skb_queue_len(&tp->ucopy.prequeue));
				239
				240	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
				241	sk->sk_backlog_rcv(sk, skb);
				242
				243	tp->ucopy.memory = 0;
				244	}
				245
				246	if (tcp_ack_scheduled(tp)) {
				247	if (!tp->ack.pingpong) {
				248	/* Delayed ACK missed: inflate ATO. */
				249	tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
				250	} else {
				251	/* Delayed ACK missed: leave pingpong mode and
				252	* deflate ATO.
				253	*/
				254	tp->ack.pingpong = 0;
				255	tp->ack.ato = TCP_ATO_MIN;
				256	}
				257	tcp_send_ack(sk);
				258	NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
				259	}
				260	TCP_CHECK_TIMER(sk);
				261
				262	out:
				263	if (tcp_memory_pressure)
				264	sk_stream_mem_reclaim(sk);
				265	out_unlock:
				266	bh_unlock_sock(sk);
				267	sock_put(sk);
				268	}
				269
				270	static void tcp_probe_timer(struct sock *sk)
				271	{
				272	struct tcp_sock *tp = tcp_sk(sk);
				273	int max_probes;
				274
				275	if (tp->packets_out \|\| !sk->sk_send_head) {
				276	tp->probes_out = 0;
				277	return;
				278	}
				279
				280	/* WARNING RFC 1122 forbids this
				281	*
				282	* It doesn't AFAIK, because we kill the retransmit timer -AK
				283	*
				284	* FIXME: We ought not to do it, Solaris 2.5 actually has fixing
				285	* this behaviour in Solaris down as a bug fix. [AC]
				286	*
				287	* Let me to explain. probes_out is zeroed by incoming ACKs
				288	* even if they advertise zero window. Hence, connection is killed only
				289	* if we received no ACKs for normal connection timeout. It is not killed
				290	* only because window stays zero for some time, window may be zero
				291	* until armageddon and even later. We are in full accordance
				292	* with RFCs, only probe timer combines both retransmission timeout
				293	* and probe timeout in one bottle. --ANK
				294	*/
				295	max_probes = sysctl_tcp_retries2;
				296
				297	if (sock_flag(sk, SOCK_DEAD)) {
				298	int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
				299
				300	max_probes = tcp_orphan_retries(sk, alive);
				301
				302	if (tcp_out_of_resources(sk, alive \|\| tp->probes_out <= max_probes))
				303	return;
				304	}
				305
				306	if (tp->probes_out > max_probes) {
				307	tcp_write_err(sk);
				308	} else {
				309	/* Only send another probe if we didn't close things up. */
				310	tcp_send_probe0(sk);
				311	}
				312	}
				313
				314	/*
				315	* The TCP retransmit timer.
				316	*/
				317
				318	static void tcp_retransmit_timer(struct sock *sk)
				319	{
				320	struct tcp_sock *tp = tcp_sk(sk);
				321
				322	if (!tp->packets_out)
				323	goto out;
				324
				325	BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
				326
				327	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
				328	!((1 << sk->sk_state) & (TCPF_SYN_SENT \| TCPF_SYN_RECV))) {
				329	/* Receiver dastardly shrinks window. Our retransmits
				330	* become zero probes, but we should not timeout this
				331	* connection. If the socket is an orphan, time it out,
				332	* we cannot allow such beasts to hang infinitely.
				333	*/
				334	#ifdef TCP_DEBUG
				335	if (net_ratelimit()) {
				336	struct inet_sock *inet = inet_sk(sk);
				337	printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
				338	NIPQUAD(inet->daddr), htons(inet->dport),
				339	inet->num, tp->snd_una, tp->snd_nxt);
				340	}
				341	#endif
				342	if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
				343	tcp_write_err(sk);
				344	goto out;
				345	}
				346	tcp_enter_loss(sk, 0);
				347	tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
				348	__sk_dst_reset(sk);
				349	goto out_reset_timer;
				350	}
				351
				352	if (tcp_write_timeout(sk))
				353	goto out;
				354
				355	if (tp->retransmits == 0) {
				356	if (tp->ca_state == TCP_CA_Disorder \|\| tp->ca_state == TCP_CA_Recovery) {
				357	if (tp->rx_opt.sack_ok) {
				358	if (tp->ca_state == TCP_CA_Recovery)
				359	NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
				360	else
				361	NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
				362	} else {
				363	if (tp->ca_state == TCP_CA_Recovery)
				364	NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
				365	else
				366	NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
				367	}
				368	} else if (tp->ca_state == TCP_CA_Loss) {
				369	NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
				370	} else {
				371	NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
				372	}
				373	}
				374
				375	if (tcp_use_frto(sk)) {
				376	tcp_enter_frto(sk);
				377	} else {
				378	tcp_enter_loss(sk, 0);
				379	}
				380
				381	if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
				382	/* Retransmission failed because of local congestion,
				383	* do not backoff.
				384	*/
				385	if (!tp->retransmits)
				386	tp->retransmits=1;
				387	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
				388	min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
				389	goto out;
				390	}
				391
				392	/* Increase the timeout each time we retransmit. Note that
				393	* we do not increase the rtt estimate. rto is initialized
				394	* from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
				395	* that doubling rto each time is the least we can get away with.
				396	* In KA9Q, Karn uses this for the first few times, and then
				397	* goes to quadratic. netBSD doubles, but only goes up to *64,
				398	* and clamps at 1 to 64 sec afterwards. Note that 120 sec is
				399	* defined in the protocol as the maximum possible RTT. I guess
				400	* we'll have to use something other than TCP to talk to the
				401	* University of Mars.
				402	*
				403	* PAWS allows us longer timeouts and large windows, so once
				404	* implemented ftp to mars will work nicely. We will have to fix
				405	* the 120 second clamps though!
				406	*/
				407	tp->backoff++;
				408	tp->retransmits++;
				409
				410	out_reset_timer:
				411	tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
				412	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
				413	if (tp->retransmits > sysctl_tcp_retries1)
				414	__sk_dst_reset(sk);
				415
				416	out:;
				417	}
				418
				419	static void tcp_write_timer(unsigned long data)
				420	{
				421	struct sock sk = (struct sock)data;
				422	struct tcp_sock *tp = tcp_sk(sk);
				423	int event;
				424
				425	bh_lock_sock(sk);
				426	if (sock_owned_by_user(sk)) {
				427	/* Try again later */
				428	sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
				429	goto out_unlock;
				430	}
				431
				432	if (sk->sk_state == TCP_CLOSE \|\| !tp->pending)
				433	goto out;
				434
				435	if (time_after(tp->timeout, jiffies)) {
				436	sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
				437	goto out;
				438	}
				439
				440	event = tp->pending;
				441	tp->pending = 0;
				442
				443	switch (event) {
				444	case TCP_TIME_RETRANS:
				445	tcp_retransmit_timer(sk);
				446	break;
				447	case TCP_TIME_PROBE0:
				448	tcp_probe_timer(sk);
				449	break;
				450	}
				451	TCP_CHECK_TIMER(sk);
				452
				453	out:
				454	sk_stream_mem_reclaim(sk);
				455	out_unlock:
				456	bh_unlock_sock(sk);
				457	sock_put(sk);
				458	}
				459
				460	/*
				461	* Timer for listening sockets
				462	*/
				463
				464	static void tcp_synack_timer(struct sock *sk)
				465	{
				466	struct tcp_sock *tp = tcp_sk(sk);
				467	struct tcp_listen_opt *lopt = tp->listen_opt;
				468	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
				469	int thresh = max_retries;
				470	unsigned long now = jiffies;
				471	struct open_request *reqp, req;
				472	int i, budget;
				473
				474	if (lopt == NULL \|\| lopt->qlen == 0)
				475	return;
				476
				477	/* Normally all the openreqs are young and become mature
				478	* (i.e. converted to established socket) for first timeout.
				479	* If synack was not acknowledged for 3 seconds, it means
				480	* one of the following things: synack was lost, ack was lost,
				481	* rtt is high or nobody planned to ack (i.e. synflood).
				482	* When server is a bit loaded, queue is populated with old
				483	* open requests, reducing effective size of queue.
				484	* When server is well loaded, queue size reduces to zero
				485	* after several minutes of work. It is not synflood,
				486	* it is normal operation. The solution is pruning
				487	* too old entries overriding normal timeout, when
				488	* situation becomes dangerous.
				489	*
				490	* Essentially, we reserve half of room for young
				491	* embrions; and abort old ones without pity, if old
				492	* ones are about to clog our table.
				493	*/
				494	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
				495	int young = (lopt->qlen_young<<1);
				496
				497	while (thresh > 2) {
				498	if (lopt->qlen < young)
				499	break;
				500	thresh--;
				501	young <<= 1;
				502	}
				503	}
				504
				505	if (tp->defer_accept)
				506	max_retries = tp->defer_accept;
				507
				508	budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
				509	i = lopt->clock_hand;
				510
				511	do {
				512	reqp=&lopt->syn_table[i];
				513	while ((req = *reqp) != NULL) {
				514	if (time_after_eq(now, req->expires)) {
				515	if ((req->retrans < thresh \|\|
				516	(req->acked && req->retrans < max_retries))
				517	&& !req->class->rtx_syn_ack(sk, req, NULL)) {
				518	unsigned long timeo;
				519
				520	if (req->retrans++ == 0)
				521	lopt->qlen_young--;
				522	timeo = min((TCP_TIMEOUT_INIT << req->retrans),
				523	TCP_RTO_MAX);
				524	req->expires = now + timeo;
				525	reqp = &req->dl_next;
				526	continue;
				527	}
				528
				529	/* Drop this request */
				530	write_lock(&tp->syn_wait_lock);
				531	*reqp = req->dl_next;
				532	write_unlock(&tp->syn_wait_lock);
				533	lopt->qlen--;
				534	if (req->retrans == 0)
				535	lopt->qlen_young--;
				536	tcp_openreq_free(req);
				537	continue;
				538	}
				539	reqp = &req->dl_next;
				540	}
				541
				542	i = (i+1)&(TCP_SYNQ_HSIZE-1);
				543
				544	} while (--budget > 0);
				545
				546	lopt->clock_hand = i;
				547
				548	if (lopt->qlen)
				549	tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
				550	}
				551
				552	void tcp_delete_keepalive_timer (struct sock *sk)
				553	{
				554	sk_stop_timer(sk, &sk->sk_timer);
				555	}
				556
				557	void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
				558	{
				559	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
				560	}
				561
				562	void tcp_set_keepalive(struct sock *sk, int val)
				563	{
				564	if ((1 << sk->sk_state) & (TCPF_CLOSE \| TCPF_LISTEN))
				565	return;
				566
				567	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
				568	tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
				569	else if (!val)
				570	tcp_delete_keepalive_timer(sk);
				571	}
				572
				573
				574	static void tcp_keepalive_timer (unsigned long data)
				575	{
				576	struct sock sk = (struct sock ) data;
				577	struct tcp_sock *tp = tcp_sk(sk);
				578	__u32 elapsed;
				579
				580	/* Only process if socket is not in use. */
				581	bh_lock_sock(sk);
				582	if (sock_owned_by_user(sk)) {
				583	/* Try again later. */
				584	tcp_reset_keepalive_timer (sk, HZ/20);
				585	goto out;
				586	}
				587
				588	if (sk->sk_state == TCP_LISTEN) {
				589	tcp_synack_timer(sk);
				590	goto out;
				591	}
				592
				593	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
				594	if (tp->linger2 >= 0) {
				595	int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
				596
				597	if (tmo > 0) {
				598	tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				599	goto out;
				600	}
				601	}
				602	tcp_send_active_reset(sk, GFP_ATOMIC);
				603	goto death;
				604	}
				605
				606	if (!sock_flag(sk, SOCK_KEEPOPEN) \|\| sk->sk_state == TCP_CLOSE)
				607	goto out;
				608
				609	elapsed = keepalive_time_when(tp);
				610
				611	/* It is alive without keepalive 8) */
				612	if (tp->packets_out \|\| sk->sk_send_head)
				613	goto resched;
				614
				615	elapsed = tcp_time_stamp - tp->rcv_tstamp;
				616
				617	if (elapsed >= keepalive_time_when(tp)) {
				618	if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) \|\|
				619	(tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
				620	tcp_send_active_reset(sk, GFP_ATOMIC);
				621	tcp_write_err(sk);
				622	goto out;
				623	}
				624	if (tcp_write_wakeup(sk) <= 0) {
				625	tp->probes_out++;
				626	elapsed = keepalive_intvl_when(tp);
				627	} else {
				628	/* If keepalive was lost due to local congestion,
				629	* try harder.
				630	*/
				631	elapsed = TCP_RESOURCE_PROBE_INTERVAL;
				632	}
				633	} else {
				634	/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
				635	elapsed = keepalive_time_when(tp) - elapsed;
				636	}
				637
				638	TCP_CHECK_TIMER(sk);
				639	sk_stream_mem_reclaim(sk);
				640
				641	resched:
				642	tcp_reset_keepalive_timer (sk, elapsed);
				643	goto out;
				644
				645	death:
				646	tcp_done(sk);
				647
				648	out:
				649	bh_unlock_sock(sk);
				650	sock_put(sk);
				651	}
				652
				/* Timer helpers of this file that are exported via EXPORT_SYMBOL. */
				EXPORT_SYMBOL(tcp_clear_xmit_timers);
				EXPORT_SYMBOL(tcp_delete_keepalive_timer);
				EXPORT_SYMBOL(tcp_init_xmit_timers);
				EXPORT_SYMBOL(tcp_reset_keepalive_timer);