net: Fix percpu counters deadlock
When we converted the protocol atomic counters such as the orphan
count and the total socket count deadlocks were introduced due to
the mismatch in BH status of the spots that used the percpu counter
operations.
Based on the diagnosis and patch by Peter Zijlstra, this patch
fixes these issues by disabling BH where we may be in process
context.
Reported-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d5c2bac..1747cca 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -964,7 +964,6 @@
state = sk->sk_state;
sock_hold(sk);
sock_orphan(sk);
- percpu_counter_inc(sk->sk_prot->orphan_count);
/*
* It is the last release_sock in its life. It will remove backlog.
@@ -978,6 +977,8 @@
bh_lock_sock(sk);
WARN_ON(sock_owned_by_user(sk));
+ percpu_counter_inc(sk->sk_prot->orphan_count);
+
/* Have we already been destroyed by a softirq or backlog? */
if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
goto out;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c7cda1c..f26ab38 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -633,8 +633,6 @@
acc_req = req->dl_next;
- percpu_counter_inc(sk->sk_prot->orphan_count);
-
local_bh_disable();
bh_lock_sock(child);
WARN_ON(sock_owned_by_user(child));
@@ -644,6 +642,8 @@
sock_orphan(child);
+ percpu_counter_inc(sk->sk_prot->orphan_count);
+
inet_csk_destroy_sock(child);
bh_unlock_sock(child);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 614958b..eb62e58 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/udplite.h>
+#include <linux/bottom_half.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -50,13 +51,17 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
+ int orphans, sockets;
+
+ local_bh_disable();
+ orphans = percpu_counter_sum_positive(&tcp_orphan_count),
+ sockets = percpu_counter_sum_positive(&tcp_sockets_allocated),
+ local_bh_enable();
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
- sock_prot_inuse_get(net, &tcp_prot),
- (int)percpu_counter_sum_positive(&tcp_orphan_count),
- tcp_death_row.tw_count,
- (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
+ sock_prot_inuse_get(net, &tcp_prot), orphans,
+ tcp_death_row.tw_count, sockets,
atomic_read(&tcp_memory_allocated));
seq_printf(seq, "UDP: inuse %d mem %d\n",
sock_prot_inuse_get(net, &udp_prot),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1f3d529..f28acf1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1836,7 +1836,6 @@
state = sk->sk_state;
sock_hold(sk);
sock_orphan(sk);
- percpu_counter_inc(sk->sk_prot->orphan_count);
/* It is the last release_sock in its life. It will remove backlog. */
release_sock(sk);
@@ -1849,6 +1848,8 @@
bh_lock_sock(sk);
WARN_ON(sock_owned_by_user(sk));
+ percpu_counter_inc(sk->sk_prot->orphan_count);
+
/* Have we already been destroyed by a softirq or backlog? */
if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
goto out;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1017248..9d839fa 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -51,6 +51,7 @@
*/
+#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
@@ -1797,7 +1798,9 @@
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+ local_bh_disable();
percpu_counter_inc(&tcp_sockets_allocated);
+ local_bh_enable();
return 0;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8702b06..e8b8337 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -23,6 +23,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -1830,7 +1831,9 @@
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+ local_bh_disable();
percpu_counter_inc(&tcp_sockets_allocated);
+ local_bh_enable();
return 0;
}