tcp/dccp: do not touch listener sk_refcnt under synflood
When a SYNFLOOD targets a non SO_REUSEPORT listener, multiple
cpus contend on sk->sk_refcnt and sk->sk_wmem_alloc changes.
By letting listeners use SOCK_RCU_FREE infrastructure,
we can relax TCP_LISTEN lookup rules and avoid touching sk_refcnt
Note that we still use SLAB_DESTROY_BY_RCU rules for other sockets,
only listeners are impacted by this change.
Peak performance under SYNFLOOD is increased by ~33% :
On my test machine, I could process 3.2 Mpps instead of 2.4 Mpps
Most consuming functions are now skb_set_owner_w() and sock_wfree()
contending on sk->sk_wmem_alloc when cooking SYNACK and freeing them.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 28332bd..b87beca 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -66,13 +66,15 @@
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
- const int dif)
+ const int dif,
+ bool *refcounted)
{
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
sport, daddr, hnum, dif);
+ *refcounted = true;
if (sk)
return sk;
-
+ *refcounted = false;
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, hnum, dif);
}
@@ -81,17 +83,19 @@
struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
- int iif)
+ int iif,
+ bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
+ *refcounted = true;
if (sk)
return sk;
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, &ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
- iif);
+ iif, refcounted);
}
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index a77acee..0574493 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -100,14 +100,10 @@
/*
* Sockets can be hashed in established or listening table
- * We must use different 'nulls' end-of-chain value for listening
- * hash table, or we might find a socket that was closed and
- * reallocated/inserted into established hash table
*/
-#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
spinlock_t lock;
- struct hlist_nulls_head head;
+ struct hlist_head head;
};
/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -304,14 +300,20 @@
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
- const int dif)
+ const int dif,
+ bool *refcounted)
{
u16 hnum = ntohs(dport);
- struct sock *sk = __inet_lookup_established(net, hashinfo,
- saddr, sport, daddr, hnum, dif);
+ struct sock *sk;
- return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
- sport, daddr, hnum, dif);
+ sk = __inet_lookup_established(net, hashinfo, saddr, sport,
+ daddr, hnum, dif);
+ *refcounted = true;
+ if (sk)
+ return sk;
+ *refcounted = false;
+ return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
+ sport, daddr, hnum, dif);
}
static inline struct sock *inet_lookup(struct net *net,
@@ -322,10 +324,13 @@
const int dif)
{
struct sock *sk;
+ bool refcounted;
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- dport, dif);
+ dport, dif, &refcounted);
+ if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
return sk;
}
@@ -333,17 +338,20 @@
struct sk_buff *skb,
int doff,
const __be16 sport,
- const __be16 dport)
+ const __be16 dport,
+ bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
const struct iphdr *iph = ip_hdr(skb);
+ *refcounted = true;
if (sk)
return sk;
- else
- return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
- doff, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb));
+
+ return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ doff, iph->saddr, sport,
+ iph->daddr, dport, inet_iif(skb),
+ refcounted);
}
u32 sk_ehashfn(const struct sock *sk);