net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls

RCU was added to UDP lookups, using a fast infrastructure :
- sockets kmem_cache use SLAB_DESTROY_BY_RCU and dont pay the
  price of call_rcu() at freeing time.
- hlist_nulls permits to use few memory barriers.

This patch uses same infrastructure for TCP/DCCP established
and timewait sockets.

Thanks to SLAB_DESTROY_BY_RCU, no slowdown for applications
using short lived TCP connections. A followup patch, converting
rwlocks to spinlocks will even speedup this case.

__inet_lookup_established() is pretty fast now we dont have to
dirty a contended cache line (read_lock/read_unlock)

Only established and timewait hashtable are converted to RCU
(bind table and listen table are still using traditional locking)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index cb31fbf..4818960 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -41,8 +41,8 @@
  * I'll experiment with dynamic table growth later.
  */
 struct inet_ehash_bucket {
-	struct hlist_head chain;
-	struct hlist_head twchain;
+	struct hlist_nulls_head chain;
+	struct hlist_nulls_head twchain;
 };
 
 /* There are a few simple rules, which allow for local port reuse by
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 80e4977..4b8ece2 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -110,7 +110,7 @@
 #define tw_state		__tw_common.skc_state
 #define tw_reuse		__tw_common.skc_reuse
 #define tw_bound_dev_if		__tw_common.skc_bound_dev_if
-#define tw_node			__tw_common.skc_node
+#define tw_node			__tw_common.skc_nulls_node
 #define tw_bind_node		__tw_common.skc_bind_node
 #define tw_refcnt		__tw_common.skc_refcnt
 #define tw_hash			__tw_common.skc_hash
@@ -137,10 +137,10 @@
 	struct hlist_node	tw_death_node;
 };
 
-static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
-				      struct hlist_head *list)
+static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
+				      struct hlist_nulls_head *list)
 {
-	hlist_add_head(&tw->tw_node, list);
+	hlist_nulls_add_head_rcu(&tw->tw_node, list);
 }
 
 static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -175,7 +175,7 @@
 }
 
 #define inet_twsk_for_each(tw, node, head) \
-	hlist_for_each_entry(tw, node, head, tw_node)
+	hlist_nulls_for_each_entry(tw, node, head, tw_node)
 
 #define inet_twsk_for_each_inmate(tw, node, jail) \
 	hlist_for_each_entry(tw, node, jail, tw_death_node)