Merge branch 'tcp_src_port_selection'
Eric Dumazet says:
====================
tcp: improve source port selection
With increase of TCP sockets in hosts, we often hit limitations
caused by port selection, due to randomization and poor strategy.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 73fe0f9..774d241 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -148,8 +148,6 @@
*/
struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
____cacheline_aligned_in_smp;
-
- atomic_t bsockets;
};
static inline struct inet_ehash_bucket *inet_ehash_bucket(
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8976ca4..60021d0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -99,6 +99,7 @@
struct net *net = sock_net(sk);
int smallest_size = -1, smallest_rover;
kuid_t uid = sock_i_uid(sk);
+ int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
local_bh_disable();
if (!snum) {
@@ -106,6 +107,14 @@
again:
inet_get_local_port_range(net, &low, &high);
+ if (attempt_half) {
+ int half = low + ((high - low) >> 1);
+
+ if (attempt_half == 1)
+ high = half;
+ else
+ low = half;
+ }
remaining = (high - low) + 1;
smallest_rover = rover = prandom_u32() % remaining + low;
@@ -127,11 +136,6 @@
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
smallest_rover = rover;
- if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
- !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
- snum = smallest_rover;
- goto tb_found;
- }
}
if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
snum = rover;
@@ -159,6 +163,11 @@
snum = smallest_rover;
goto have_snum;
}
+ if (attempt_half == 1) {
+ /* OK we now try the upper half of the range */
+ attempt_half = 2;
+ goto again;
+ }
goto fail;
}
/* OK, here is the one we will use. HEAD is
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c6fb80b..3766bdd 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -90,10 +90,6 @@
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-
- atomic_inc(&hashinfo->bsockets);
-
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
tb->num_owners++;
@@ -111,8 +107,6 @@
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
- atomic_dec(&hashinfo->bsockets);
-
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
__sk_del_bind_node(sk);
@@ -608,7 +602,6 @@
{
int i;
- atomic_set(&h->bsockets, 0);
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,