net: add real socket cookies
A long standing problem in netlink socket dumps is the use
of kernel socket addresses as cookies.
1) It is a security concern.
2) Sockets can be reused quite quickly, so there is
no guarantee a cookie is used once and identify
a flow.
3) request sock, establish sock, and timewait socks
for a given flow have different cookies.
Part of our effort to bring better TCP statistics requires
to switch to a different allocator.
In this patch, I chose to use a per network namespace 64bit generator,
and to use it only in the case a socket needs to be dumped to netlink.
(This might be refined later if needed)
Note that I tried to carry cookies from request sock, to establish sock,
then timewait sockets.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Salo <salo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/core/sock.c b/net/core/sock.c
index 726e1f9..a9a9c2f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1538,6 +1538,7 @@
newsk->sk_err = 0;
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
+ atomic64_set(&newsk->sk_cookie, 0);
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 96e70ee..74dddf8 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -13,22 +13,39 @@
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
-int sock_diag_check_cookie(void *sk, const __u32 *cookie)
+static u64 sock_gen_cookie(struct sock *sk)
{
- if ((cookie[0] != INET_DIAG_NOCOOKIE ||
- cookie[1] != INET_DIAG_NOCOOKIE) &&
- ((u32)(unsigned long)sk != cookie[0] ||
- (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
- return -ESTALE;
- else
+ while (1) {
+ u64 res = atomic64_read(&sk->sk_cookie);
+
+ if (res)
+ return res;
+ res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
+ atomic64_cmpxchg(&sk->sk_cookie, 0, res);
+ }
+}
+
+int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie)
+{
+ u64 res;
+
+ if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE)
return 0;
+
+ res = sock_gen_cookie(sk);
+ if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1])
+ return -ESTALE;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
-void sock_diag_save_cookie(void *sk, __u32 *cookie)
+void sock_diag_save_cookie(struct sock *sk, __u32 *cookie)
{
- cookie[0] = (u32)(unsigned long)sk;
- cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+ u64 res = sock_gen_cookie(sk);
+
+ cookie[0] = (u32)res;
+ cookie[1] = (u32)(res >> 32);
}
EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e45b968..207281a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -641,6 +641,8 @@
ireq = inet_rsk(req);
ireq->ir_loc_addr = ip_hdr(skb)->daddr;
ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+ ireq->ireq_net = sock_net(sk);
+ atomic64_set(&ireq->ir_cookie, 0);
/*
* Step 3: Process LISTEN state
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 14d02ea..34581f9 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -678,6 +678,8 @@
newsk->sk_write_space = sk_stream_write_space;
newsk->sk_mark = inet_rsk(req)->ir_mark;
+ atomic64_set(&newsk->sk_cookie,
+ atomic64_read(&inet_rsk(req)->ir_cookie));
newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ac3bfb4..29317ff 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -221,12 +221,13 @@
user_ns, portid, seq, nlmsg_flags, unlh);
}
-static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
+static int inet_twsk_diag_fill(struct sock *sk,
struct sk_buff *skb,
const struct inet_diag_req_v2 *req,
u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
+ struct inet_timewait_sock *tw = inet_twsk(sk);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
s32 tmo;
@@ -247,7 +248,7 @@
r->idiag_retrans = 0;
r->id.idiag_if = tw->tw_bound_dev_if;
- sock_diag_save_cookie(tw, r->id.idiag_cookie);
+ sock_diag_save_cookie(sk, r->id.idiag_cookie);
r->id.idiag_sport = tw->tw_sport;
r->id.idiag_dport = tw->tw_dport;
@@ -283,7 +284,7 @@
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
- return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq,
+ return inet_twsk_diag_fill(sk, skb, r, portid, seq,
nlmsg_flags, unlh);
return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
@@ -675,7 +676,7 @@
if (!inet_diag_bc_sk(bc, sk))
return 0;
- return inet_twsk_diag_fill(inet_twsk(sk), skb, r,
+ return inet_twsk_diag_fill(sk, skb, r,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
@@ -734,7 +735,10 @@
r->idiag_retrans = req->num_retrans;
r->id.idiag_if = sk->sk_bound_dev_if;
- sock_diag_save_cookie(req, r->id.idiag_cookie);
+
+ BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
+ offsetof(struct sock, sk_cookie));
+ sock_diag_save_cookie((struct sock *)ireq, r->id.idiag_cookie);
tmo = req->expires - jiffies;
if (tmo < 0)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 6d592f8..2bd9805 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -195,6 +195,7 @@
tw->tw_ipv6only = 0;
tw->tw_transparent = inet->transparent;
tw->tw_prot = sk->sk_prot_creator;
+ atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, hold_net(sock_net(sk)));
/*
* Because we use RCU lookups, we should not set tw_refcnt
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 45fe60c..ece31b4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -346,6 +346,7 @@
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
treq->listener = NULL;
+ ireq->ireq_net = sock_net(sk);
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fb4cf8b..d7045f5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5965,6 +5965,8 @@
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
+ inet_rsk(req)->ireq_net = sock_net(sk);
+ atomic64_set(&inet_rsk(req)->ir_cookie, 0);
af_ops->init_req(req, sk, skb);