bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock
This patch adds a helper function, BPF_FUNC_tcp_sock, which is
currently available to cg_skb and sched_(cls|act) programs:
struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk);
int cg_skb_foo(struct __sk_buff *skb) {
struct bpf_tcp_sock *tp;
struct bpf_sock *sk;
__u32 snd_cwnd;
sk = skb->sk;
if (!sk)
return 1;
tp = bpf_tcp_sock(sk);
if (!tp)
return 1;
snd_cwnd = tp->snd_cwnd;
/* ... */
return 1;
}
A 'struct bpf_tcp_sock' is also added to the uapi bpf.h to provide
read-only access. bpf_tcp_sock has all the existing tcp_sock fields
that have already been exposed by bpf_sock_ops,
i.e. no new tcp_sock fields are exposed in bpf.h.
This helper returns a pointer to the tcp_sock. If the given sk is not a
tcp_sock, or it cannot be traced back to a tcp_sock by sk_to_full_sk(),
the helper returns NULL. Hence, the caller needs to check for NULL
before accessing it.
The current use case is to expose members from tcp_sock
to allow a cg_skb_bpf_prog to provide per cgroup traffic
policing/shaping.
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b755d55..1b9496c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -334,14 +334,16 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
static bool type_is_sk_pointer(enum bpf_reg_type type)
{
return type == PTR_TO_SOCKET ||
- type == PTR_TO_SOCK_COMMON;
+ type == PTR_TO_SOCK_COMMON ||
+ type == PTR_TO_TCP_SOCK;
}
static bool reg_type_may_be_null(enum bpf_reg_type type)
{
return type == PTR_TO_MAP_VALUE_OR_NULL ||
type == PTR_TO_SOCKET_OR_NULL ||
- type == PTR_TO_SOCK_COMMON_OR_NULL;
+ type == PTR_TO_SOCK_COMMON_OR_NULL ||
+ type == PTR_TO_TCP_SOCK_OR_NULL;
}
static bool type_is_refcounted(enum bpf_reg_type type)
@@ -407,6 +409,8 @@ static const char * const reg_type_str[] = {
[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
[PTR_TO_SOCK_COMMON] = "sock_common",
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
+ [PTR_TO_TCP_SOCK] = "tcp_sock",
+ [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
};
static char slot_type_char[] = {
@@ -1209,6 +1213,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+ case PTR_TO_TCP_SOCK_OR_NULL:
return true;
default:
return false;
@@ -1662,6 +1668,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
case PTR_TO_SOCKET:
valid = bpf_sock_is_valid_access(off, size, t, &info);
break;
+ case PTR_TO_TCP_SOCK:
+ valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
+ break;
default:
valid = false;
}
@@ -1823,6 +1832,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
case PTR_TO_SOCK_COMMON:
pointer_desc = "sock_common ";
break;
+ case PTR_TO_TCP_SOCK:
+ pointer_desc = "tcp_sock ";
+ break;
default:
break;
}
@@ -3148,6 +3160,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
/* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = ++env->id_gen;
}
+ } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
+ regs[BPF_REG_0].id = ++env->id_gen;
} else {
verbose(env, "unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
@@ -3409,6 +3425,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+ case PTR_TO_TCP_SOCK_OR_NULL:
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
@@ -4644,6 +4662,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
reg->type = PTR_TO_SOCKET;
} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
reg->type = PTR_TO_SOCK_COMMON;
+ } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
+ reg->type = PTR_TO_TCP_SOCK;
}
if (is_null || !(reg_is_refcounted(reg) ||
reg_may_point_to_spin_lock(reg))) {
@@ -5839,6 +5859,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+ case PTR_TO_TCP_SOCK_OR_NULL:
/* Only valid matches are exact, which memcmp() above
* would have accepted
*/
@@ -6161,6 +6183,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+ case PTR_TO_TCP_SOCK_OR_NULL:
return false;
default:
return true;
@@ -7166,6 +7190,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
case PTR_TO_SOCK_COMMON:
convert_ctx_access = bpf_sock_convert_ctx_access;
break;
+ case PTR_TO_TCP_SOCK:
+ convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
+ break;
default:
continue;
}