net, neigh: Add NTF_MANAGED flag for managed neighbor entries
Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED
flag. The flag then indicates to the kernel that the neighbor entry should be
periodically probed for keeping the entry in NUD_REACHABLE state iff possible.
The use case for this is targeting XDP or tc BPF load-balancers which use
the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution
for their backends. Given they cannot be resolved in fast-path, a control
plane inserts the L3 (without L2) entries manually into the neighbor table
and lets the kernel do the neighbor resolution either on the gateway or on
the backend directly in case the latter resides in the same L2. This avoids
to deal with L2 in the control plane and to rebuild what the kernel already
does best anyway.
NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC
eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor
table which gets triggered by the system work queue to periodically call
neigh_event_send() for performing the resolution. The implementation allows
migration from/to NTF_MANAGED neighbor entries, so that already existing
entries can be converted by the control plane if needed. Potentially, we could
make the interval for periodically calling neigh_event_send() configurable;
right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which
has similar driver-internal infrastructure c723c735fa6b ("mlxsw: spectrum_router:
Periodically update the kernel's neigh table"). In future, the latter could
possibly reuse the NTF_MANAGED neighbors as well.
Example:
# ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
# ./ip/ip n
192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
[...]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
Link: https://linuxplumbersconf.org/event/11/contributions/953/
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 26d4ada..e8e48be 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -155,6 +155,7 @@ struct neighbour {
int (*output)(struct neighbour *, struct sk_buff *);
const struct neigh_ops *ops;
struct list_head gc_list;
+ struct list_head managed_list;
struct rcu_head rcu;
struct net_device *dev;
u8 primary_key[0];
@@ -216,11 +217,13 @@ struct neigh_table {
int gc_thresh3;
unsigned long last_flush;
struct delayed_work gc_work;
+ struct delayed_work managed_work;
struct timer_list proxy_timer;
struct sk_buff_head proxy_queue;
atomic_t entries;
atomic_t gc_entries;
struct list_head gc_list;
+ struct list_head managed_list;
rwlock_t lock;
unsigned long last_rand;
struct neigh_statistics __percpu *stats;
@@ -250,17 +253,21 @@ static inline void *neighbour_priv(const struct neighbour *n)
}
/* flags for neigh_update() */
-#define NEIGH_UPDATE_F_OVERRIDE 0x00000001
-#define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002
-#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004
-#define NEIGH_UPDATE_F_USE 0x10000000
-#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000
-#define NEIGH_UPDATE_F_ISROUTER 0x40000000
-#define NEIGH_UPDATE_F_ADMIN 0x80000000
+#define NEIGH_UPDATE_F_OVERRIDE BIT(0)
+#define NEIGH_UPDATE_F_WEAK_OVERRIDE BIT(1)
+#define NEIGH_UPDATE_F_OVERRIDE_ISROUTER BIT(2)
+#define NEIGH_UPDATE_F_USE BIT(3)
+#define NEIGH_UPDATE_F_MANAGED BIT(4)
+#define NEIGH_UPDATE_F_EXT_LEARNED BIT(5)
+#define NEIGH_UPDATE_F_ISROUTER BIT(6)
+#define NEIGH_UPDATE_F_ADMIN BIT(7)
/* In-kernel representation for NDA_FLAGS_EXT flags: */
#define NTF_OLD_MASK 0xff
#define NTF_EXT_SHIFT 8
+#define NTF_EXT_MASK (NTF_EXT_MANAGED)
+
+#define NTF_MANAGED (NTF_EXT_MANAGED << NTF_EXT_SHIFT)
extern const struct nla_policy nda_policy[];