bridge: per vlan dst_metadata netlink support
This patch adds support to attach per vlan tunnel info dst
metadata. This enables bridge driver to map vlan to tunnel_info
at ingress and egress. It uses the kernel dst_metadata infrastructure.
The initial use case is vlan to vni bridging, but the api is generic
to extend to any tunnel_info in the future:
- Uapi to configure/unconfigure/dump per vlan tunnel data
- netlink functions to configure vlan and tunnel_info mapping
- Introduces bridge port flag BR_LWT_VLAN to enable attaching/detaching
dst_metadata to/from bridged packets on ports. Off by default.
- changes to existing code are mainly a refactor of some existing vlan
handling netlink code, plus hooks for the new vlan tunnel code
- I have kept the vlan tunnel code isolated in separate files.
- most of the netlink vlan tunnel code is handling of vlan-tunid
ranges (follows the vlan range handling code). To conserve space
vlan-tunid by default are always dumped in ranges if applicable.
Use case:
example use for this is a vxlan bridging gateway or vtep
which maps vlans to vn-segments (or vnis).
iproute2 example (patched and pruned iproute2 output to just show
relevant fdb entries):
example shows same host mac learnt on two vni's and
vlan 100 maps to vni 1000, vlan 101 maps to vni 1001
before (netdev per vni):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan1001 vlan 101 master bridge
00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan1000 vlan 100 master bridge
00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self
after this patch with collect metadata in bridged mode (single netdev):
$bridge fdb show | grep "00:02:00:00:00:03"
00:02:00:00:00:03 dev vxlan0 vlan 101 master bridge
00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self
00:02:00:00:00:03 dev vxlan0 vlan 100 master bridge
00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self
CC: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
new file mode 100644
index 0000000..b3fd29d
--- /dev/null
+++ b/net/bridge/br_vlan_tunnel.c
@@ -0,0 +1,149 @@
+/*
+ * Bridge per vlan tunnel port dst_metadata handling code
+ *
+ * Authors:
+ * Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <net/switchdev.h>
+#include <net/dst_metadata.h>
+
+#include "br_private.h"
+#include "br_private_tunnel.h"
+
+/* rhashtable obj_cmpfn: 0 if the entry's tunnel id equals the lookup key */
+static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg,
+				    const void *ptr)
+{
+	const struct net_bridge_vlan *entry = ptr;
+
+	return entry->tinfo.tunnel_id != *(const __be64 *)arg->key;
+}
+
+static const struct rhashtable_params br_vlan_tunnel_rht_params = {
+	.key_offset = offsetof(struct net_bridge_vlan, tinfo.tunnel_id),
+	.key_len = sizeof(__be64),	/* key is the vlan's __be64 tunnel id */
+	.head_offset = offsetof(struct net_bridge_vlan, tnode),
+	.obj_cmpfn = br_vlan_tunid_cmp,
+	.nelem_hint = 3,
+	.locks_mul = 1,
+	.automatic_shrinking = true,
+};
+
+void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
+			  struct net_bridge_vlan *vlan)
+{
+	if (vlan->tinfo.tunnel_dst) {	/* no-op when no tunnel dst is set */
+		rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
+				       br_vlan_tunnel_rht_params);
+		vlan->tinfo.tunnel_id = 0;	/* only after unhashing */
+		dst_release(&vlan->tinfo.tunnel_dst->dst);
+		vlan->tinfo.tunnel_dst = NULL;
+	}
+}
+
+/* Attach a tunnel metadata dst for @tun_id to @vlan and hash the vlan by its
+ * tunnel id in @vg. On failure @vlan's tunnel dst and id are left unset.
+ */
+static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
+				  struct net_bridge_vlan *vlan, u32 tun_id)
+{
+	__be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
+	struct metadata_dst *metadata;
+	int err;
+
+	if (vlan->tinfo.tunnel_dst)
+		return -EEXIST;
+
+	metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, key, 0);
+	if (!metadata)
+		return -EINVAL;
+
+	metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;
+	vlan->tinfo.tunnel_dst = metadata;
+	vlan->tinfo.tunnel_id = key;
+
+	err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
+					    br_vlan_tunnel_rht_params);
+	if (err) {	/* undo: drop the dst, clear the stale pointer and id */
+		dst_release(&metadata->dst);
+		vlan->tinfo.tunnel_dst = NULL;
+		vlan->tinfo.tunnel_id = 0;
+	}
+	return err;
+}
+
+/* Map vlan @vid on @port to tunnel id @tun_id.
+ * Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ * Returns -EINVAL when br_vlan_find() finds no vlan @vid for @port.
+ */
+int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id)
+{
+	struct net_bridge_vlan_group *group;
+	struct net_bridge_vlan *entry;
+
+	ASSERT_RTNL();
+
+	group = nbp_vlan_group(port);
+	entry = br_vlan_find(group, vid);
+
+	return entry ? __vlan_tunnel_info_add(group, entry, tun_id) : -EINVAL;
+}
+
+/* Remove the tunnel info of vlan @vid on @port.
+ * Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ * Returns 0, or -ENOENT when br_vlan_find() finds no vlan @vid for @port.
+ */
+int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid)
+{
+	struct net_bridge_vlan_group *group;
+	struct net_bridge_vlan *entry;
+
+	ASSERT_RTNL();
+
+	group = nbp_vlan_group(port);
+	entry = br_vlan_find(group, vid);
+	if (entry)
+		vlan_tunnel_info_del(group, entry);
+
+	return entry ? 0 : -ENOENT;
+}
+
+static void __vlan_tunnel_info_flush(struct net_bridge_vlan_group *vg)
+{
+	struct net_bridge_vlan *cur, *next;	/* walk every vlan of @vg */
+
+	list_for_each_entry_safe(cur, next, &vg->vlan_list, vlist)
+		vlan_tunnel_info_del(vg, cur);	/* no-op if no tunnel set */
+}
+
+/* Drop the tunnel info of every vlan in @port's vlan group.
+ * Must be protected by RTNL.
+ */
+void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port)
+{
+	ASSERT_RTNL();
+
+	__vlan_tunnel_info_flush(nbp_vlan_group(port));
+}
+
+int vlan_tunnel_init(struct net_bridge_vlan_group *vg)
+{ /* initialise vg->tunnel_hash; returns rhashtable_init()'s result */
+ return rhashtable_init(&vg->tunnel_hash, &br_vlan_tunnel_rht_params);
+}
+
+void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg)
+{ /* destroys vg->tunnel_hash, the table set up by vlan_tunnel_init() */
+ rhashtable_destroy(&vg->tunnel_hash);
+}