net: Add source address lookup op for VRF
Add an operation to l3mdev to look up the source address for a given flow.
Add support for the operation to the VRF driver and convert the existing
IPv4 hooks to use the new lookup.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 8713317..6449976 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -36,6 +36,9 @@
#include <net/addrconf.h>
#include <net/l3mdev.h>
+/* TOS bits of a flowi4 masked with IPTOS_RT_MASK, plus the RTO_ONLINK bit. */
+#define RT_FL_TOS(fl4) ((fl4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
+
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
@@ -553,9 +556,41 @@
return rth;
}
+/* Source address lookup for an IPv4 flow; l3mdev_get_saddr op of the VRF
+ * driver.
+ *
+ * @dev: device the op is invoked on; only used to find the network namespace
+ * @fl4: flow to resolve. Nothing is done when fl4->daddr is unset.
+ *
+ * Runs fib_lookup() on @fl4 with FLOWI_FLAG_SKIP_NH_OIF set, the input
+ * interface forced to LOOPBACK_IFINDEX, and tos/scope derived from
+ * RT_FL_TOS(). If the lookup succeeds and the result is a local route,
+ * fl4->saddr becomes the route's preferred source, or fl4->daddr when the
+ * route has none. Any other result type is handed to fib_select_path().
+ * The caller's flags, tos and scope are put back before returning;
+ * flowi4_iif keeps the LOOPBACK_IFINDEX value.
+ *
+ * Called under rcu_read_lock.
+ */
+static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
+{
+	struct fib_result res = { .tclassid = 0 };
+	struct net *net = dev_net(dev);
+	u32 saved_tos;
+	u8 saved_flags;
+	u8 saved_scope;
+	u8 rt_tos;
+
+	/* no destination, nothing to look up */
+	if (unlikely(!fl4->daddr))
+		return;
+
+	/* remember the fields that are rewritten for the lookup */
+	saved_tos = fl4->flowi4_tos;
+	saved_flags = fl4->flowi4_flags;
+	saved_scope = fl4->flowi4_scope;
+	rt_tos = RT_FL_TOS(fl4);
+
+	fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+	fl4->flowi4_iif = LOOPBACK_IFINDEX;
+	fl4->flowi4_tos = rt_tos & IPTOS_RT_MASK;
+	if (rt_tos & RTO_ONLINK)
+		fl4->flowi4_scope = RT_SCOPE_LINK;
+	else
+		fl4->flowi4_scope = RT_SCOPE_UNIVERSE;
+
+	if (fib_lookup(net, fl4, &res, 0) == 0) {
+		if (res.type != RTN_LOCAL)
+			fib_select_path(net, &res, fl4, -1);
+		else
+			fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
+	}
+
+	/* hand the flow back with the caller's flags, tos and scope */
+	fl4->flowi4_flags = saved_flags;
+	fl4->flowi4_tos = saved_tos;
+	fl4->flowi4_scope = saved_scope;
+}
+
static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_fib_table = vrf_fib_table,
.l3mdev_get_rtable = vrf_get_rtable,
+ .l3mdev_get_saddr = vrf_get_saddr, /* IPv4 source address lookup */
};
static void vrf_get_drvinfo(struct net_device *dev,
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 87cee05..44a19a1 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -17,12 +17,16 @@
* @l3mdev_fib_table: Get FIB table id to use for lookups
*
* @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
+ *
+ * @l3mdev_get_saddr: Look up the source address for an IPv4 flow (flowi4)
*/
struct l3mdev_ops {
u32 (*l3mdev_fib_table)(const struct net_device *dev);
struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
const struct flowi4 *fl4);
+ void (*l3mdev_get_saddr)(struct net_device *dev,
+ struct flowi4 *fl4); /* may be NULL; l3mdev_get_saddr() checks before calling */
};
#ifdef CONFIG_NET_L3_MASTER_DEV
@@ -100,6 +104,25 @@
return rc;
}
+/* Let an L3 master device fill in the source address of an IPv4 flow.
+ *
+ * @net: namespace in which @ifindex is resolved
+ * @ifindex: interface index to resolve; 0 means there is nothing to do
+ * @fl4: flow passed on to the device's l3mdev_get_saddr op
+ *
+ * The op is only invoked when @ifindex resolves to a device that is an L3
+ * master and whose l3mdev_ops provide l3mdev_get_saddr; otherwise @fl4 is
+ * not touched. Device lookup and the op call both happen inside
+ * rcu_read_lock()/rcu_read_unlock().
+ */
+static inline void l3mdev_get_saddr(struct net *net, int ifindex,
+				    struct flowi4 *fl4)
+{
+	struct net_device *master;
+
+	if (!ifindex)
+		return;
+
+	rcu_read_lock();
+
+	master = dev_get_by_index_rcu(net, ifindex);
+	if (master && netif_is_l3_master(master)) {
+		const struct l3mdev_ops *ops = master->l3mdev_ops;
+
+		/* the op is optional */
+		if (ops->l3mdev_get_saddr)
+			ops->l3mdev_get_saddr(master, fl4);
+	}
+
+	rcu_read_unlock();
+}
+
#else
static inline int l3mdev_master_ifindex_rcu(struct net_device *dev)
@@ -144,6 +167,10 @@
return false;
}
+/* Stub on the #else side of CONFIG_NET_L3_MASTER_DEV: does nothing, so fl4
+ * is left exactly as the caller passed it.
+ */
+static inline void l3mdev_get_saddr(struct net *net, int ifindex,
+ struct flowi4 *fl4)
+{
+}
#endif
#endif /* _NET_L3MDEV_H_ */
diff --git a/include/net/route.h b/include/net/route.h
index 3e18d90..ee81307 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -266,9 +266,6 @@
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;
- if (netif_index_is_l3_master(sock_net(sk), oif))
- flow_flags |= FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
-
flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
protocol, flow_flags, dst, src, dport, sport);
}
@@ -285,6 +282,10 @@
ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
sport, dport, sk);
+ if (!src && oif) {
+ l3mdev_get_saddr(net, oif, fl4);
+ src = fl4->saddr;
+ }
if (!dst || !src) {
rt = __ip_route_output_key(net, fl4);
if (IS_ERR(rt))
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b2882cf..e1fc129 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1017,30 +1017,14 @@
fl4 = &fl4_stack;
- /* unconnected socket. If output device is enslaved to a VRF
- * device lookup source address from VRF table. This mimics
- * behavior of ip_route_connect{_init}.
- */
- if (netif_index_is_l3_master(net, ipc.oif)) {
- flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
- RT_SCOPE_UNIVERSE, sk->sk_protocol,
- (flow_flags | FLOWI_FLAG_L3MDEV_SRC |
- FLOWI_FLAG_SKIP_NH_OIF),
- faddr, saddr, dport,
- inet->inet_sport);
-
- rt = ip_route_output_flow(net, fl4, sk);
- if (!IS_ERR(rt)) {
- saddr = fl4->saddr;
- ip_rt_put(rt);
- }
- }
-
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
flow_flags,
faddr, saddr, dport, inet->inet_sport);
+ if (!saddr && ipc.oif)
+ l3mdev_get_saddr(net, ipc.oif, fl4);
+
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt)) {