ipv6: Add support for non-equal-cost multipath
The use of hash-threshold instead of modulo-N makes it trivial to add
support for non-equal-cost multipath.
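Instead of dividing the multipath hash function's output space equally
between the nexthops, each nexthop is assigned a region size which is
proportional to its weight.
A nexthop's weight is the rtnh_hops value supplied for it plus one, and
routes default to a weight of 1, so configurations that leave rtnh_hops
at zero keep the existing equal-cost behavior.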
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 97cd05d..34ec321d 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -171,6 +171,7 @@ struct rt6_info {
u32 rt6i_metric;
u32 rt6i_pmtu;
/* more non-fragment space at head required */
+ int rt6i_nh_weight;
unsigned short rt6i_nfheader_len;
u8 rt6i_protocol;
u8 exception_bucket_flushed:1,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7837b8c..1076ae0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2594,6 +2594,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
#endif
rt->rt6i_metric = cfg->fc_metric;
+ rt->rt6i_nh_weight = 1;
/* We cannot add true routes via loopback here,
they would result in kernel looping; promote them to reject routes
@@ -3507,11 +3508,11 @@ static int rt6_multipath_total_weight(const struct rt6_info *rt)
int total = 0;
if (!rt6_is_dead(rt))
- total++;
+ total += rt->rt6i_nh_weight;
list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
if (!rt6_is_dead(iter))
- total++;
+ total += iter->rt6i_nh_weight;
}
return total;
@@ -3522,7 +3523,7 @@ static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
int upper_bound = -1;
if (!rt6_is_dead(rt)) {
- (*weight)++;
+ *weight += rt->rt6i_nh_weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
total) - 1;
}
@@ -4024,6 +4025,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup;
}
+ rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
dst_release_immediate(&rt->dst);
@@ -4246,7 +4249,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
if (!rtnh)
goto nla_put_failure;
- rtnh->rtnh_hops = 0;
+ rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
if (rt6_nexthop_info(skb, rt, &flags, true) < 0)