IPv4: Add "offload failed" indication to routes
After installing a route to the kernel, user space receives an
acknowledgment, which means the route was installed in the kernel, but not
necessarily in hardware.
The asynchronous nature of route installation in hardware can lead to a
routing daemon advertising a route before it was actually installed in
hardware. This can result in packet loss or mis-routed packets until the
route is installed in hardware.
To avoid such cases, previous patch set added the ability to emit
RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/RTM_F_TRAP flags
are changed, this behavior is controlled by sysctl.
With the above mentioned behavior, it is possible to know from user-space
if the route was offloaded, but if the offload fails there is no indication
to user-space. Following a failure, a routing daemon will wait indefinitely
for a notification that will never come.
This patch adds an "offload_failed" indication to IPv4 routes, so that
users will have better visibility into the offload process.
'struct fib_alias', and 'struct fib_rt_info' are extended with new field
that indicates if route offload failed. Note that the new field is added
using unused bit and therefore there is no need to increase structs size.
Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index cf111e7..ac9a1743 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4963,6 +4963,7 @@ mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
fri.type = fib4_entry->type;
fri.offload = should_offload;
fri.trap = !should_offload;
+ fri.offload_failed = false;
fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}
@@ -4985,6 +4986,7 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
fri.type = fib4_entry->type;
fri.offload = false;
fri.trap = false;
+ fri.offload_failed = false;
fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 1779146..ca19da1 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -319,6 +319,7 @@ static void nsim_fib4_rt_hw_flags_set(struct net *net,
fri.type = fib4_rt->type;
fri.offload = false;
fri.trap = trap;
+ fri.offload_failed = false;
fib_alias_hw_flags_set(net, &fri);
}
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 2ec062a..a914f33 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -213,7 +213,8 @@ struct fib_rt_info {
u8 type;
u8 offload:1,
trap:1,
- unused:6;
+ offload_failed:1,
+ unused:5;
};
struct fib_entry_notifier_info {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index aff454e..b58db1c 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -18,7 +18,8 @@ struct fib_alias {
s16 fa_default;
u8 offload:1,
trap:1,
- unused:6;
+ offload_failed:1,
+ unused:5;
struct rcu_head rcu;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4c38fac..a632b66 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -521,6 +521,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
+ fri.offload_failed = fa->offload_failed;
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -1811,6 +1812,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtm->rtm_flags |= RTM_F_OFFLOAD;
if (fri->trap)
rtm->rtm_flags |= RTM_F_TRAP;
+ if (fri->offload_failed)
+ rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 60559b7..80147ca 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1047,11 +1047,13 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
- if (fa_match->offload == fri->offload && fa_match->trap == fri->trap)
+ if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
+ fa_match->offload_failed == fri->offload_failed)
goto out;
fa_match->offload = fri->offload;
fa_match->trap = fri->trap;
+ fa_match->offload_failed = fri->offload_failed;
if (!net->ipv4.sysctl_fib_notify_on_flag_change)
goto out;
@@ -1290,6 +1292,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
new_fa->offload = 0;
new_fa->trap = 0;
+ new_fa->offload_failed = 0;
hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
@@ -1350,6 +1353,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
new_fa->offload = 0;
new_fa->trap = 0;
+ new_fa->offload_failed = 0;
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
@@ -2289,6 +2293,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
+ fri.offload_failed = fa->offload_failed;
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index be31e24..02d81d7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3304,6 +3304,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
+ fri.offload_failed = 0;
if (res.fa_head) {
struct fib_alias *fa;