tipc: transfer broadcast nacks in link state messages
When we send broadcasts in clusters of more than 70-80 nodes, we sometimes
see the broadcast link resetting because of an excessive number of
retransmissions. This is caused by a combination of two factors:
1) A "NACK crunch", where loss of broadcast packets is discovered
and NACK'ed by several nodes simultaneously, leading to multiple
redundant broadcast retransmissions.
2) The fact that the NACKs themselves are also sent as broadcast, leading
to excessive load and packet loss on the transmitting switch/bridge.
This commit deals with the latter problem, by moving sending of
broadcast nacks from the dedicated BCAST_PROTOCOL/NACK message type
to regular unicast LINK_PROTOCOL/STATE messages. We allocate 10 unused
bits in word 8 of the said message for this purpose, and introduce a
new capability bit, TIPC_BCAST_STATE_NACK, in order to keep the change
backwards compatible.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 7e8b75f..7ef14e2 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1262,6 +1262,34 @@
kfree_skb(skb);
}
+static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
+ int bearer_id, struct sk_buff_head *xmitq)
+{
+ struct tipc_link *ucl;
+ int rc;
+ /* Pass the received header to the node's broadcast link entry; the
+  * result is tested below as a mask of TIPC_LINK_* flag bits.
+  */
+ rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+ /* On a link-down event, call tipc_bearer_reset_all() for this
+  * node's net and stop: no STATE message is built in that case.
+  */
+ if (rc & TIPC_LINK_DOWN_EVT) {
+ tipc_bearer_reset_all(n->net);
+ return;
+ }
+ /* Nothing more to do unless TIPC_LINK_SND_STATE is set in rc */
+ if (!(rc & TIPC_LINK_SND_STATE))
+ return;
+
+ /* If probe message, a STATE response will be sent anyway, so do
+  * not build a second one here.
+  */
+ if (msg_probe(hdr))
+ return;
+
+ /* Produce a STATE message carrying broadcast NACK. The unicast link
+  * for bearer_id is looked up under the node read lock, and nothing
+  * is built when that slot holds no link. The message is presumably
+  * appended to the caller's xmitq - confirm against
+  * tipc_link_build_state_msg().
+  */
+ tipc_node_read_lock(n);
+ ucl = n->links[bearer_id].link;
+ if (ucl)
+ tipc_link_build_state_msg(ucl, xmitq);
+ tipc_node_read_unlock(n);
+}
+
/**
* tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node
* @net: the applicable net namespace
@@ -1298,7 +1326,7 @@
rc = tipc_bcast_rcv(net, be->link, skb);
/* Broadcast ACKs are sent on a unicast link */
- if (rc & TIPC_LINK_SND_BC_ACK) {
+ if (rc & TIPC_LINK_SND_STATE) {
tipc_node_read_lock(n);
tipc_link_build_state_msg(le->link, &xmitq);
tipc_node_read_unlock(n);
@@ -1505,7 +1533,7 @@
/* Ensure broadcast reception is in synch with peer's send state */
if (unlikely(usr == LINK_PROTOCOL))
- tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr);
+ tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);