rxrpc: Fix lockup due to no error backoff after ack transmit error If the network becomes (partially) unavailable, say by disabling IPv6, the background ACK transmission routine can get itself into a tizzy by proposing immediate ACK retransmission. Since we're in the call event processor, that happens immediately without returning to the workqueue manager. The condition should clear after a while when either the network comes back or the call times out. Fix this by: (1) When re-proposing an ACK on failed Tx, don't schedule it immediately. This will allow a certain amount of time to elapse before we try again. (2) Enforce a return to the workqueue manager after a certain number of iterations of the call processing loop. (3) Add a backoff delay that increases the delay on deferred ACKs by a jiffy per failed transmission to a limit of HZ. The backoff delay is cleared on a successful return from kernel_sendmsg(). (4) Cancel calls immediately if the opening sendmsg fails. The layer above can arrange retransmission or rotate to another server. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>

commit: c7e86acfcee30794dc99a0759924bf7b9d43f1ca [log] [tgz]
author: David Howells <dhowells@redhat.com> Thu Nov 01 13:39:53 2018 +0000
committer: David S. Miller <davem@davemloft.net> Fri Nov 02 23:59:26 2018 -0700
tree: c31ab320a0a156e97a0442c30e8859071a11d178
parent: 284fb78ed7572117846f8e1d1d8e3dbfd16880c2 [diff] [blame]
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 1894188..736aa92 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c

@@ -35,6 +35,21 @@ struct rxrpc_abort_buffer {
 static const char rxrpc_keepalive_string[] = "";
 
 /*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+	if (ret < 0) {
+		u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+		if (tx_backoff < HZ)
+			WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+	} else {
+		WRITE_ONCE(call->tx_backoff, 0);
+	}
+}
+
+/*
  * Arrange for a keepalive ping a certain time after we last transmitted.  This
  * lets the far side know we're still interested in this call and helps keep
  * the route through any intervening firewall open.
@@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
 				      rxrpc_tx_point_call_ack);
+	rxrpc_tx_backoff(call, ret);
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
 		if (ret < 0) {
@@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 			rxrpc_propose_ACK(call, pkt->ack.reason,
 					  ntohs(pkt->ack.maxSkew),
 					  ntohl(pkt->ack.serial),
-					  true, true,
+					  false, true,
 					  rxrpc_propose_ack_retry_tx);
 		} else {
 			spin_lock_bh(&call->lock);
@@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
 				      rxrpc_tx_point_call_abort);
-
+	rxrpc_tx_backoff(call, ret);
 
 	rxrpc_put_connection(conn);
 	return ret;
@@ -413,6 +429,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &whdr,
 				      rxrpc_tx_point_call_data_nofrag);
+	rxrpc_tx_backoff(call, ret);
 	if (ret == -EMSGSIZE)
 		goto send_fragmentable;
 
@@ -445,9 +462,18 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 			rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
 						rxrpc_timer_set_for_normal);
 		}
-	}
 
-	rxrpc_set_keepalive(call);
+		rxrpc_set_keepalive(call);
+	} else {
+		/* Cancel the call if the initial transmission fails,
+		 * particularly if that's due to network routing issues that
+		 * aren't going away anytime soon.  The layer above can arrange
+		 * the retransmission.
+		 */
+		if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+			rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+						  RX_USER_ABORT, ret);
+	}
 
 	_leave(" = %d [%u]", ret, call->peer->maxdata);
 	return ret;
@@ -506,6 +532,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &whdr,
 				      rxrpc_tx_point_call_data_frag);
+	rxrpc_tx_backoff(call, ret);
 
 	up_write(&conn->params.local->defrag_sem);
 	goto done;
commit	c7e86acfcee30794dc99a0759924bf7b9d43f1ca	[log] [tgz]
author	David Howells <dhowells@redhat.com>	Thu Nov 01 13:39:53 2018 +0000
committer	David S. Miller <davem@davemloft.net>	Fri Nov 02 23:59:26 2018 -0700
tree	c31ab320a0a156e97a0442c30e8859071a11d178
parent	284fb78ed7572117846f8e1d1d8e3dbfd16880c2 [diff] [blame]