firewire: net: throttle TX queue before running out of tlabels
This prevents firewire-net from submitting write requests in fast
succession until failure due to all 64 transaction labels were used up
for unfinished split transactions. The netif_stop/wake_queue API is
used for this purpose.
Without this stop/wake mechanism, datagrams were simply lost whenever
the tlabel pool was exhausted. Plus, tlabel exhaustion by firewire-net
also prevented other unrelated outbound transactions to be initiated.
The chosen queue depth was checked by me to hit the maximum possible
throughput with an OS X peer whose receive DMA is good enough to never
reject requests due to busy inbound request FIFO. Current Linux peers
show a mixed picture of -5%...+15% change in bandwidth; their current
bottleneck are RCODE_BUSY situations (fewer or more, depending on TX
queue depth) due to too small AR buffer in firewire-ohci.
Maxim Levitsky tested this change with similar watermarks with a Linux
peer and some pending firewire-ohci improvements that address the
RCODE_BUSY problem and confirmed that these TX queue limits are good.
Note: This removes some netif_wake_queue from reception code paths.
They were apparently copy&paste artefacts from a nonsensical
netif_wake_queue use in the older eth1394 driver. This belongs only
into the transmit path.
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Tested-by: Maxim Levitsky <maximlevitsky@gmail.com>
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index d2d488b..1a467a9 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -27,8 +27,14 @@
#include <asm/unaligned.h>
#include <net/arp.h>
-#define FWNET_MAX_FRAGMENTS 25 /* arbitrary limit */
-#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16 * 1024 ? 4 : 2)
+/* rx limits */
+#define FWNET_MAX_FRAGMENTS 30 /* arbitrary, > TX queue depth */
+#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16*1024 ? 4 : 2)
+
+/* tx limits */
+#define FWNET_MAX_QUEUED_DATAGRAMS 20 /* < 64 = number of tlabels */
+#define FWNET_MIN_QUEUED_DATAGRAMS 10 /* should keep AT DMA busy enough */
+#define FWNET_TX_QUEUE_LEN FWNET_MAX_QUEUED_DATAGRAMS /* ? */
#define IEEE1394_BROADCAST_CHANNEL 31
#define IEEE1394_ALL_NODES (0xffc0 | 0x003f)
@@ -640,8 +646,6 @@
net->stats.rx_packets++;
net->stats.rx_bytes += skb->len;
}
- if (netif_queue_stopped(net))
- netif_wake_queue(net);
return 0;
@@ -650,8 +654,6 @@
net->stats.rx_dropped++;
dev_kfree_skb_any(skb);
- if (netif_queue_stopped(net))
- netif_wake_queue(net);
return -ENOENT;
}
@@ -783,15 +785,10 @@
* Datagram is not complete, we're done for the
* moment.
*/
- spin_unlock_irqrestore(&dev->lock, flags);
-
- return 0;
+ retval = 0;
fail:
spin_unlock_irqrestore(&dev->lock, flags);
- if (netif_queue_stopped(net))
- netif_wake_queue(net);
-
return retval;
}
@@ -891,6 +888,13 @@
kmem_cache_free(fwnet_packet_task_cache, ptask);
}
+/* Caller must hold dev->lock. */
+static void dec_queued_datagrams(struct fwnet_device *dev)
+{
+ if (--dev->queued_datagrams == FWNET_MIN_QUEUED_DATAGRAMS)
+ netif_wake_queue(dev->netdev);
+}
+
static int fwnet_send_packet(struct fwnet_packet_task *ptask);
static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
@@ -907,7 +911,7 @@
/* Check whether we or the networking TX soft-IRQ is last user. */
free = (ptask->outstanding_pkts == 0 && ptask->enqueued);
if (free)
- dev->queued_datagrams--;
+ dec_queued_datagrams(dev);
if (ptask->outstanding_pkts == 0) {
dev->netdev->stats.tx_packets++;
@@ -978,7 +982,7 @@
/* Check whether we or the networking TX soft-IRQ is last user. */
free = ptask->enqueued;
if (free)
- dev->queued_datagrams--;
+ dec_queued_datagrams(dev);
dev->netdev->stats.tx_dropped++;
dev->netdev->stats.tx_errors++;
@@ -1063,7 +1067,7 @@
if (!free)
ptask->enqueued = true;
else
- dev->queued_datagrams--;
+ dec_queued_datagrams(dev);
spin_unlock_irqrestore(&dev->lock, flags);
@@ -1082,7 +1086,7 @@
if (!free)
ptask->enqueued = true;
else
- dev->queued_datagrams--;
+ dec_queued_datagrams(dev);
spin_unlock_irqrestore(&dev->lock, flags);
@@ -1248,6 +1252,15 @@
struct fwnet_peer *peer;
unsigned long flags;
+ spin_lock_irqsave(&dev->lock, flags);
+
+ /* Can this happen? */
+ if (netif_queue_stopped(dev->netdev)) {
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ return NETDEV_TX_BUSY;
+ }
+
ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC);
if (ptask == NULL)
goto fail;
@@ -1266,9 +1279,6 @@
proto = hdr_buf.h_proto;
dg_size = skb->len;
- /* serialize access to peer, including peer->datagram_label */
- spin_lock_irqsave(&dev->lock, flags);
-
/*
* Set the transmission type for the packet. ARP packets and IP
* broadcast packets are sent via GASP.
@@ -1290,7 +1300,7 @@
peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid));
if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR)
- goto fail_unlock;
+ goto fail;
generation = peer->generation;
dest_node = peer->node_id;
@@ -1344,7 +1354,8 @@
max_payload += RFC2374_FRAG_HDR_SIZE;
}
- dev->queued_datagrams++;
+ if (++dev->queued_datagrams == FWNET_MAX_QUEUED_DATAGRAMS)
+ netif_stop_queue(dev->netdev);
spin_unlock_irqrestore(&dev->lock, flags);
@@ -1355,9 +1366,9 @@
return NETDEV_TX_OK;
- fail_unlock:
- spin_unlock_irqrestore(&dev->lock, flags);
fail:
+ spin_unlock_irqrestore(&dev->lock, flags);
+
if (ptask)
kmem_cache_free(fwnet_packet_task_cache, ptask);
@@ -1403,7 +1414,7 @@
net->addr_len = FWNET_ALEN;
net->hard_header_len = FWNET_HLEN;
net->type = ARPHRD_IEEE1394;
- net->tx_queue_len = 10;
+ net->tx_queue_len = FWNET_TX_QUEUE_LEN;
}
/* caller must hold fwnet_device_mutex */