net: fec: Add Scatter/gather support
Add Scatter/gather support for FEC.
This feature allows to improve outbound throughput performance.
Tested on imx6dl sabresd board:
Running iperf tests shows a 55.4% improvement.
$ ethtool -K eth0 sg off
$ iperf -c 10.192.242.167 -t 3 &
[ 3] local 10.192.242.108 port 52618 connected with 10.192.242.167 port 5001
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 3.0 sec 99.5 MBytes 278 Mbits/sec
$ ethtool -K eth0 sg on
$ iperf -c 10.192.242.167 -t 3 &
[ 3] local 10.192.242.108 port 52617 connected with 10.192.242.167 port 5001
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 3.0 sec 154 MBytes 432 Mbits/sec
CC: Li Frank <B20596@freescale.com>
Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 5ffd323..e7ce14d 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -221,7 +221,7 @@
#define BD_ENET_TX_RCMASK ((ushort)0x003c)
#define BD_ENET_TX_UN ((ushort)0x0002)
#define BD_ENET_TX_CSL ((ushort)0x0001)
-#define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */
+#define BD_ENET_TX_STATS ((ushort)0x0fff) /* All status bits */
/*enhanced buffer descriptor control/status used by Ethernet transmit*/
#define BD_ENET_TX_INT 0x40000000
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index b27a729..bea00a8 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -289,6 +289,16 @@
return ((const char *)bdp - (const char *)base) / fep->bufdesc_size;
}
+static int fec_enet_get_free_txdesc_num(struct fec_enet_private *fep)
+{
+ int entries;
+
+ entries = ((const char *)fep->dirty_tx -
+ (const char *)fep->cur_tx) / fep->bufdesc_size - 1;
+
+ return entries > 0 ? entries : entries + fep->tx_ring_size;
+}
+
static void *swap_buffer(void *bufaddr, int len)
{
int i;
@@ -316,92 +326,12 @@
return 0;
}
-static int txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
+static void
+fec_enet_submit_work(struct bufdesc *bdp, struct fec_enet_private *fep)
{
- struct fec_enet_private *fep = netdev_priv(ndev);
const struct platform_device_id *id_entry =
platform_get_device_id(fep->pdev);
- struct bufdesc *bdp, *bdp_pre;
- void *bufaddr;
- unsigned short status;
- unsigned int index;
-
- /* Fill in a Tx ring entry */
- bdp = fep->cur_tx;
-
- status = bdp->cbd_sc;
-
- /* Protocol checksum off-load for TCP and UDP. */
- if (fec_enet_clear_csum(skb, ndev)) {
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
- }
-
- /* Clear all of the status flags */
- status &= ~BD_ENET_TX_STATS;
-
- /* Set buffer length and buffer pointer */
- bufaddr = skb->data;
- bdp->cbd_datlen = skb->len;
-
- index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
-
- if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {
- memcpy(fep->tx_bounce[index], skb->data, skb->len);
- bufaddr = fep->tx_bounce[index];
- }
-
- /*
- * Some design made an incorrect assumption on endian mode of
- * the system that it's running on. As the result, driver has to
- * swap every frame going to and coming from the controller.
- */
- if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
- swap_buffer(bufaddr, skb->len);
-
- /* Save skb pointer */
- fep->tx_skbuff[index] = skb;
-
- /* Push the data cache so the CPM does not get stale memory
- * data.
- */
- bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
- skb->len, DMA_TO_DEVICE);
- if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
- bdp->cbd_bufaddr = 0;
- fep->tx_skbuff[index] = NULL;
- dev_kfree_skb_any(skb);
- if (net_ratelimit())
- netdev_err(ndev, "Tx DMA memory map failed\n");
- return NETDEV_TX_OK;
- }
-
- if (fep->bufdesc_ex) {
-
- struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
- ebdp->cbd_bdu = 0;
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
- fep->hwts_tx_en)) {
- ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT);
- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
- } else {
- ebdp->cbd_esc = BD_ENET_TX_INT;
-
- /* Enable protocol checksum flags
- * We do not bother with the IP Checksum bits as they
- * are done by the kernel
- */
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- ebdp->cbd_esc |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
- }
- }
-
- /* Send it on its way. Tell FEC it's ready, interrupt when done,
- * it's the last BD of the frame, and to put the CRC on the end.
- */
- status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR
- | BD_ENET_TX_LAST | BD_ENET_TX_TC);
- bdp->cbd_sc = status;
+ struct bufdesc *bdp_pre;
bdp_pre = fec_enet_get_prevdesc(bdp, fep);
if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
@@ -410,9 +340,189 @@
schedule_delayed_work(&(fep->delay_work.delay_work),
msecs_to_jiffies(1));
}
+}
+
+static int
+fec_enet_txq_submit_frag_skb(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
+ struct bufdesc *bdp = fep->cur_tx;
+ struct bufdesc_ex *ebdp;
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int frag, frag_len;
+ unsigned short status;
+ unsigned int estatus = 0;
+ skb_frag_t *this_frag;
+ unsigned int index;
+ void *bufaddr;
+ int i;
+
+ for (frag = 0; frag < nr_frags; frag++) {
+ this_frag = &skb_shinfo(skb)->frags[frag];
+ bdp = fec_enet_get_nextdesc(bdp, fep);
+ ebdp = (struct bufdesc_ex *)bdp;
+
+ status = bdp->cbd_sc;
+ status &= ~BD_ENET_TX_STATS;
+ status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+ frag_len = skb_shinfo(skb)->frags[frag].size;
+
+ /* Handle the last BD specially */
+ if (frag == nr_frags - 1) {
+ status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+ if (fep->bufdesc_ex) {
+ estatus |= BD_ENET_TX_INT;
+ if (unlikely(skb_shinfo(skb)->tx_flags &
+ SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+ estatus |= BD_ENET_TX_TS;
+ }
+ }
+
+ if (fep->bufdesc_ex) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+ ebdp->cbd_bdu = 0;
+ ebdp->cbd_esc = estatus;
+ }
+
+ bufaddr = page_address(this_frag->page.p) + this_frag->page_offset;
+
+ index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+ if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+ id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+ memcpy(fep->tx_bounce[index], bufaddr, frag_len);
+ bufaddr = fep->tx_bounce[index];
+
+ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+ swap_buffer(bufaddr, frag_len);
+ }
+
+ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+ frag_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+ dev_kfree_skb_any(skb);
+ if (net_ratelimit())
+ netdev_err(ndev, "Tx DMA memory map failed\n");
+ goto dma_mapping_error;
+ }
+
+ bdp->cbd_datlen = frag_len;
+ bdp->cbd_sc = status;
+ }
+
+ fep->cur_tx = bdp;
+
+ return 0;
+
+dma_mapping_error:
+ bdp = fep->cur_tx;
+ for (i = 0; i < frag; i++) {
+ bdp = fec_enet_get_nextdesc(bdp, fep);
+ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+ bdp->cbd_datlen, DMA_TO_DEVICE);
+ }
+ return NETDEV_TX_OK;
+}
+
+static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct fec_enet_private *fep = netdev_priv(ndev);
+ const struct platform_device_id *id_entry =
+ platform_get_device_id(fep->pdev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ struct bufdesc *bdp, *last_bdp;
+ void *bufaddr;
+ unsigned short status;
+ unsigned short buflen;
+ unsigned int estatus = 0;
+ unsigned int index;
+ int ret;
+
+ /* Protocol checksum off-load for TCP and UDP. */
+ if (fec_enet_clear_csum(skb, ndev)) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* Fill in a Tx ring entry */
+ bdp = fep->cur_tx;
+ status = bdp->cbd_sc;
+ status &= ~BD_ENET_TX_STATS;
+
+ /* Set buffer length and buffer pointer */
+ bufaddr = skb->data;
+ buflen = skb_headlen(skb);
+
+ index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+ if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+ id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+ memcpy(fep->tx_bounce[index], skb->data, buflen);
+ bufaddr = fep->tx_bounce[index];
+
+ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+ swap_buffer(bufaddr, buflen);
+ }
+
+ /* Push the data cache so the CPM does not get stale memory
+ * data.
+ */
+ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
+ buflen, DMA_TO_DEVICE);
+ if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+ dev_kfree_skb_any(skb);
+ if (net_ratelimit())
+ netdev_err(ndev, "Tx DMA memory map failed\n");
+ return NETDEV_TX_OK;
+ }
+
+ if (nr_frags) {
+ ret = fec_enet_txq_submit_frag_skb(skb, ndev);
+ if (ret)
+ return ret;
+ } else {
+ status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+ if (fep->bufdesc_ex) {
+ estatus = BD_ENET_TX_INT;
+ if (unlikely(skb_shinfo(skb)->tx_flags &
+ SKBTX_HW_TSTAMP && fep->hwts_tx_en))
+ estatus |= BD_ENET_TX_TS;
+ }
+ }
+
+ if (fep->bufdesc_ex) {
+
+ struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ fep->hwts_tx_en))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+
+ ebdp->cbd_bdu = 0;
+ ebdp->cbd_esc = estatus;
+ }
+
+ last_bdp = fep->cur_tx;
+ index = fec_enet_get_bd_index(fep->tx_bd_base, last_bdp, fep);
+ /* Save skb pointer */
+ fep->tx_skbuff[index] = skb;
+
+ bdp->cbd_datlen = buflen;
+
+ /* Send it on its way. Tell FEC it's ready, interrupt when done,
+ * it's the last BD of the frame, and to put the CRC on the end.
+ */
+ status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
+ bdp->cbd_sc = status;
+
+ fec_enet_submit_work(bdp, fep);
/* If this was the last BD in the ring, start at the beginning again. */
- bdp = fec_enet_get_nextdesc(bdp, fep);
+ bdp = fec_enet_get_nextdesc(last_bdp, fep);
skb_tx_timestamp(skb);
@@ -421,7 +531,7 @@
/* Trigger transmission start */
writel(0, fep->hwp + FEC_X_DES_ACTIVE);
- return NETDEV_TX_OK;
+ return 0;
}
static netdev_tx_t
@@ -430,6 +540,7 @@
struct fec_enet_private *fep = netdev_priv(ndev);
struct bufdesc *bdp;
unsigned short status;
+ int entries_free;
int ret;
/* Fill in a Tx ring entry */
@@ -441,15 +552,17 @@
/* Ooops. All transmit buffers are full. Bail out.
* This should not happen, since ndev->tbusy should be set.
*/
- netdev_err(ndev, "tx queue full!\n");
+ if (net_ratelimit())
+ netdev_err(ndev, "tx queue full!\n");
return NETDEV_TX_BUSY;
}
- ret = txq_submit_skb(skb, ndev);
- if (ret == -EBUSY)
- return NETDEV_TX_BUSY;
+ ret = fec_enet_txq_submit_skb(skb, ndev);
+ if (ret)
+ return ret;
- if (fep->cur_tx == fep->dirty_tx)
+ entries_free = fec_enet_get_free_txdesc_num(fep);
+ if (entries_free < MAX_SKB_FRAGS + 1)
netif_stop_queue(ndev);
return NETDEV_TX_OK;
@@ -770,6 +883,7 @@
unsigned short status;
struct sk_buff *skb;
int index = 0;
+ int entries;
fep = netdev_priv(ndev);
bdp = fep->dirty_tx;
@@ -786,9 +900,13 @@
index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
skb = fep->tx_skbuff[index];
- dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, skb->len,
+ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
DMA_TO_DEVICE);
bdp->cbd_bufaddr = 0;
+ if (!skb) {
+ bdp = fec_enet_get_nextdesc(bdp, fep);
+ continue;
+ }
/* Check for errors. */
if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -807,7 +925,7 @@
ndev->stats.tx_carrier_errors++;
} else {
ndev->stats.tx_packets++;
- ndev->stats.tx_bytes += bdp->cbd_datlen;
+ ndev->stats.tx_bytes += skb->len;
}
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) &&
@@ -844,15 +962,13 @@
/* Since we have freed up a buffer, the ring is no longer full
*/
- if (fep->dirty_tx != fep->cur_tx) {
- if (netif_queue_stopped(ndev))
- netif_wake_queue(ndev);
- }
+ entries = fec_enet_get_free_txdesc_num(fep);
+ if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
+ netif_wake_queue(ndev);
}
return;
}
-
/* During a receive, the cur_rx points to the current incoming buffer.
* When we update through the ring, if the next incoming buffer has
* not been given to the system, we just set the empty indicator,
@@ -2095,7 +2211,7 @@
if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
/* enable hw accelerator */
ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
- | NETIF_F_RXCSUM);
+ | NETIF_F_RXCSUM | NETIF_F_SG);
fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
}