Merge branch 'bnxt_en-Add-XDP-support'

Michael Chan says:

====================
bnxt_en: Add XDP support.

The first 10 patches refactor the code (rx/tx code paths and ring logic)
and add the basic infrastructure to support XDP.  The 11th patch adds a
basic ndo_xdp implementation that supports only XDP_DROP and XDP_PASS.
The 12th patch completes the series with XDP_TX support.
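
As a rough illustration (not part of this series), a verdict-only XDP
program such as the sketch below, loaded with something like
"ip link set dev <dev> xdp obj xdp_drop.o", is enough to exercise the
supported actions:

	#include <linux/bpf.h>

	#define SEC(name) __attribute__((section(name), used))

	SEC("xdp")
	int xdp_drop(struct xdp_md *ctx)
	{
		return XDP_DROP;	/* or XDP_PASS / XDP_TX */
	}

	char _license[] SEC("license") = "GPL";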

Thanks to Andy Gospodarek for testing and uncovering some bugs.

v3: Removed Kconfig option.
    Pass modified offset and length to stack for XDP_PASS.
    Improved buffer recycling scheme for XDP_TX.
    Other minor fixes.

v2: Addressed review comments from Alexei Starovoitov, Jakub Kicinski,
and David Miller:
	- Added missing dma syncs.
	- Added XDP headroom support.
	- Added tracing in exception path.
	- Clarified a parameter change.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 6082ed1..a7ca45b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index aff3dc1..cda1c78 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -33,6 +33,7 @@
 #include <linux/if.h>
 #include <linux/if_vlan.h>
 #include <linux/rtc.h>
+#include <linux/bpf.h>
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <net/udp.h>
@@ -53,6 +54,7 @@
 #include "bnxt_sriov.h"
 #include "bnxt_ethtool.h"
 #include "bnxt_dcb.h"
+#include "bnxt_xdp.h"
 
 #define BNXT_TX_TIMEOUT		(5 * HZ)
 
@@ -210,16 +212,7 @@ static bool bnxt_vf_pciid(enum board_idx idx)
 #define BNXT_CP_DB_IRQ_DIS(db)						\
 		writel(DB_CP_IRQ_DIS_FLAGS, db)
 
-static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
-{
-	/* Tell compiler to fetch tx indices from memory. */
-	barrier();
-
-	return bp->tx_ring_size -
-		((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
-}
-
-static const u16 bnxt_lhint_arr[] = {
+const u16 bnxt_lhint_arr[] = {
 	TX_BD_FLAGS_LHINT_512_AND_SMALLER,
 	TX_BD_FLAGS_LHINT_512_TO_1023,
 	TX_BD_FLAGS_LHINT_1024_TO_2047,
@@ -262,8 +255,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
-	txr = &bp->tx_ring[i];
 	txq = netdev_get_tx_queue(dev, i);
+	txr = &bp->tx_ring[bp->tx_ring_map[i]];
 	prod = txr->tx_prod;
 
 	free_size = bnxt_tx_avail(bp, txr);
@@ -509,8 +502,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 {
 	struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
-	int index = txr - &bp->tx_ring[0];
-	struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, index);
+	struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index);
 	u16 cons = txr->tx_cons;
 	struct pci_dev *pdev = bp->pdev;
 	int i;
@@ -573,6 +565,25 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 	}
 }
 
+static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
+					 gfp_t gfp)
+{
+	struct device *dev = &bp->pdev->dev;
+	struct page *page;
+
+	page = alloc_page(gfp);
+	if (!page)
+		return NULL;
+
+	*mapping = dma_map_page(dev, page, 0, PAGE_SIZE, bp->rx_dir);
+	if (dma_mapping_error(dev, *mapping)) {
+		__free_page(page);
+		return NULL;
+	}
+	*mapping += bp->rx_dma_offset;
+	return page;
+}
+
 static inline u8 *__bnxt_alloc_rx_data(struct bnxt *bp, dma_addr_t *mapping,
 				       gfp_t gfp)
 {
@@ -583,8 +594,8 @@ static inline u8 *__bnxt_alloc_rx_data(struct bnxt *bp, dma_addr_t *mapping,
 	if (!data)
 		return NULL;
 
-	*mapping = dma_map_single(&pdev->dev, data + BNXT_RX_DMA_OFFSET,
-				  bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
+	*mapping = dma_map_single(&pdev->dev, data + bp->rx_dma_offset,
+				  bp->rx_buf_use_size, bp->rx_dir);
 
 	if (dma_mapping_error(&pdev->dev, *mapping)) {
 		kfree(data);
@@ -593,29 +604,37 @@ static inline u8 *__bnxt_alloc_rx_data(struct bnxt *bp, dma_addr_t *mapping,
 	return data;
 }
 
-static inline int bnxt_alloc_rx_data(struct bnxt *bp,
-				     struct bnxt_rx_ring_info *rxr,
-				     u16 prod, gfp_t gfp)
+int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+		       u16 prod, gfp_t gfp)
 {
 	struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)];
 	struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[prod];
-	u8 *data;
 	dma_addr_t mapping;
 
-	data = __bnxt_alloc_rx_data(bp, &mapping, gfp);
-	if (!data)
-		return -ENOMEM;
+	if (BNXT_RX_PAGE_MODE(bp)) {
+		struct page *page = __bnxt_alloc_rx_page(bp, &mapping, gfp);
 
-	rx_buf->data = data;
-	dma_unmap_addr_set(rx_buf, mapping, mapping);
+		if (!page)
+			return -ENOMEM;
+
+		rx_buf->data = page;
+		rx_buf->data_ptr = page_address(page) + bp->rx_offset;
+	} else {
+		u8 *data = __bnxt_alloc_rx_data(bp, &mapping, gfp);
+
+		if (!data)
+			return -ENOMEM;
+
+		rx_buf->data = data;
+		rx_buf->data_ptr = data + bp->rx_offset;
+	}
+	rx_buf->mapping = mapping;
 
 	rxbd->rx_bd_haddr = cpu_to_le64(mapping);
-
 	return 0;
 }
 
-static void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons,
-			       u8 *data)
+void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data)
 {
 	u16 prod = rxr->rx_prod;
 	struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf;
@@ -625,9 +644,9 @@ static void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons,
 	cons_rx_buf = &rxr->rx_buf_ring[cons];
 
 	prod_rx_buf->data = data;
+	prod_rx_buf->data_ptr = cons_rx_buf->data_ptr;
 
-	dma_unmap_addr_set(prod_rx_buf, mapping,
-			   dma_unmap_addr(cons_rx_buf, mapping));
+	prod_rx_buf->mapping = cons_rx_buf->mapping;
 
 	prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)];
 	cons_bd = &rxr->rx_desc_ring[RX_RING(cons)][RX_IDX(cons)];
@@ -753,13 +772,60 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_napi *bnapi, u16 cp_cons,
 	rxr->rx_sw_agg_prod = sw_prod;
 }
 
+static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
+					struct bnxt_rx_ring_info *rxr,
+					u16 cons, void *data, u8 *data_ptr,
+					dma_addr_t dma_addr,
+					unsigned int offset_and_len)
+{
+	unsigned int payload = offset_and_len >> 16;
+	unsigned int len = offset_and_len & 0xffff;
+	struct skb_frag_struct *frag;
+	struct page *page = data;
+	u16 prod = rxr->rx_prod;
+	struct sk_buff *skb;
+	int off, err;
+
+	err = bnxt_alloc_rx_data(bp, rxr, prod, GFP_ATOMIC);
+	if (unlikely(err)) {
+		bnxt_reuse_rx_data(rxr, cons, data);
+		return NULL;
+	}
+	dma_addr -= bp->rx_dma_offset;
+	dma_unmap_page(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir);
+
+	if (unlikely(!payload))
+		payload = eth_get_headlen(data_ptr, len);
+
+	skb = napi_alloc_skb(&rxr->bnapi->napi, payload);
+	if (!skb) {
+		__free_page(page);
+		return NULL;
+	}
+
+	off = (void *)data_ptr - page_address(page);
+	skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE);
+	memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN,
+	       payload + NET_IP_ALIGN);
+
+	frag = &skb_shinfo(skb)->frags[0];
+	skb_frag_size_sub(frag, payload);
+	frag->page_offset += payload;
+	skb->data_len -= payload;
+	skb->tail += payload;
+
+	return skb;
+}
+
 static struct sk_buff *bnxt_rx_skb(struct bnxt *bp,
 				   struct bnxt_rx_ring_info *rxr, u16 cons,
-				   u16 prod, u8 *data, dma_addr_t dma_addr,
-				   unsigned int len)
+				   void *data, u8 *data_ptr,
+				   dma_addr_t dma_addr,
+				   unsigned int offset_and_len)
 {
-	int err;
+	u16 prod = rxr->rx_prod;
 	struct sk_buff *skb;
+	int err;
 
 	err = bnxt_alloc_rx_data(bp, rxr, prod, GFP_ATOMIC);
 	if (unlikely(err)) {
@@ -769,14 +835,14 @@ static struct sk_buff *bnxt_rx_skb(struct bnxt *bp,
 
 	skb = build_skb(data, 0);
 	dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size,
-			 PCI_DMA_FROMDEVICE);
+			 bp->rx_dir);
 	if (!skb) {
 		kfree(data);
 		return NULL;
 	}
 
-	skb_reserve(skb, BNXT_RX_OFFSET);
-	skb_put(skb, len);
+	skb_reserve(skb, bp->rx_offset);
+	skb_put(skb, offset_and_len & 0xffff);
 	return skb;
 }
 
@@ -812,7 +878,7 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, struct bnxt_napi *bnapi,
 		 * a sw_prod index that equals the cons index, so we
 		 * need to clear the cons entry now.
 		 */
-		mapping = dma_unmap_addr(cons_rx_buf, mapping);
+		mapping = cons_rx_buf->mapping;
 		page = cons_rx_buf->page;
 		cons_rx_buf->page = NULL;
 
@@ -875,14 +941,14 @@ static inline struct sk_buff *bnxt_copy_skb(struct bnxt_napi *bnapi, u8 *data,
 	if (!skb)
 		return NULL;
 
-	dma_sync_single_for_cpu(&pdev->dev, mapping,
-				bp->rx_copy_thresh, PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_cpu(&pdev->dev, mapping, bp->rx_copy_thresh,
+				bp->rx_dir);
 
-	memcpy(skb->data - BNXT_RX_OFFSET, data, len + BNXT_RX_OFFSET);
+	memcpy(skb->data - NET_IP_ALIGN, data - NET_IP_ALIGN,
+	       len + NET_IP_ALIGN);
 
-	dma_sync_single_for_device(&pdev->dev, mapping,
-				   bp->rx_copy_thresh,
-				   PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_device(&pdev->dev, mapping, bp->rx_copy_thresh,
+				   bp->rx_dir);
 
 	skb_put(skb, len);
 	return skb;
@@ -951,17 +1017,19 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
 	}
 
 	prod_rx_buf->data = tpa_info->data;
+	prod_rx_buf->data_ptr = tpa_info->data_ptr;
 
 	mapping = tpa_info->mapping;
-	dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
+	prod_rx_buf->mapping = mapping;
 
 	prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)];
 
 	prod_bd->rx_bd_haddr = cpu_to_le64(mapping);
 
 	tpa_info->data = cons_rx_buf->data;
+	tpa_info->data_ptr = cons_rx_buf->data_ptr;
 	cons_rx_buf->data = NULL;
-	tpa_info->mapping = dma_unmap_addr(cons_rx_buf, mapping);
+	tpa_info->mapping = cons_rx_buf->mapping;
 
 	tpa_info->len =
 		le32_to_cpu(tpa_start->rx_tpa_start_cmp_len_flags_type) >>
@@ -1187,17 +1255,18 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 					   u32 *raw_cons,
 					   struct rx_tpa_end_cmp *tpa_end,
 					   struct rx_tpa_end_cmp_ext *tpa_end1,
-					   bool *agg_event)
+					   u8 *event)
 {
 	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
 	u8 agg_id = TPA_END_AGG_ID(tpa_end);
-	u8 *data, agg_bufs;
+	u8 *data_ptr, agg_bufs;
 	u16 cp_cons = RING_CMP(*raw_cons);
 	unsigned int len;
 	struct bnxt_tpa_info *tpa_info;
 	dma_addr_t mapping;
 	struct sk_buff *skb;
+	void *data;
 
 	if (unlikely(bnapi->in_reset)) {
 		int rc = bnxt_discard_rx(bp, bnapi, raw_cons, tpa_end);
@@ -1209,7 +1278,8 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 
 	tpa_info = &rxr->rx_tpa[agg_id];
 	data = tpa_info->data;
-	prefetch(data);
+	data_ptr = tpa_info->data_ptr;
+	prefetch(data_ptr);
 	len = tpa_info->len;
 	mapping = tpa_info->mapping;
 
@@ -1220,7 +1290,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 		if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons))
 			return ERR_PTR(-EBUSY);
 
-		*agg_event = true;
+		*event |= BNXT_AGG_EVENT;
 		cp_cons = NEXT_CMP(cp_cons);
 	}
 
@@ -1232,7 +1302,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 	}
 
 	if (len <= bp->rx_copy_thresh) {
-		skb = bnxt_copy_skb(bnapi, data, len, mapping);
+		skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
 		if (!skb) {
 			bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
 			return NULL;
@@ -1248,18 +1318,19 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 		}
 
 		tpa_info->data = new_data;
+		tpa_info->data_ptr = new_data + bp->rx_offset;
 		tpa_info->mapping = new_mapping;
 
 		skb = build_skb(data, 0);
 		dma_unmap_single(&bp->pdev->dev, mapping, bp->rx_buf_use_size,
-				 PCI_DMA_FROMDEVICE);
+				 bp->rx_dir);
 
 		if (!skb) {
 			kfree(data);
 			bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
 			return NULL;
 		}
-		skb_reserve(skb, BNXT_RX_OFFSET);
+		skb_reserve(skb, bp->rx_offset);
 		skb_put(skb, len);
 	}
 
@@ -1305,7 +1376,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
  * -EIO    - packet aborted due to hw error indicated in BD
  */
 static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
-		       bool *agg_event)
+		       u8 *event)
 {
 	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
@@ -1316,10 +1387,12 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 	u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
 	struct bnxt_sw_rx_bd *rx_buf;
 	unsigned int len;
-	u8 *data, agg_bufs, cmp_type;
+	u8 *data_ptr, agg_bufs, cmp_type;
 	dma_addr_t dma_addr;
 	struct sk_buff *skb;
+	void *data;
 	int rc = 0;
+	u32 misc;
 
 	rxcmp = (struct rx_cmp *)
 			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
@@ -1340,13 +1413,13 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 		bnxt_tpa_start(bp, rxr, (struct rx_tpa_start_cmp *)rxcmp,
 			       (struct rx_tpa_start_cmp_ext *)rxcmp1);
 
+		*event |= BNXT_RX_EVENT;
 		goto next_rx_no_prod;
 
 	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
 		skb = bnxt_tpa_end(bp, bnapi, &tmp_raw_cons,
 				   (struct rx_tpa_end_cmp *)rxcmp,
-				   (struct rx_tpa_end_cmp_ext *)rxcmp1,
-				   agg_event);
+				   (struct rx_tpa_end_cmp_ext *)rxcmp1, event);
 
 		if (unlikely(IS_ERR(skb)))
 			return -EBUSY;
@@ -1357,30 +1430,33 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 			napi_gro_receive(&bnapi->napi, skb);
 			rc = 1;
 		}
+		*event |= BNXT_RX_EVENT;
 		goto next_rx_no_prod;
 	}
 
 	cons = rxcmp->rx_cmp_opaque;
 	rx_buf = &rxr->rx_buf_ring[cons];
 	data = rx_buf->data;
+	data_ptr = rx_buf->data_ptr;
 	if (unlikely(cons != rxr->rx_next_cons)) {
 		int rc1 = bnxt_discard_rx(bp, bnapi, raw_cons, rxcmp);
 
 		bnxt_sched_reset(bp, rxr);
 		return rc1;
 	}
-	prefetch(data);
+	prefetch(data_ptr);
 
-	agg_bufs = (le32_to_cpu(rxcmp->rx_cmp_misc_v1) & RX_CMP_AGG_BUFS) >>
-				RX_CMP_AGG_BUFS_SHIFT;
+	misc = le32_to_cpu(rxcmp->rx_cmp_misc_v1);
+	agg_bufs = (misc & RX_CMP_AGG_BUFS) >> RX_CMP_AGG_BUFS_SHIFT;
 
 	if (agg_bufs) {
 		if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, &tmp_raw_cons))
 			return -EBUSY;
 
 		cp_cons = NEXT_CMP(cp_cons);
-		*agg_event = true;
+		*event |= BNXT_AGG_EVENT;
 	}
+	*event |= BNXT_RX_EVENT;
 
 	rx_buf->data = NULL;
 	if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L2_ERRORS) {
@@ -1393,17 +1469,29 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 	}
 
 	len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT;
-	dma_addr = dma_unmap_addr(rx_buf, mapping);
+	dma_addr = rx_buf->mapping;
+
+	if (bnxt_rx_xdp(bp, rxr, cons, data, &data_ptr, &len, event)) {
+		rc = 1;
+		goto next_rx;
+	}
 
 	if (len <= bp->rx_copy_thresh) {
-		skb = bnxt_copy_skb(bnapi, data, len, dma_addr);
+		skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr);
 		bnxt_reuse_rx_data(rxr, cons, data);
 		if (!skb) {
 			rc = -ENOMEM;
 			goto next_rx;
 		}
 	} else {
-		skb = bnxt_rx_skb(bp, rxr, cons, prod, data, dma_addr, len);
+		u32 payload;
+
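+		/* The hw payload-offset hint sits in the bits above the
+		 * 16-bit length, so the two can be combined into a single
+		 * offset_and_len word for rx_skb_func.
+		 */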
+		if (rx_buf->data_ptr == data_ptr)
+			payload = misc & RX_CMP_PAYLOAD_OFFSET;
+		else
+			payload = 0;
+		skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr,
+				      payload | len);
 		if (!skb) {
 			rc = -ENOMEM;
 			goto next_rx;
@@ -1627,8 +1715,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 	u32 cons;
 	int tx_pkts = 0;
 	int rx_pkts = 0;
-	bool rx_event = false;
-	bool agg_event = false;
+	u8 event = 0;
 	struct tx_cmp *txcmp;
 
 	while (1) {
@@ -1650,12 +1737,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 			if (unlikely(tx_pkts > bp->tx_wake_thresh))
 				rx_pkts = budget;
 		} else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
-			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &agg_event);
+			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
 			if (likely(rc >= 0))
 				rx_pkts += rc;
 			else if (rc == -EBUSY)	/* partial completion */
 				break;
-			rx_event = true;
 		} else if (unlikely((TX_CMP_TYPE(txcmp) ==
 				     CMPL_BASE_TYPE_HWRM_DONE) ||
 				    (TX_CMP_TYPE(txcmp) ==
@@ -1670,6 +1756,18 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 			break;
 	}
 
+	if (event & BNXT_TX_EVENT) {
+		struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
+		void __iomem *db = txr->tx_doorbell;
+		u16 prod = txr->tx_prod;
+
+		/* Sync BD data before updating doorbell */
+		wmb();
+
+		writel(DB_KEY_TX | prod, db);
+		writel(DB_KEY_TX | prod, db);
+	}
+
 	cpr->cp_raw_cons = raw_cons;
 	/* ACK completion ring before freeing tx ring and producing new
 	 * buffers in rx/agg rings to prevent overflowing the completion
@@ -1678,14 +1776,14 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 	BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
 
 	if (tx_pkts)
-		bnxt_tx_int(bp, bnapi, tx_pkts);
+		bnapi->tx_int(bp, bnapi, tx_pkts);
 
-	if (rx_event) {
+	if (event & BNXT_RX_EVENT) {
 		struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
 
 		writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell);
 		writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell);
-		if (agg_event) {
+		if (event & BNXT_AGG_EVENT) {
 			writel(DB_KEY_RX | rxr->rx_agg_prod,
 			       rxr->rx_agg_doorbell);
 			writel(DB_KEY_RX | rxr->rx_agg_prod,
@@ -1706,7 +1804,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
 	u32 cp_cons, tmp_raw_cons;
 	u32 raw_cons = cpr->cp_raw_cons;
 	u32 rx_pkts = 0;
-	bool agg_event = false;
+	u8 event = 0;
 
 	while (1) {
 		int rc;
@@ -1730,7 +1828,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
 			rxcmp1->rx_cmp_cfa_code_errors_v2 |=
 				cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
 
-			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &agg_event);
+			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
 			if (likely(rc == -EIO))
 				rx_pkts++;
 			else if (rc == -EBUSY)	/* partial completion */
@@ -1753,7 +1851,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
 	writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell);
 	writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell);
 
-	if (agg_event) {
+	if (event & BNXT_AGG_EVENT) {
 		writel(DB_KEY_RX | rxr->rx_agg_prod, rxr->rx_agg_doorbell);
 		writel(DB_KEY_RX | rxr->rx_agg_prod, rxr->rx_agg_doorbell);
 	}
@@ -1866,11 +1964,9 @@ static void bnxt_free_rx_skbs(struct bnxt *bp)
 				if (!data)
 					continue;
 
-				dma_unmap_single(
-					&pdev->dev,
-					dma_unmap_addr(tpa_info, mapping),
-					bp->rx_buf_use_size,
-					PCI_DMA_FROMDEVICE);
+				dma_unmap_single(&pdev->dev, tpa_info->mapping,
+						 bp->rx_buf_use_size,
+						 bp->rx_dir);
 
 				tpa_info->data = NULL;
 
@@ -1880,19 +1976,20 @@ static void bnxt_free_rx_skbs(struct bnxt *bp)
 
 		for (j = 0; j < max_idx; j++) {
 			struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j];
-			u8 *data = rx_buf->data;
+			void *data = rx_buf->data;
 
 			if (!data)
 				continue;
 
-			dma_unmap_single(&pdev->dev,
-					 dma_unmap_addr(rx_buf, mapping),
-					 bp->rx_buf_use_size,
-					 PCI_DMA_FROMDEVICE);
+			dma_unmap_single(&pdev->dev, rx_buf->mapping,
+					 bp->rx_buf_use_size, bp->rx_dir);
 
 			rx_buf->data = NULL;
 
-			kfree(data);
+			if (BNXT_RX_PAGE_MODE(bp))
+				__free_page(data);
+			else
+				kfree(data);
 		}
 
 		for (j = 0; j < max_agg_idx; j++) {
@@ -1903,8 +2000,7 @@ static void bnxt_free_rx_skbs(struct bnxt *bp)
 			if (!page)
 				continue;
 
-			dma_unmap_page(&pdev->dev,
-				       dma_unmap_addr(rx_agg_buf, mapping),
+			dma_unmap_page(&pdev->dev, rx_agg_buf->mapping,
 				       BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE);
 
 			rx_agg_buf->page = NULL;
@@ -1995,6 +2091,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 		struct bnxt_ring_struct *ring;
 
+		if (rxr->xdp_prog)
+			bpf_prog_put(rxr->xdp_prog);
+
 		kfree(rxr->rx_tpa);
 		rxr->rx_tpa = NULL;
 
@@ -2133,6 +2232,8 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
 			memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
 		}
 		ring->queue_id = bp->q_info[j].queue_id;
+		if (i < bp->tx_nr_rings_xdp)
+			continue;
 		if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
 			j++;
 	}
@@ -2280,6 +2381,15 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr)
 	ring = &rxr->rx_ring_struct;
 	bnxt_init_rxbd_pages(ring, type);
 
+	if (BNXT_RX_PAGE_MODE(bp) && bp->xdp_prog) {
+		rxr->xdp_prog = bpf_prog_add(bp->xdp_prog, 1);
+		if (IS_ERR(rxr->xdp_prog)) {
+			int rc = PTR_ERR(rxr->xdp_prog);
+
+			rxr->xdp_prog = NULL;
+			return rc;
+		}
+	}
 	prod = rxr->rx_prod;
 	for (i = 0; i < bp->rx_ring_size; i++) {
 		if (bnxt_alloc_rx_data(bp, rxr, prod, GFP_KERNEL) != 0) {
@@ -2326,6 +2436,7 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr)
 					return -ENOMEM;
 
 				rxr->rx_tpa[i].data = data;
+				rxr->rx_tpa[i].data_ptr = data + bp->rx_offset;
 				rxr->rx_tpa[i].mapping = mapping;
 			}
 		} else {
@@ -2341,6 +2452,14 @@ static int bnxt_init_rx_rings(struct bnxt *bp)
 {
 	int i, rc = 0;
 
+	if (BNXT_RX_PAGE_MODE(bp)) {
+		bp->rx_offset = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+		bp->rx_dma_offset = XDP_PACKET_HEADROOM;
+	} else {
+		bp->rx_offset = BNXT_RX_OFFSET;
+		bp->rx_dma_offset = BNXT_RX_DMA_OFFSET;
+	}
+
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		rc = bnxt_init_one_rx_ring(bp, i);
 		if (rc)
@@ -2464,7 +2583,7 @@ static int bnxt_calc_nr_ring_pages(u32 ring_size, int desc_per_pg)
 	return pages;
 }
 
-static void bnxt_set_tpa_flags(struct bnxt *bp)
+void bnxt_set_tpa_flags(struct bnxt *bp)
 {
 	bp->flags &= ~BNXT_FLAG_TPA;
 	if (bp->flags & BNXT_FLAG_NO_AGG_RINGS)
@@ -2550,6 +2669,27 @@ void bnxt_set_ring_params(struct bnxt *bp)
 	bp->cp_ring_mask = bp->cp_bit - 1;
 }
 
+int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
+{
+	if (page_mode) {
+		if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU)
+			return -EOPNOTSUPP;
+		bp->dev->max_mtu = BNXT_MAX_PAGE_MODE_MTU;
+		bp->flags &= ~BNXT_FLAG_AGG_RINGS;
+		bp->flags |= BNXT_FLAG_NO_AGG_RINGS | BNXT_FLAG_RX_PAGE_MODE;
+		bp->dev->hw_features &= ~NETIF_F_LRO;
+		bp->dev->features &= ~NETIF_F_LRO;
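+		/* Map RX pages bidirectionally; XDP_TX transmits directly
+		 * out of the RX buffers.
+		 */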
+		bp->rx_dir = DMA_BIDIRECTIONAL;
+		bp->rx_skb_func = bnxt_rx_page_skb;
+	} else {
+		bp->dev->max_mtu = BNXT_MAX_MTU;
+		bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
+		bp->rx_dir = DMA_FROM_DEVICE;
+		bp->rx_skb_func = bnxt_rx_skb;
+	}
+	return 0;
+}
+
 static void bnxt_free_vnic_attributes(struct bnxt *bp)
 {
 	int i;
@@ -2859,6 +2999,8 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
 		bnxt_free_stats(bp);
 		bnxt_free_ring_grps(bp);
 		bnxt_free_vnics(bp);
+		kfree(bp->tx_ring_map);
+		bp->tx_ring_map = NULL;
 		kfree(bp->tx_ring);
 		bp->tx_ring = NULL;
 		kfree(bp->rx_ring);
@@ -2911,6 +3053,12 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
 		if (!bp->tx_ring)
 			return -ENOMEM;
 
+		bp->tx_ring_map = kcalloc(bp->tx_nr_rings, sizeof(u16),
+					  GFP_KERNEL);
+
+		if (!bp->tx_ring_map)
+			return -ENOMEM;
+
 		if (bp->flags & BNXT_FLAG_SHARED_RINGS)
 			j = 0;
 		else
@@ -2919,6 +3067,15 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
 		for (i = 0; i < bp->tx_nr_rings; i++, j++) {
 			bp->tx_ring[i].bnapi = bp->bnapi[j];
 			bp->bnapi[j]->tx_ring = &bp->tx_ring[i];
+			bp->tx_ring_map[i] = bp->tx_nr_rings_xdp + i;
+			if (i >= bp->tx_nr_rings_xdp) {
+				bp->tx_ring[i].txq_index = i -
+					bp->tx_nr_rings_xdp;
+				bp->bnapi[j]->tx_int = bnxt_tx_int;
+			} else {
+				bp->bnapi[j]->flags |= BNXT_NAPI_FLAG_XDP;
+				bp->bnapi[j]->tx_int = bnxt_tx_int_xdp;
+			}
 		}
 
 		rc = bnxt_alloc_stats(bp);
@@ -4096,7 +4253,7 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
 	return rc;
 }
 
-int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings)
+static int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings)
 {
 	struct hwrm_func_cfg_input req = {0};
 	int rc;
@@ -4841,7 +4998,8 @@ static int bnxt_set_real_num_queues(struct bnxt *bp)
 	int rc;
 	struct net_device *dev = bp->dev;
 
-	rc = netif_set_real_num_tx_queues(dev, bp->tx_nr_rings);
+	rc = netif_set_real_num_tx_queues(dev, bp->tx_nr_rings -
+					  bp->tx_nr_rings_xdp);
 	if (rc)
 		return rc;
 
@@ -4889,19 +5047,12 @@ static void bnxt_setup_msix(struct bnxt *bp)
 
 	tcs = netdev_get_num_tc(dev);
 	if (tcs > 1) {
-		bp->tx_nr_rings_per_tc = bp->tx_nr_rings / tcs;
-		if (bp->tx_nr_rings_per_tc == 0) {
-			netdev_reset_tc(dev);
-			bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
-		} else {
-			int i, off, count;
+		int i, off, count;
 
-			bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs;
-			for (i = 0; i < tcs; i++) {
-				count = bp->tx_nr_rings_per_tc;
-				off = i * count;
-				netdev_set_tc_queue(dev, i, count, off);
-			}
+		for (i = 0; i < tcs; i++) {
+			count = bp->tx_nr_rings_per_tc;
+			off = i * count;
+			netdev_set_tc_queue(dev, i, count, off);
 		}
 	}
 
@@ -6463,6 +6614,37 @@ static void bnxt_sp_task(struct work_struct *work)
 	clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
 }
 
+/* Under rtnl_lock */
+int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp)
+{
+	int max_rx, max_tx, tx_sets = 1;
+	int tx_rings_needed;
+	bool sh = true;
+	int rc;
+
+	if (!(bp->flags & BNXT_FLAG_SHARED_RINGS))
+		sh = false;
+
+	if (tcs)
+		tx_sets = tcs;
+
+	rc = bnxt_get_max_rings(bp, &max_rx, &max_tx, sh);
+	if (rc)
+		return rc;
+
+	if (max_rx < rx)
+		return -ENOMEM;
+
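+	/* One set of TX rings per TC, plus the TX rings dedicated to XDP */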
+	tx_rings_needed = tx * tx_sets + tx_xdp;
+	if (max_tx < tx_rings_needed)
+		return -ENOMEM;
+
+	if (bnxt_hwrm_reserve_tx_rings(bp, &tx_rings_needed) ||
+	    tx_rings_needed < (tx * tx_sets + tx_xdp))
+		return -ENOMEM;
+	return 0;
+}
+
 static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
 {
 	int rc;
@@ -6625,6 +6807,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 {
 	struct bnxt *bp = netdev_priv(dev);
 	bool sh = false;
+	int rc;
 
 	if (tc > bp->max_tc) {
 		netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n",
@@ -6638,19 +6821,10 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 	if (bp->flags & BNXT_FLAG_SHARED_RINGS)
 		sh = true;
 
-	if (tc) {
-		int max_rx_rings, max_tx_rings, req_tx_rings, rsv_tx_rings, rc;
-
-		req_tx_rings = bp->tx_nr_rings_per_tc * tc;
-		rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh);
-		if (rc || req_tx_rings > max_tx_rings)
-			return -ENOMEM;
-
-		rsv_tx_rings = req_tx_rings;
-		if (bnxt_hwrm_reserve_tx_rings(bp, &rsv_tx_rings) ||
-		    rsv_tx_rings < req_tx_rings)
-			return -ENOMEM;
-	}
+	rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
+				tc, bp->tx_nr_rings_xdp);
+	if (rc)
+		return rc;
 
 	/* Needs to close the device and do hw resource re-allocations */
 	if (netif_running(bp->dev))
@@ -6958,6 +7132,7 @@ static const struct net_device_ops bnxt_netdev_ops = {
 #endif
 	.ndo_udp_tunnel_add	= bnxt_udp_tunnel_add,
 	.ndo_udp_tunnel_del	= bnxt_udp_tunnel_del,
+	.ndo_xdp		= bnxt_xdp,
 };
 
 static void bnxt_remove_one(struct pci_dev *pdev)
@@ -6982,6 +7157,8 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 	pci_iounmap(pdev, bp->bar0);
 	kfree(bp->edev);
 	bp->edev = NULL;
+	if (bp->xdp_prog)
+		bpf_prog_put(bp->xdp_prog);
 	free_netdev(dev);
 
 	pci_release_regions(pdev);
@@ -7258,7 +7435,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* MTU range: 60 - 9500 */
 	dev->min_mtu = ETH_ZLEN;
-	dev->max_mtu = 9500;
+	dev->max_mtu = BNXT_MAX_MTU;
 
 	bnxt_dcb_init(bp);
 
@@ -7299,6 +7476,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	bnxt_hwrm_func_qcfg(bp);
 	bnxt_hwrm_port_led_qcaps(bp);
 
+	bnxt_set_rx_skb_mode(bp, false);
 	bnxt_set_tpa_flags(bp);
 	bnxt_set_ring_params(bp);
 	bnxt_set_max_func_irqs(bp, max_irqs);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 52a1cc0..9f07b9c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -416,6 +416,11 @@ struct rx_tpa_end_cmp_ext {
 
 #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT)
 
+#define BNXT_MAX_MTU		9500
+#define BNXT_MAX_PAGE_MODE_MTU	\
+	((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN -	\
+	 XDP_PACKET_HEADROOM)
+
 #define BNXT_MIN_PKT_SIZE	52
 
 #define BNXT_NUM_TESTS(bp)	0
@@ -507,17 +512,25 @@ struct rx_tpa_end_cmp_ext {
 #define BNXT_HWRM_REQS_PER_PAGE		(BNXT_PAGE_SIZE /	\
 					 BNXT_HWRM_REQ_MAX_SIZE)
 
+#define BNXT_RX_EVENT	1
+#define BNXT_AGG_EVENT	2
+#define BNXT_TX_EVENT	4
+
 struct bnxt_sw_tx_bd {
 	struct sk_buff		*skb;
 	DEFINE_DMA_UNMAP_ADDR(mapping);
 	u8			is_gso;
 	u8			is_push;
-	unsigned short		nr_frags;
+	union {
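+		/* XDP_TX BDs have no frags; reuse the space to remember
+		 * the RX producer for buffer recycling on completion.
+		 */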
+		unsigned short		nr_frags;
+		u16			rx_prod;
+	};
 };
 
 struct bnxt_sw_rx_bd {
-	u8			*data;
-	DEFINE_DMA_UNMAP_ADDR(mapping);
+	void			*data;
+	u8			*data_ptr;
+	dma_addr_t		mapping;
 };
 
 struct bnxt_sw_rx_agg_bd {
@@ -558,6 +571,7 @@ struct bnxt_tx_ring_info {
 	struct bnxt_napi	*bnapi;
 	u16			tx_prod;
 	u16			tx_cons;
+	u16			txq_index;
 	void __iomem		*tx_doorbell;
 
 	struct tx_bd		*tx_desc_ring[MAX_TX_PAGES];
@@ -576,7 +590,8 @@ struct bnxt_tx_ring_info {
 };
 
 struct bnxt_tpa_info {
-	u8			*data;
+	void			*data;
+	u8			*data_ptr;
 	dma_addr_t		mapping;
 	u16			len;
 	unsigned short		gso_type;
@@ -608,6 +623,8 @@ struct bnxt_rx_ring_info {
 	void __iomem		*rx_doorbell;
 	void __iomem		*rx_agg_doorbell;
 
+	struct bpf_prog		*xdp_prog;
+
 	struct rx_bd		*rx_desc_ring[MAX_RX_PAGES];
 	struct bnxt_sw_rx_bd	*rx_buf_ring;
 
@@ -654,6 +671,11 @@ struct bnxt_napi {
 	struct bnxt_rx_ring_info	*rx_ring;
 	struct bnxt_tx_ring_info	*tx_ring;
 
+	void			(*tx_int)(struct bnxt *, struct bnxt_napi *,
+					  int);
+	u32			flags;
+#define BNXT_NAPI_FLAG_XDP	0x1
+
 	bool			in_reset;
 };
 
@@ -965,6 +987,7 @@ struct bnxt {
 	#define BNXT_FLAG_ROCE_CAP	(BNXT_FLAG_ROCEV1_CAP |	\
 					 BNXT_FLAG_ROCEV2_CAP)
 	#define BNXT_FLAG_NO_AGG_RINGS	0x20000
+	#define BNXT_FLAG_RX_PAGE_MODE	0x40000
 	#define BNXT_FLAG_CHIP_NITRO_A0	0x1000000
 
 	#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |		\
@@ -976,6 +999,7 @@ struct bnxt {
 #define BNXT_NPAR(bp)		((bp)->port_partition_type)
 #define BNXT_SINGLE_PF(bp)	(BNXT_PF(bp) && !BNXT_NPAR(bp))
 #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0)
+#define BNXT_RX_PAGE_MODE(bp)	((bp)->flags & BNXT_FLAG_RX_PAGE_MODE)
 
 	struct bnxt_en_dev	*edev;
 	struct bnxt_en_dev *	(*ulp_probe)(struct net_device *);
@@ -984,12 +1008,21 @@ struct bnxt {
 
 	struct bnxt_rx_ring_info	*rx_ring;
 	struct bnxt_tx_ring_info	*tx_ring;
+	u16			*tx_ring_map;
 
 	struct sk_buff *	(*gro_func)(struct bnxt_tpa_info *, int, int,
 					    struct sk_buff *);
 
+	struct sk_buff *	(*rx_skb_func)(struct bnxt *,
+					       struct bnxt_rx_ring_info *,
+					       u16, void *, u8 *, dma_addr_t,
+					       unsigned int);
+
 	u32			rx_buf_size;
 	u32			rx_buf_use_size;	/* useable size */
+	u16			rx_offset;
+	u16			rx_dma_offset;
+	enum dma_data_direction	rx_dir;
 	u32			rx_ring_size;
 	u32			rx_agg_ring_size;
 	u32			rx_copy_thresh;
@@ -1005,6 +1038,7 @@ struct bnxt {
 	int			tx_nr_pages;
 	int			tx_nr_rings;
 	int			tx_nr_rings_per_tc;
+	int			tx_nr_rings_xdp;
 
 	int			tx_wake_thresh;
 	int			tx_push_thresh;
@@ -1140,6 +1174,8 @@ struct bnxt {
 
 	u8			num_leds;
 	struct bnxt_led_info	leds[BNXT_MAX_LED];
+
+	struct bpf_prog		*xdp_prog;
 };
 
 #define BNXT_RX_STATS_OFFSET(counter)			\
@@ -1159,7 +1195,23 @@ struct bnxt {
 #define SFF_MODULE_ID_QSFP28			0x11
 #define BNXT_MAX_PHY_I2C_RESP_SIZE		64
 
+static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
+{
+	/* Tell compiler to fetch tx indices from memory. */
+	barrier();
+
+	return bp->tx_ring_size -
+		((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
+}
+
+extern const u16 bnxt_lhint_arr[];
+
+int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+		       u16 prod, gfp_t gfp);
+void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data);
+void bnxt_set_tpa_flags(struct bnxt *bp);
 void bnxt_set_ring_params(struct bnxt *);
+int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
 void bnxt_hwrm_cmd_hdr_init(struct bnxt *, void *, u16, u16, u16);
 int _hwrm_send_message(struct bnxt *, void *, u32, int);
 int hwrm_send_message(struct bnxt *, void *, u32, int);
@@ -1168,7 +1220,6 @@ int bnxt_hwrm_func_rgtr_async_events(struct bnxt *bp, unsigned long *bmap,
 				     int bmap_size);
 int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id);
 int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings);
-int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings);
 int bnxt_hwrm_set_coal(struct bnxt *);
 unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
 void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max);
@@ -1182,6 +1233,7 @@ int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
 int bnxt_hwrm_fw_set_time(struct bnxt *);
 int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_close_nic(struct bnxt *, bool, bool);
+int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp);
 int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
 int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
 void bnxt_restore_pf_fw_resources(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 24818e1..7aa248d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -387,10 +387,10 @@ static int bnxt_set_channels(struct net_device *dev,
 			     struct ethtool_channels *channel)
 {
 	struct bnxt *bp = netdev_priv(dev);
-	int max_rx_rings, max_tx_rings, tcs;
-	int req_tx_rings, rsv_tx_rings;
-	u32 rc = 0;
+	int req_tx_rings, req_rx_rings, tcs;
 	bool sh = false;
+	int tx_xdp = 0;
+	int rc = 0;
 
 	if (channel->other_count)
 		return -EINVAL;
@@ -410,32 +410,21 @@ static int bnxt_set_channels(struct net_device *dev,
 	if (channel->combined_count)
 		sh = true;
 
-	bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh);
-
 	tcs = netdev_get_num_tc(dev);
-	if (tcs > 1)
-		max_tx_rings /= tcs;
-
-	if (sh &&
-	    channel->combined_count > max_t(int, max_rx_rings, max_tx_rings))
-		return -ENOMEM;
-
-	if (!sh && (channel->rx_count > max_rx_rings ||
-		    channel->tx_count > max_tx_rings))
-		return -ENOMEM;
 
 	req_tx_rings = sh ? channel->combined_count : channel->tx_count;
-	req_tx_rings = min_t(int, req_tx_rings, max_tx_rings);
-	if (tcs > 1)
-		req_tx_rings *= tcs;
-
-	rsv_tx_rings = req_tx_rings;
-	if (bnxt_hwrm_reserve_tx_rings(bp, &rsv_tx_rings))
-		return -ENOMEM;
-
-	if (rsv_tx_rings < req_tx_rings) {
-		netdev_warn(dev, "Unable to allocate the requested tx rings\n");
-		return -ENOMEM;
+	req_rx_rings = sh ? channel->combined_count : channel->rx_count;
+	if (bp->tx_nr_rings_xdp) {
+		if (!sh) {
+			netdev_err(dev, "Only combined mode supported when XDP is enabled.\n");
+			return -EINVAL;
+		}
+		tx_xdp = req_rx_rings;
+	}
+	rc = bnxt_reserve_rings(bp, req_tx_rings, req_rx_rings, tcs, tx_xdp);
+	if (rc) {
+		netdev_warn(dev, "Unable to allocate the requested rings\n");
+		return rc;
 	}
 
 	if (netif_running(dev)) {
@@ -454,19 +443,17 @@ static int bnxt_set_channels(struct net_device *dev,
 
 	if (sh) {
 		bp->flags |= BNXT_FLAG_SHARED_RINGS;
-		bp->rx_nr_rings = min_t(int, channel->combined_count,
-					max_rx_rings);
-		bp->tx_nr_rings_per_tc = min_t(int, channel->combined_count,
-					       max_tx_rings);
+		bp->rx_nr_rings = channel->combined_count;
+		bp->tx_nr_rings_per_tc = channel->combined_count;
 	} else {
 		bp->flags &= ~BNXT_FLAG_SHARED_RINGS;
 		bp->rx_nr_rings = channel->rx_count;
 		bp->tx_nr_rings_per_tc = channel->tx_count;
 	}
-
-	bp->tx_nr_rings = bp->tx_nr_rings_per_tc;
+	bp->tx_nr_rings_xdp = tx_xdp;
+	bp->tx_nr_rings = bp->tx_nr_rings_per_tc + tx_xdp;
 	if (tcs > 1)
-		bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs;
+		bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp;
 
 	bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
 			       bp->tx_nr_rings + bp->rx_nr_rings;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
new file mode 100644
index 0000000..899c30f
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -0,0 +1,240 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016-2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/filter.h>
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_xdp.h"
+
+static void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
+			  dma_addr_t mapping, u32 len, u16 rx_prod)
+{
+	struct bnxt_sw_tx_bd *tx_buf;
+	struct tx_bd_ext *txbd1;
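+	/* Attach the whole page as frag 0, copy the headers into the
+	 * linear area, then trim the copied bytes from the frag.
+	 */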
+	struct tx_bd *txbd;
+	u32 flags;
+	u16 prod;
+
+	prod = txr->tx_prod;
+	tx_buf = &txr->tx_buf_ring[prod];
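+	/* Save the RX producer so the completion path knows how far the
+	 * RX ring can be replenished when this BD is reclaimed.
+	 */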
+	tx_buf->rx_prod = rx_prod;
+
+	txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+	flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD |
+		(2 << TX_BD_FLAGS_BD_CNT_SHIFT) | TX_BD_FLAGS_COAL_NOW |
+		TX_BD_FLAGS_PACKET_END | bnxt_lhint_arr[len >> 9];
+	txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
+	txbd->tx_bd_opaque = prod;
+	txbd->tx_bd_haddr = cpu_to_le64(mapping);
+
+	prod = NEXT_TX(prod);
+	txbd1 = (struct tx_bd_ext *)
+		&txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+
+	txbd1->tx_bd_hsize_lflags = cpu_to_le32(0);
+	txbd1->tx_bd_mss = cpu_to_le32(0);
+	txbd1->tx_bd_cfa_action = cpu_to_le32(0);
+	txbd1->tx_bd_cfa_meta = cpu_to_le32(0);
+
+	prod = NEXT_TX(prod);
+	txr->tx_prod = prod;
+}
+
+void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+{
+	struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
+	struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
+	struct bnxt_sw_tx_bd *tx_buf;
+	u16 tx_cons = txr->tx_cons;
+	u16 last_tx_cons = tx_cons;
+	u16 rx_prod;
+	int i;
+
+	for (i = 0; i < nr_pkts; i++) {
+		last_tx_cons = tx_cons;
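+		/* Each XDP_TX packet consumed two TX BDs */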
+		tx_cons = NEXT_TX(tx_cons);
+		tx_cons = NEXT_TX(tx_cons);
+	}
+	txr->tx_cons = tx_cons;
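+	/* If the ring is now completely idle, every RX buffer has been
+	 * recycled and the RX producer can advance all the way; otherwise
+	 * only advance to the value saved with the last completed packet.
+	 */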
+	if (bnxt_tx_avail(bp, txr) == bp->tx_ring_size) {
+		rx_prod = rxr->rx_prod;
+	} else {
+		tx_buf = &txr->tx_buf_ring[last_tx_cons];
+		rx_prod = tx_buf->rx_prod;
+	}
+	writel(DB_KEY_RX | rx_prod, rxr->rx_doorbell);
+}
+
+/* returns the following:
+ * true    - packet consumed by XDP and new buffer is allocated.
+ * false   - packet should be passed to the stack.
+ */
+bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
+		 struct page *page, u8 **data_ptr, unsigned int *len, u8 *event)
+{
+	struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog);
+	struct bnxt_tx_ring_info *txr;
+	struct bnxt_sw_rx_bd *rx_buf;
+	struct pci_dev *pdev;
+	struct xdp_buff xdp;
+	dma_addr_t mapping;
+	void *orig_data;
+	u32 tx_avail;
+	u32 offset;
+	u32 act;
+
+	if (!xdp_prog)
+		return false;
+
+	pdev = bp->pdev;
+	txr = rxr->bnapi->tx_ring;
+	rx_buf = &rxr->rx_buf_ring[cons];
+	offset = bp->rx_offset;
+
+	xdp.data_hard_start = *data_ptr - offset;
+	xdp.data = *data_ptr;
+	xdp.data_end = *data_ptr + *len;
+	orig_data = xdp.data;
+	mapping = rx_buf->mapping - bp->rx_dma_offset;
+
+	dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
+
+	rcu_read_lock();
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	rcu_read_unlock();
+
+	tx_avail = bnxt_tx_avail(bp, txr);
+	/* If the tx ring is not full, we must not update the rx producer yet
+	 * because we may still be transmitting on some BDs.
+	 */
+	if (tx_avail != bp->tx_ring_size)
+		*event &= ~BNXT_RX_EVENT;
+
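+	/* The program may have changed the headroom (bpf_xdp_adjust_head());
+	 * pass the modified offset and length back to the caller.
+	 */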
+	if (orig_data != xdp.data) {
+		offset = xdp.data - xdp.data_hard_start;
+		*data_ptr = xdp.data_hard_start + offset;
+		*len = xdp.data_end - xdp.data;
+	}
+	switch (act) {
+	case XDP_PASS:
+		return false;
+
+	case XDP_TX:
+		if (tx_avail < 2) {
+			trace_xdp_exception(bp->dev, xdp_prog, act);
+			bnxt_reuse_rx_data(rxr, cons, page);
+			return true;
+		}
+
+		*event = BNXT_TX_EVENT;
+		dma_sync_single_for_device(&pdev->dev, mapping + offset, *len,
+					   bp->rx_dir);
+		bnxt_xmit_xdp(bp, txr, mapping + offset, *len,
+			      NEXT_RX(rxr->rx_prod));
+		bnxt_reuse_rx_data(rxr, cons, page);
+		return true;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* Fall thru */
+	case XDP_ABORTED:
+		trace_xdp_exception(bp->dev, xdp_prog, act);
+		/* Fall thru */
+	case XDP_DROP:
+		bnxt_reuse_rx_data(rxr, cons, page);
+		break;
+	}
+	return true;
+}
+
+/* Under rtnl_lock */
+static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
+{
+	struct net_device *dev = bp->dev;
+	int tx_xdp = 0, rc, tc;
+	struct bpf_prog *old;
+
+	if (prog && bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
+		netdev_warn(dev, "MTU %d larger than largest XDP supported MTU %d.\n",
+			    bp->dev->mtu, BNXT_MAX_PAGE_MODE_MTU);
+		return -EOPNOTSUPP;
+	}
+	if (!(bp->flags & BNXT_FLAG_SHARED_RINGS)) {
+		netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
+		return -EOPNOTSUPP;
+	}
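+	/* XDP_TX uses one dedicated TX ring per RX ring */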
+	if (prog)
+		tx_xdp = bp->rx_nr_rings;
+
+	tc = netdev_get_num_tc(dev);
+	if (!tc)
+		tc = 1;
+	rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
+				tc, tx_xdp);
+	if (rc) {
+		netdev_warn(dev, "Unable to reserve enough TX rings to support XDP.\n");
+		return rc;
+	}
+	if (netif_running(dev))
+		bnxt_close_nic(bp, true, false);
+
+	old = xchg(&bp->xdp_prog, prog);
+	if (old)
+		bpf_prog_put(old);
+
+	if (prog) {
+		bnxt_set_rx_skb_mode(bp, true);
+	} else {
+		int rx, tx;
+
+		bnxt_set_rx_skb_mode(bp, false);
+		bnxt_get_max_rings(bp, &rx, &tx, true);
+		if (rx > 1) {
+			bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS;
+			bp->dev->hw_features |= NETIF_F_LRO;
+		}
+	}
+	bp->tx_nr_rings_xdp = tx_xdp;
+	bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
+	bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings);
+	bp->num_stat_ctxs = bp->cp_nr_rings;
+	bnxt_set_tpa_flags(bp);
+	bnxt_set_ring_params(bp);
+
+	if (netif_running(dev))
+		return bnxt_open_nic(bp, true, false);
+
+	return 0;
+}
+
+int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	int rc;
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		rc = bnxt_xdp_set(bp, xdp->prog);
+		break;
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = !!bp->xdp_prog;
+		rc = 0;
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+	return rc;
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
new file mode 100644
index 0000000..b529f2c
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
@@ -0,0 +1,19 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016-2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_XDP_H
+#define BNXT_XDP_H
+
+void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
+bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
+		 struct page *page, u8 **data_ptr, unsigned int *len,
+		 u8 *event);
+int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp);
+
+#endif