Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Tony Nguyen says:
====================
40GbE Intel Wired LAN Driver Updates 2020-09-14
This series contains updates to i40e driver only.
Li RongQing removes the binding of the affinity mask to a fixed CPU and
makes prefetching of the Rx buffer page conditional.
Björn provides AF_XDP performance improvements by not prefetching HW
descriptors, using 16-byte descriptors, and moving buffer allocation
out of the Rx processing loop.
v2: Define prefetch_page_address in a common header for patch 2.
Dropped previous patch 5 as it is being reworked to be more
generalized.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
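Most hunks below convert drivers from the old tasklet_init() callback style
(an unsigned long cookie cast back to a private pointer) to tasklet_setup()
with a from_tasklet() lookup in the handler. A minimal sketch of that
conversion pattern, using hypothetical "foo" names rather than any driver
below, looks like this:

	#include <linux/interrupt.h>
	#include <linux/netdevice.h>

	struct foo_priv {
		struct net_device *ndev;	/* backpointer, as several drivers add */
		struct tasklet_struct rx_task;	/* tasklet embedded in the private struct */
	};

	/* New-style callback: receives the tasklet, recovers its container. */
	static void foo_rx_task(struct tasklet_struct *t)
	{
		struct foo_priv *priv = from_tasklet(priv, t, rx_task);
		struct net_device *ndev = priv->ndev;

		/* ... per-device bottom-half work ... */
	}

	static void foo_init(struct foo_priv *priv)
	{
		/* Old API: tasklet_init(&priv->rx_task, foo_rx_task, (unsigned long)priv); */
		tasklet_setup(&priv->rx_task, foo_rx_task);
	}

from_tasklet() is a container_of() wrapper, so drivers whose callback needs
the net_device rather than the private struct grow a small backpointer field,
as the acenic, sundance, and sc92031 hunks do.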
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 8470c83..1a7e4df 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -465,6 +465,7 @@ static int acenic_probe_one(struct pci_dev *pdev,
SET_NETDEV_DEV(dev, &pdev->dev);
ap = netdev_priv(dev);
+ ap->ndev = dev;
ap->pdev = pdev;
ap->name = pci_name(pdev);
@@ -1562,10 +1563,10 @@ static void ace_watchdog(struct net_device *data, unsigned int txqueue)
}
-static void ace_tasklet(unsigned long arg)
+static void ace_tasklet(struct tasklet_struct *t)
{
- struct net_device *dev = (struct net_device *) arg;
- struct ace_private *ap = netdev_priv(dev);
+ struct ace_private *ap = from_tasklet(ap, t, ace_tasklet);
+ struct net_device *dev = ap->ndev;
int cur_size;
cur_size = atomic_read(&ap->cur_rx_bufs);
@@ -2269,7 +2270,7 @@ static int ace_open(struct net_device *dev)
/*
* Setup the bottom half rx ring refill handler
*/
- tasklet_init(&ap->ace_tasklet, ace_tasklet, (unsigned long)dev);
+ tasklet_setup(&ap->ace_tasklet, ace_tasklet);
return 0;
}
diff --git a/drivers/net/ethernet/alteon/acenic.h b/drivers/net/ethernet/alteon/acenic.h
index c670067..265fa60 100644
--- a/drivers/net/ethernet/alteon/acenic.h
+++ b/drivers/net/ethernet/alteon/acenic.h
@@ -633,6 +633,7 @@ struct ace_skb
*/
struct ace_private
{
+ struct net_device *ndev; /* backpointer */
struct ace_info *info;
struct ace_regs __iomem *regs; /* register base */
struct ace_skb *skb;
@@ -776,7 +777,7 @@ static int ace_open(struct net_device *dev);
static netdev_tx_t ace_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static int ace_close(struct net_device *dev);
-static void ace_tasklet(unsigned long dev);
+static void ace_tasklet(struct tasklet_struct *t);
static void ace_dump_trace(struct ace_private *ap);
static void ace_set_multicast_list(struct net_device *dev);
static int ace_change_mtu(struct net_device *dev, int new_mtu);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 4ba7555..2709a2d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -403,9 +403,9 @@ static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period,
return false;
}
-static void xgbe_ecc_isr_task(unsigned long data)
+static void xgbe_ecc_isr_task(struct tasklet_struct *t)
{
- struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+ struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_ecc);
unsigned int ecc_isr;
bool stop = false;
@@ -468,14 +468,14 @@ static irqreturn_t xgbe_ecc_isr(int irq, void *data)
if (pdata->isr_as_tasklet)
tasklet_schedule(&pdata->tasklet_ecc);
else
- xgbe_ecc_isr_task((unsigned long)pdata);
+ xgbe_ecc_isr_task(&pdata->tasklet_ecc);
return IRQ_HANDLED;
}
-static void xgbe_isr_task(unsigned long data)
+static void xgbe_isr_task(struct tasklet_struct *t)
{
- struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+ struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_dev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
struct xgbe_channel *channel;
unsigned int dma_isr, dma_ch_isr;
@@ -582,7 +582,7 @@ static void xgbe_isr_task(unsigned long data)
/* If there is not a separate ECC irq, handle it here */
if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq))
- xgbe_ecc_isr_task((unsigned long)pdata);
+ xgbe_ecc_isr_task(&pdata->tasklet_ecc);
/* If there is not a separate I2C irq, handle it here */
if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq))
@@ -607,7 +607,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
if (pdata->isr_as_tasklet)
tasklet_schedule(&pdata->tasklet_dev);
else
- xgbe_isr_task((unsigned long)pdata);
+ xgbe_isr_task(&pdata->tasklet_dev);
return IRQ_HANDLED;
}
@@ -991,9 +991,8 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
unsigned int i;
int ret;
- tasklet_init(&pdata->tasklet_dev, xgbe_isr_task, (unsigned long)pdata);
- tasklet_init(&pdata->tasklet_ecc, xgbe_ecc_isr_task,
- (unsigned long)pdata);
+ tasklet_setup(&pdata->tasklet_dev, xgbe_isr_task);
+ tasklet_setup(&pdata->tasklet_ecc, xgbe_ecc_isr_task);
ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0,
netdev_name(netdev), pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
index 4d9062d..22d4fc5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -274,9 +274,9 @@ static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata,
XI2C_IOREAD(pdata, IC_CLR_STOP_DET);
}
-static void xgbe_i2c_isr_task(unsigned long data)
+static void xgbe_i2c_isr_task(struct tasklet_struct *t)
{
- struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+ struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_i2c);
struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
unsigned int isr;
@@ -324,7 +324,7 @@ static irqreturn_t xgbe_i2c_isr(int irq, void *data)
if (pdata->isr_as_tasklet)
tasklet_schedule(&pdata->tasklet_i2c);
else
- xgbe_i2c_isr_task((unsigned long)pdata);
+ xgbe_i2c_isr_task(&pdata->tasklet_i2c);
return IRQ_HANDLED;
}
@@ -369,7 +369,7 @@ static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr)
static irqreturn_t xgbe_i2c_combined_isr(struct xgbe_prv_data *pdata)
{
- xgbe_i2c_isr_task((unsigned long)pdata);
+ xgbe_i2c_isr_task(&pdata->tasklet_i2c);
return IRQ_HANDLED;
}
@@ -462,8 +462,7 @@ static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
/* If we have a separate I2C irq, enable it */
if (pdata->dev_irq != pdata->i2c_irq) {
- tasklet_init(&pdata->tasklet_i2c, xgbe_i2c_isr_task,
- (unsigned long)pdata);
+ tasklet_setup(&pdata->tasklet_i2c, xgbe_i2c_isr_task);
ret = devm_request_irq(pdata->dev, pdata->i2c_irq,
xgbe_i2c_isr, 0, pdata->i2c_name,
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 8a3a60b..93ef5a3 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -688,9 +688,9 @@ static void xgbe_an73_isr(struct xgbe_prv_data *pdata)
}
}
-static void xgbe_an_isr_task(unsigned long data)
+static void xgbe_an_isr_task(struct tasklet_struct *t)
{
- struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+ struct xgbe_prv_data *pdata = from_tasklet(pdata, t, tasklet_an);
netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n");
@@ -715,14 +715,14 @@ static irqreturn_t xgbe_an_isr(int irq, void *data)
if (pdata->isr_as_tasklet)
tasklet_schedule(&pdata->tasklet_an);
else
- xgbe_an_isr_task((unsigned long)pdata);
+ xgbe_an_isr_task(&pdata->tasklet_an);
return IRQ_HANDLED;
}
static irqreturn_t xgbe_an_combined_isr(struct xgbe_prv_data *pdata)
{
- xgbe_an_isr_task((unsigned long)pdata);
+ xgbe_an_isr_task(&pdata->tasklet_an);
return IRQ_HANDLED;
}
@@ -1414,8 +1414,7 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata)
/* If we have a separate AN irq, enable it */
if (pdata->dev_irq != pdata->an_irq) {
- tasklet_init(&pdata->tasklet_an, xgbe_an_isr_task,
- (unsigned long)pdata);
+ tasklet_setup(&pdata->tasklet_an, xgbe_an_isr_task);
ret = devm_request_irq(pdata->dev, pdata->an_irq,
xgbe_an_isr, 0, pdata->an_name,
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 8453629..f7f10cf 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -3009,10 +3009,10 @@ static int cnic_service_bnx2(void *data, void *status_blk)
return cnic_service_bnx2_queues(dev);
}
-static void cnic_service_bnx2_msix(unsigned long data)
+static void cnic_service_bnx2_msix(struct tasklet_struct *t)
{
- struct cnic_dev *dev = (struct cnic_dev *) data;
- struct cnic_local *cp = dev->cnic_priv;
+ struct cnic_local *cp = from_tasklet(cp, t, cnic_irq_task);
+ struct cnic_dev *dev = cp->dev;
cp->last_status_idx = cnic_service_bnx2_queues(dev);
@@ -3134,10 +3134,10 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
return last_status;
}
-static void cnic_service_bnx2x_bh(unsigned long data)
+static void cnic_service_bnx2x_bh(struct tasklet_struct *t)
{
- struct cnic_dev *dev = (struct cnic_dev *) data;
- struct cnic_local *cp = dev->cnic_priv;
+ struct cnic_local *cp = from_tasklet(cp, t, cnic_irq_task);
+ struct cnic_dev *dev = cp->dev;
struct bnx2x *bp = netdev_priv(dev->netdev);
u32 status_idx, new_status_idx;
@@ -4458,8 +4458,7 @@ static int cnic_init_bnx2_irq(struct cnic_dev *dev)
CNIC_WR(dev, base + BNX2_HC_CMD_TICKS_OFF, (64 << 16) | 220);
cp->last_status_idx = cp->status_blk.bnx2->status_idx;
- tasklet_init(&cp->cnic_irq_task, cnic_service_bnx2_msix,
- (unsigned long) dev);
+ tasklet_setup(&cp->cnic_irq_task, cnic_service_bnx2_msix);
err = cnic_request_irq(dev);
if (err)
return err;
@@ -4868,8 +4867,7 @@ static int cnic_init_bnx2x_irq(struct cnic_dev *dev)
struct cnic_eth_dev *ethdev = cp->ethdev;
int err = 0;
- tasklet_init(&cp->cnic_irq_task, cnic_service_bnx2x_bh,
- (unsigned long) dev);
+ tasklet_setup(&cp->cnic_irq_task, cnic_service_bnx2x_bh);
if (ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX)
err = cnic_request_irq(dev);
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 6761f40..830c537 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1466,9 +1466,9 @@ static int macb_poll(struct napi_struct *napi, int budget)
return work_done;
}
-static void macb_hresp_error_task(unsigned long data)
+static void macb_hresp_error_task(struct tasklet_struct *t)
{
- struct macb *bp = (struct macb *)data;
+ struct macb *bp = from_tasklet(bp, t, hresp_err_tasklet);
struct net_device *dev = bp->dev;
struct macb_queue *queue;
unsigned int q;
@@ -4560,8 +4560,7 @@ static int macb_probe(struct platform_device *pdev)
goto err_out_unregister_mdio;
}
- tasklet_init(&bp->hresp_err_tasklet, macb_hresp_error_task,
- (unsigned long)bp);
+ tasklet_setup(&bp->hresp_err_tasklet, macb_hresp_error_task);
netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 8e0ed01..9eac0d43 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -161,13 +161,13 @@ static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
static struct handshake handshake[MAX_OCTEON_DEVICES];
static struct completion first_stage;
-static void octeon_droq_bh(unsigned long pdev)
+static void octeon_droq_bh(struct tasklet_struct *t)
{
int q_no;
int reschedule = 0;
- struct octeon_device *oct = (struct octeon_device *)pdev;
- struct octeon_device_priv *oct_priv =
- (struct octeon_device_priv *)oct->priv;
+ struct octeon_device_priv *oct_priv = from_tasklet(oct_priv, t,
+ droq_tasklet);
+ struct octeon_device *oct = oct_priv->dev;
for (q_no = 0; q_no < MAX_OCTEON_OUTPUT_QUEUES(oct); q_no++) {
if (!(oct->io_qmask.oq & BIT_ULL(q_no)))
@@ -4193,8 +4193,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
/* Initialize the tasklet that handles output queue packet processing.*/
dev_dbg(&octeon_dev->pci_dev->dev, "Initializing droq tasklet\n");
- tasklet_init(&oct_priv->droq_tasklet, octeon_droq_bh,
- (unsigned long)octeon_dev);
+ tasklet_setup(&oct_priv->droq_tasklet, octeon_droq_bh);
/* Setup the interrupt handler and record the INT SUM register address
*/
@@ -4298,6 +4297,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
complete(&handshake[octeon_dev->octeon_id].init);
atomic_set(&octeon_dev->status, OCT_DEV_HOST_OK);
+ oct_priv->dev = octeon_dev;
return 0;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index 073d064..5b4cb72 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -39,6 +39,7 @@ struct octeon_device_priv {
/** Tasklet structures for this device. */
struct tasklet_struct droq_tasklet;
unsigned long napi_mask;
+ struct octeon_device *dev;
};
/** This structure is used by NIC driver to store information required
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 3e17ce0..e9d6a5b 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -315,9 +315,9 @@ static void octeon_mgmt_clean_tx_buffers(struct octeon_mgmt *p)
netif_wake_queue(p->netdev);
}
-static void octeon_mgmt_clean_tx_tasklet(unsigned long arg)
+static void octeon_mgmt_clean_tx_tasklet(struct tasklet_struct *t)
{
- struct octeon_mgmt *p = (struct octeon_mgmt *)arg;
+ struct octeon_mgmt *p = from_tasklet(p, t, tx_clean_tasklet);
octeon_mgmt_clean_tx_buffers(p);
octeon_mgmt_enable_tx_irq(p);
}
@@ -1489,8 +1489,8 @@ static int octeon_mgmt_probe(struct platform_device *pdev)
skb_queue_head_init(&p->tx_list);
skb_queue_head_init(&p->rx_list);
- tasklet_init(&p->tx_clean_tasklet,
- octeon_mgmt_clean_tx_tasklet, (unsigned long)p);
+ tasklet_setup(&p->tx_clean_tasklet,
+ octeon_mgmt_clean_tx_tasklet);
netdev->priv_flags |= IFF_UNICAST_FLT;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 063e560..0a94c39 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -985,9 +985,9 @@ static int nicvf_poll(struct napi_struct *napi, int budget)
*
* As of now only CQ errors are handled
*/
-static void nicvf_handle_qs_err(unsigned long data)
+static void nicvf_handle_qs_err(struct tasklet_struct *t)
{
- struct nicvf *nic = (struct nicvf *)data;
+ struct nicvf *nic = from_tasklet(nic, t, qs_err_task);
struct queue_set *qs = nic->qs;
int qidx;
u64 status;
@@ -1493,12 +1493,10 @@ int nicvf_open(struct net_device *netdev)
}
/* Init tasklet for handling Qset err interrupt */
- tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
- (unsigned long)nic);
+ tasklet_setup(&nic->qs_err_task, nicvf_handle_qs_err);
/* Init RBDR tasklet which will refill RBDR */
- tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
- (unsigned long)nic);
+ tasklet_setup(&nic->rbdr_task, nicvf_rbdr_task);
INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);
/* Configure CPI alorithm */
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index a45223f..7a141ce 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -460,9 +460,9 @@ void nicvf_rbdr_work(struct work_struct *work)
}
/* In Softirq context, alloc rcv buffers in atomic mode */
-void nicvf_rbdr_task(unsigned long data)
+void nicvf_rbdr_task(struct tasklet_struct *t)
{
- struct nicvf *nic = (struct nicvf *)data;
+ struct nicvf *nic = from_tasklet(nic, t, rbdr_task);
nicvf_refill_rbdr(nic, GFP_ATOMIC);
if (nic->rb_alloc_fail) {
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 2460451..8453def 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -348,7 +348,7 @@ void nicvf_xdp_sq_doorbell(struct nicvf *nic, struct snd_queue *sq, int sq_num);
struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic,
struct cqe_rx_t *cqe_rx, bool xdp);
-void nicvf_rbdr_task(unsigned long data);
+void nicvf_rbdr_task(struct tasklet_struct *t);
void nicvf_rbdr_work(struct work_struct *work);
void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx);
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 21016de..2d9c2b5 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -239,8 +239,10 @@ struct sched {
unsigned int num; /* num skbs in per port queues */
struct sched_port p[MAX_NPORTS];
struct tasklet_struct sched_tsk;/* tasklet used to run scheduler */
+ struct sge *sge;
};
-static void restart_sched(unsigned long);
+
+static void restart_sched(struct tasklet_struct *t);
/*
@@ -378,7 +380,8 @@ static int tx_sched_init(struct sge *sge)
return -ENOMEM;
pr_debug("tx_sched_init\n");
- tasklet_init(&s->sched_tsk, restart_sched, (unsigned long) sge);
+ tasklet_setup(&s->sched_tsk, restart_sched);
+ s->sge = sge;
sge->tx_sched = s;
for (i = 0; i < MAX_NPORTS; i++) {
@@ -1305,9 +1308,10 @@ static inline void reclaim_completed_tx(struct sge *sge, struct cmdQ *q)
* Called from tasklet. Checks the scheduler for any
* pending skbs that can be sent.
*/
-static void restart_sched(unsigned long arg)
+static void restart_sched(struct tasklet_struct *t)
{
- struct sge *sge = (struct sge *) arg;
+ struct sched *s = from_tasklet(s, t, sched_tsk);
+ struct sge *sge = s->sge;
struct adapter *adapter = sge->adapter;
struct cmdQ *q = &sge->cmdQ[0];
struct sk_buff *skb;
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index ee6188d..a978a00 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -1516,14 +1516,14 @@ static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
/**
* restart_ctrlq - restart a suspended control queue
- * @qs: the queue set cotaining the control queue
+ * @t: pointer to the tasklet associated with this handler
*
* Resumes transmission on a suspended Tx control queue.
*/
-static void restart_ctrlq(unsigned long data)
+static void restart_ctrlq(struct tasklet_struct *t)
{
struct sk_buff *skb;
- struct sge_qset *qs = (struct sge_qset *)data;
+ struct sge_qset *qs = from_tasklet(qs, t, txq[TXQ_CTRL].qresume_tsk);
struct sge_txq *q = &qs->txq[TXQ_CTRL];
spin_lock(&q->lock);
@@ -1733,14 +1733,14 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
/**
* restart_offloadq - restart a suspended offload queue
- * @qs: the queue set cotaining the offload queue
+ * @t: pointer to the tasklet associated with this handler
*
* Resumes transmission on a suspended Tx offload queue.
*/
-static void restart_offloadq(unsigned long data)
+static void restart_offloadq(struct tasklet_struct *t)
{
struct sk_buff *skb;
- struct sge_qset *qs = (struct sge_qset *)data;
+ struct sge_qset *qs = from_tasklet(qs, t, txq[TXQ_OFLD].qresume_tsk);
struct sge_txq *q = &qs->txq[TXQ_OFLD];
const struct port_info *pi = netdev_priv(qs->netdev);
struct adapter *adap = pi->adapter;
@@ -3081,10 +3081,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
skb_queue_head_init(&q->txq[i].sendq);
}
- tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
- (unsigned long)q);
- tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
- (unsigned long)q);
+ tasklet_setup(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq);
+ tasklet_setup(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq);
q->fl[0].gen = q->fl[1].gen = 1;
q->fl[0].size = p->fl_size;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 437c054..2b942b4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2660,15 +2660,15 @@ static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
/**
* restart_ctrlq - restart a suspended control queue
- * @data: the control queue to restart
+ * @t: pointer to the tasklet associated with this handler
*
* Resumes transmission on a suspended Tx control queue.
*/
-static void restart_ctrlq(unsigned long data)
+static void restart_ctrlq(struct tasklet_struct *t)
{
struct sk_buff *skb;
unsigned int written = 0;
- struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data;
+ struct sge_ctrl_txq *q = from_tasklet(q, t, qresume_tsk);
spin_lock(&q->sendq.lock);
reclaim_completed_tx_imm(&q->q);
@@ -2961,13 +2961,13 @@ static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb)
/**
* restart_ofldq - restart a suspended offload queue
- * @data: the offload queue to restart
+ * @t: pointer to the tasklet associated with this handler
*
* Resumes transmission on a suspended Tx offload queue.
*/
-static void restart_ofldq(unsigned long data)
+static void restart_ofldq(struct tasklet_struct *t)
{
- struct sge_uld_txq *q = (struct sge_uld_txq *)data;
+ struct sge_uld_txq *q = from_tasklet(q, t, qresume_tsk);
spin_lock(&q->sendq.lock);
q->full = 0; /* the queue actually is completely empty now */
@@ -4580,7 +4580,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid)));
txq->adap = adap;
skb_queue_head_init(&txq->sendq);
- tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq);
+ tasklet_setup(&txq->qresume_tsk, restart_ctrlq);
txq->full = 0;
return 0;
}
@@ -4670,7 +4670,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
txq->q.q_type = CXGB4_TXQ_ULD;
txq->adap = adap;
skb_queue_head_init(&txq->sendq);
- tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
+ tasklet_setup(&txq->qresume_tsk, restart_ofldq);
txq->full = 0;
txq->mapping_err = 0;
return 0;
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index b3f8597e..e3a8858 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -367,6 +367,7 @@ struct netdev_private {
dma_addr_t tx_ring_dma;
dma_addr_t rx_ring_dma;
struct timer_list timer; /* Media monitoring timer. */
+ struct net_device *ndev; /* backpointer */
/* ethtool extra stats */
struct {
u64 tx_multiple_collisions;
@@ -429,8 +430,8 @@ static void init_ring(struct net_device *dev);
static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
static int reset_tx (struct net_device *dev);
static irqreturn_t intr_handler(int irq, void *dev_instance);
-static void rx_poll(unsigned long data);
-static void tx_poll(unsigned long data);
+static void rx_poll(struct tasklet_struct *t);
+static void tx_poll(struct tasklet_struct *t);
static void refill_rx (struct net_device *dev);
static void netdev_error(struct net_device *dev, int intr_status);
static void netdev_error(struct net_device *dev, int intr_status);
@@ -531,14 +532,15 @@ static int sundance_probe1(struct pci_dev *pdev,
cpu_to_le16(eeprom_read(ioaddr, i + EEPROM_SA_OFFSET));
np = netdev_priv(dev);
+ np->ndev = dev;
np->base = ioaddr;
np->pci_dev = pdev;
np->chip_id = chip_idx;
np->msg_enable = (1 << debug) - 1;
spin_lock_init(&np->lock);
spin_lock_init(&np->statlock);
- tasklet_init(&np->rx_tasklet, rx_poll, (unsigned long)dev);
- tasklet_init(&np->tx_tasklet, tx_poll, (unsigned long)dev);
+ tasklet_setup(&np->rx_tasklet, rx_poll);
+ tasklet_setup(&np->tx_tasklet, tx_poll);
ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE,
&ring_dma, GFP_KERNEL);
@@ -1054,10 +1056,9 @@ static void init_ring(struct net_device *dev)
}
}
-static void tx_poll (unsigned long data)
+static void tx_poll(struct tasklet_struct *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct netdev_private *np = netdev_priv(dev);
+ struct netdev_private *np = from_tasklet(np, t, tx_tasklet);
unsigned head = np->cur_task % TX_RING_SIZE;
struct netdev_desc *txdesc =
&np->tx_ring[(np->cur_tx - 1) % TX_RING_SIZE];
@@ -1312,10 +1313,10 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
return IRQ_RETVAL(handled);
}
-static void rx_poll(unsigned long data)
+static void rx_poll(struct tasklet_struct *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct netdev_private *np = netdev_priv(dev);
+ struct netdev_private *np = from_tasklet(np, t, rx_tasklet);
+ struct net_device *dev = np->ndev;
int entry = np->cur_rx % RX_RING_SIZE;
int boguscnt = np->budget;
void __iomem *ioaddr = np->base;
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 3143df9..7f87b0f 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -507,12 +507,12 @@ static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dnet *bp = netdev_priv(dev);
- u32 tx_status, irq_enable;
+ u32 irq_enable;
unsigned int i, tx_cmd, wrsz;
unsigned long flags;
unsigned int *bufp;
- tx_status = dnet_readl(bp, TX_STATUS);
+ dnet_readl(bp, TX_STATUS);
pr_debug("start_xmit: len %u head %p data %p\n",
skb->len, skb->head, skb->data);
@@ -520,7 +520,7 @@ static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
spin_lock_irqsave(&bp->lock, flags);
- tx_status = dnet_readl(bp, TX_STATUS);
+ dnet_readl(bp, TX_STATUS);
bufp = (unsigned int *)(((unsigned long) skb->data) & ~0x3UL);
wrsz = (u32) skb->len + 3;
@@ -542,7 +542,7 @@ static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (dnet_readl(bp, TX_FIFO_WCNT) > DNET_FIFO_TX_DATA_AF_TH) {
netif_stop_queue(dev);
- tx_status = dnet_readl(bp, INTR_SRC);
+ dnet_readl(bp, INTR_SRC);
irq_enable = dnet_readl(bp, INTR_ENB);
irq_enable |= DNET_INTR_ENB_TX_FIFOAE;
dnet_writel(bp, irq_enable, INTR_ENB);
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index a0c1f44..0405a39 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -520,13 +520,12 @@ static void fec_time_keep(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct fec_enet_private *fep = container_of(dwork, struct fec_enet_private, time_keep);
- u64 ns;
unsigned long flags;
mutex_lock(&fep->ptp_clk_mutex);
if (fep->ptp_clk_on) {
spin_lock_irqsave(&fep->tmreg_lock, flags);
- ns = timecounter_read(&fep->tc);
+ timecounter_read(&fep->tc);
spin_unlock_irqrestore(&fep->tmreg_lock, flags);
}
mutex_unlock(&fep->ptp_clk_mutex);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
index b30733f..0501856 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -370,9 +370,9 @@ static void eq_irq_work(struct work_struct *work)
* ceq_tasklet - the tasklet of the EQ that received the event
* @ceq_data: the eq
**/
-static void ceq_tasklet(unsigned long ceq_data)
+static void ceq_tasklet(struct tasklet_struct *t)
{
- struct hinic_eq *ceq = (struct hinic_eq *)ceq_data;
+ struct hinic_eq *ceq = from_tasklet(ceq, t, ceq_tasklet);
eq_irq_handler(ceq);
}
@@ -782,8 +782,7 @@ static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif,
INIT_WORK(&aeq_work->work, eq_irq_work);
} else if (type == HINIC_CEQ) {
- tasklet_init(&eq->ceq_tasklet, ceq_tasklet,
- (unsigned long)eq);
+ tasklet_setup(&eq->ceq_tasklet, ceq_tasklet);
}
/* set the attributes of the msix entry */
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 797c55a..2c63e3a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -929,11 +929,16 @@ static void netdev_features_init(struct net_device *netdev)
netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 |
NETIF_F_RXCSUM | NETIF_F_LRO |
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM;
netdev->vlan_features = netdev->hw_features;
netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ netdev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SCTP_CRC |
+ NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_UDP_TUNNEL;
}
static void hinic_refresh_nic_cfg(struct hinic_dev *nic_dev)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index a97498e..502f00e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -357,6 +357,7 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
enum hinic_l4_offload_type l4_offload;
u32 offset, l4_len, network_hdr_len;
enum hinic_l3_offload_type l3_type;
+ u32 tunnel_type = NOT_TUNNEL;
union hinic_l3 ip;
union hinic_l4 l4;
u8 l4_proto;
@@ -367,27 +368,55 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
if (skb->encapsulation) {
u32 l4_tunnel_len;
+ tunnel_type = TUNNEL_UDP_NO_CSUM;
ip.hdr = skb_network_header(skb);
- if (ip.v4->version == 4)
+ if (ip.v4->version == 4) {
l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
- else if (ip.v4->version == 6)
+ l4_proto = ip.v4->protocol;
+ } else if (ip.v4->version == 6) {
+ unsigned char *exthdr;
+ __be16 frag_off;
l3_type = IPV6_PKT;
- else
+ tunnel_type = TUNNEL_UDP_CSUM;
+ exthdr = ip.hdr + sizeof(*ip.v6);
+ l4_proto = ip.v6->nexthdr;
+ l4.hdr = skb_transport_header(skb);
+ if (l4.hdr != exthdr)
+ ipv6_skip_exthdr(skb, exthdr - skb->data,
+ &l4_proto, &frag_off);
+ } else {
l3_type = L3TYPE_UNKNOWN;
+ l4_proto = IPPROTO_RAW;
+ }
hinic_task_set_outter_l3(task, l3_type,
skb_network_header_len(skb));
- l4_tunnel_len = skb_inner_network_offset(skb) -
- skb_transport_offset(skb);
+ switch (l4_proto) {
+ case IPPROTO_UDP:
+ l4_tunnel_len = skb_inner_network_offset(skb) -
+ skb_transport_offset(skb);
+ ip.hdr = skb_inner_network_header(skb);
+ l4.hdr = skb_inner_transport_header(skb);
+ network_hdr_len = skb_inner_network_header_len(skb);
+ break;
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ tunnel_type = NOT_TUNNEL;
+ l4_tunnel_len = 0;
- hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM,
- l4_tunnel_len);
+ ip.hdr = skb_inner_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+ network_hdr_len = skb_network_header_len(skb);
+ break;
+ default:
+ /* Unsupported tunnel packet, disable csum offload */
+ skb_checksum_help(skb);
+ return 0;
+ }
- ip.hdr = skb_inner_network_header(skb);
- l4.hdr = skb_inner_transport_header(skb);
- network_hdr_len = skb_inner_network_header_len(skb);
+ hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len);
} else {
ip.hdr = skb_network_header(skb);
l4.hdr = skb_transport_header(skb);
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 3153d62..c2e7404 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -1212,9 +1212,9 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe)
}
}
-static void ehea_neq_tasklet(unsigned long data)
+static void ehea_neq_tasklet(struct tasklet_struct *t)
{
- struct ehea_adapter *adapter = (struct ehea_adapter *)data;
+ struct ehea_adapter *adapter = from_tasklet(adapter, t, neq_tasklet);
struct ehea_eqe *eqe;
u64 event_mask;
@@ -3417,8 +3417,7 @@ static int ehea_probe_adapter(struct platform_device *dev)
goto out_free_ad;
}
- tasklet_init(&adapter->neq_tasklet, ehea_neq_tasklet,
- (unsigned long)adapter);
+ tasklet_setup(&adapter->neq_tasklet, ehea_neq_tasklet);
ret = ehea_create_device_sysfs(dev);
if (ret)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 6b619c1..e2a3c4b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4842,9 +4842,9 @@ static irqreturn_t ibmvnic_interrupt(int irq, void *instance)
return IRQ_HANDLED;
}
-static void ibmvnic_tasklet(void *data)
+static void ibmvnic_tasklet(struct tasklet_struct *t)
{
- struct ibmvnic_adapter *adapter = data;
+ struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet);
struct ibmvnic_crq_queue *queue = &adapter->crq;
union ibmvnic_crq *crq;
unsigned long flags;
@@ -4979,8 +4979,7 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter)
retrc = 0;
- tasklet_init(&adapter->tasklet, (void *)ibmvnic_tasklet,
- (unsigned long)adapter);
+ tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet);
netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq);
snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x",
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index ddc7576..e9efe07 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1187,9 +1187,9 @@ jme_shutdown_nic(struct jme_adapter *jme)
}
static void
-jme_pcc_tasklet(unsigned long arg)
+jme_pcc_tasklet(struct tasklet_struct *t)
{
- struct jme_adapter *jme = (struct jme_adapter *)arg;
+ struct jme_adapter *jme = from_tasklet(jme, t, pcc_task);
struct net_device *netdev = jme->dev;
if (unlikely(test_bit(JME_FLAG_SHUTDOWN, &jme->flags))) {
@@ -1265,10 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapter *jme)
jwrite32f(jme, JME_APMC, apmc);
}
-static void
-jme_link_change_tasklet(unsigned long arg)
+static void jme_link_change_tasklet(struct tasklet_struct *t)
{
- struct jme_adapter *jme = (struct jme_adapter *)arg;
+ struct jme_adapter *jme = from_tasklet(jme, t, linkch_task);
struct net_device *netdev = jme->dev;
int rc;
@@ -1345,9 +1344,9 @@ jme_link_change_tasklet(unsigned long arg)
}
static void
-jme_rx_clean_tasklet(unsigned long arg)
+jme_rx_clean_tasklet(struct tasklet_struct *t)
{
- struct jme_adapter *jme = (struct jme_adapter *)arg;
+ struct jme_adapter *jme = from_tasklet(jme, t, rxclean_task);
struct dynpcc_info *dpi = &(jme->dpi);
jme_process_receive(jme, jme->rx_ring_size);
@@ -1380,9 +1379,9 @@ jme_poll(JME_NAPI_HOLDER(holder), JME_NAPI_WEIGHT(budget))
}
static void
-jme_rx_empty_tasklet(unsigned long arg)
+jme_rx_empty_tasklet(struct tasklet_struct *t)
{
- struct jme_adapter *jme = (struct jme_adapter *)arg;
+ struct jme_adapter *jme = from_tasklet(jme, t, rxempty_task);
if (unlikely(atomic_read(&jme->link_changing) != 1))
return;
@@ -1392,7 +1391,7 @@ jme_rx_empty_tasklet(unsigned long arg)
netif_info(jme, rx_status, jme->dev, "RX Queue Full!\n");
- jme_rx_clean_tasklet(arg);
+ jme_rx_clean_tasklet(&jme->rxclean_task);
while (atomic_read(&jme->rx_empty) > 0) {
atomic_dec(&jme->rx_empty);
@@ -1416,10 +1415,9 @@ jme_wake_queue_if_stopped(struct jme_adapter *jme)
}
-static void
-jme_tx_clean_tasklet(unsigned long arg)
+static void jme_tx_clean_tasklet(struct tasklet_struct *t)
{
- struct jme_adapter *jme = (struct jme_adapter *)arg;
+ struct jme_adapter *jme = from_tasklet(jme, t, txclean_task);
struct jme_ring *txring = &(jme->txring[0]);
struct txdesc *txdesc = txring->desc;
struct jme_buffer_info *txbi = txring->bufinf, *ctxbi, *ttxbi;
@@ -1834,14 +1832,10 @@ jme_open(struct net_device *netdev)
jme_clear_pm_disable_wol(jme);
JME_NAPI_ENABLE(jme);
- tasklet_init(&jme->linkch_task, jme_link_change_tasklet,
- (unsigned long) jme);
- tasklet_init(&jme->txclean_task, jme_tx_clean_tasklet,
- (unsigned long) jme);
- tasklet_init(&jme->rxclean_task, jme_rx_clean_tasklet,
- (unsigned long) jme);
- tasklet_init(&jme->rxempty_task, jme_rx_empty_tasklet,
- (unsigned long) jme);
+ tasklet_setup(&jme->linkch_task, jme_link_change_tasklet);
+ tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet);
+ tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet);
+ tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet);
rc = jme_request_irq(jme);
if (rc)
@@ -3040,9 +3034,7 @@ jme_init_one(struct pci_dev *pdev,
atomic_set(&jme->tx_cleaning, 1);
atomic_set(&jme->rx_empty, 1);
- tasklet_init(&jme->pcc_task,
- jme_pcc_tasklet,
- (unsigned long) jme);
+ tasklet_setup(&jme->pcc_task, jme_pcc_tasklet);
jme->dpi.cur = PCC_P1;
jme->reg_ghc = 0;
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index faac94b..d1e4d42 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1187,11 +1187,10 @@ static int pxa168_eth_stop(struct net_device *dev)
static int pxa168_eth_change_mtu(struct net_device *dev, int mtu)
{
- int retval;
struct pxa168_eth_private *pep = netdev_priv(dev);
dev->mtu = mtu;
- retval = set_port_config_ext(pep);
+ set_port_config_ext(pep);
if (!netif_running(dev))
return 0;
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 6a93035..8a9c0f4 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -3338,9 +3338,9 @@ static void skge_error_irq(struct skge_hw *hw)
* because accessing phy registers requires spin wait which might
* cause excess interrupt latency.
*/
-static void skge_extirq(unsigned long arg)
+static void skge_extirq(struct tasklet_struct *t)
{
- struct skge_hw *hw = (struct skge_hw *) arg;
+ struct skge_hw *hw = from_tasklet(hw, t, phy_task);
int port;
for (port = 0; port < hw->ports; port++) {
@@ -3927,7 +3927,7 @@ static int skge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw->pdev = pdev;
spin_lock_init(&hw->hw_lock);
spin_lock_init(&hw->phy_lock);
- tasklet_init(&hw->phy_task, skge_extirq, (unsigned long) hw);
+ tasklet_setup(&hw->phy_task, skge_extirq);
hw->regs = ioremap(pci_resource_start(pdev, 0), 0x4000);
if (!hw->regs) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 65f8a4b..3b8576b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -55,11 +55,11 @@
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
-void mlx4_cq_tasklet_cb(unsigned long data)
+void mlx4_cq_tasklet_cb(struct tasklet_struct *t)
{
unsigned long flags;
unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
- struct mlx4_eq_tasklet *ctx = (struct mlx4_eq_tasklet *)data;
+ struct mlx4_eq_tasklet *ctx = from_tasklet(ctx, t, task);
struct mlx4_cq *mcq, *temp;
spin_lock_irqsave(&ctx->lock, flags);
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index ae305c2..9e48509 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1057,8 +1057,7 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
INIT_LIST_HEAD(&eq->tasklet_ctx.list);
INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
+ tasklet_setup(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb);
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 527b52e..64bed7a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1217,7 +1217,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev);
int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
u16 op, unsigned long timeout);
-void mlx4_cq_tasklet_cb(unsigned long data);
+void mlx4_cq_tasklet_cb(struct tasklet_struct *t);
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 8379b24..df3e4938 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -42,11 +42,11 @@
#define TASKLET_MAX_TIME 2
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
-void mlx5_cq_tasklet_cb(unsigned long data)
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t)
{
unsigned long flags;
unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
- struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
+ struct mlx5_eq_tasklet *ctx = from_tasklet(ctx, t, task);
struct mlx5_core_cq *mcq;
struct mlx5_core_cq *temp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 31ef9f8..ec38405 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -788,8 +788,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
INIT_LIST_HEAD(&eq->tasklet_ctx.list);
INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
spin_lock_init(&eq->tasklet_ctx.lock);
- tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
- (unsigned long)&eq->tasklet_ctx);
+ tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);
eq->irq_nb.notifier_call = mlx5_eq_comp_int;
param = (struct mlx5_eq_param) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 831d2c3..9f6d97e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -388,9 +388,9 @@ static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
mlx5_fpga_conn_arm_cq(conn);
}
-static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
+static void mlx5_fpga_conn_cq_tasklet(struct tasklet_struct *t)
{
- struct mlx5_fpga_conn *conn = (void *)data;
+ struct mlx5_fpga_conn *conn = from_tasklet(conn, t, cq.tasklet);
if (unlikely(!conn->qp.active))
return;
@@ -478,8 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
conn->cq.mcq.irqn = irqn;
conn->cq.mcq.uar = fdev->conn_res.uar;
- tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
- (unsigned long)conn);
+ tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet);
mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index 4aaca74..078a7cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -77,7 +77,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn);
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev);
-void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cq_tasklet_cb(struct tasklet_struct *t);
struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 1c64b03..641cdd8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -620,9 +620,9 @@ static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
return elem;
}
-static void mlxsw_pci_cq_tasklet(unsigned long data)
+static void mlxsw_pci_cq_tasklet(struct tasklet_struct *t)
{
- struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data;
+ struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet);
struct mlxsw_pci *mlxsw_pci = q->pci;
char *cqe;
int items = 0;
@@ -733,9 +733,9 @@ static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q)
return elem;
}
-static void mlxsw_pci_eq_tasklet(unsigned long data)
+static void mlxsw_pci_eq_tasklet(struct tasklet_struct *t)
{
- struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data;
+ struct mlxsw_pci_queue *q = from_tasklet(q, t, tasklet);
struct mlxsw_pci *mlxsw_pci = q->pci;
u8 cq_count = mlxsw_pci_cq_count(mlxsw_pci);
unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)];
@@ -792,7 +792,7 @@ struct mlxsw_pci_queue_ops {
struct mlxsw_pci_queue *q);
void (*fini)(struct mlxsw_pci *mlxsw_pci,
struct mlxsw_pci_queue *q);
- void (*tasklet)(unsigned long data);
+ void (*tasklet)(struct tasklet_struct *t);
u16 (*elem_count_f)(const struct mlxsw_pci_queue *q);
u8 (*elem_size_f)(const struct mlxsw_pci_queue *q);
u16 elem_count;
@@ -855,7 +855,7 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
q->pci = mlxsw_pci;
if (q_ops->tasklet)
- tasklet_init(&q->tasklet, q_ops->tasklet, (unsigned long) q);
+ tasklet_setup(&q->tasklet, q_ops->tasklet);
mem_item->size = MLXSW_PCI_AQ_SIZE;
mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev,
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index f3f6dfe..caa251d 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -587,10 +587,10 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev)
return err;
}
-static void ks8842_rx_frame_dma_tasklet(unsigned long arg)
+static void ks8842_rx_frame_dma_tasklet(struct tasklet_struct *t)
{
- struct net_device *netdev = (struct net_device *)arg;
- struct ks8842_adapter *adapter = netdev_priv(netdev);
+ struct ks8842_adapter *adapter = from_tasklet(adapter, t, dma_rx.tasklet);
+ struct net_device *netdev = adapter->netdev;
struct ks8842_rx_dma_ctl *ctl = &adapter->dma_rx;
struct sk_buff *skb = ctl->skb;
dma_addr_t addr = sg_dma_address(&ctl->sg);
@@ -720,10 +720,10 @@ static void ks8842_handle_rx_overrun(struct net_device *netdev,
netdev->stats.rx_fifo_errors++;
}
-static void ks8842_tasklet(unsigned long arg)
+static void ks8842_tasklet(struct tasklet_struct *t)
{
- struct net_device *netdev = (struct net_device *)arg;
- struct ks8842_adapter *adapter = netdev_priv(netdev);
+ struct ks8842_adapter *adapter = from_tasklet(adapter, t, tasklet);
+ struct net_device *netdev = adapter->netdev;
u16 isr;
unsigned long flags;
u16 entry_bank;
@@ -953,8 +953,7 @@ static int ks8842_alloc_dma_bufs(struct net_device *netdev)
goto err;
}
- tasklet_init(&rx_ctl->tasklet, ks8842_rx_frame_dma_tasklet,
- (unsigned long)netdev);
+ tasklet_setup(&rx_ctl->tasklet, ks8842_rx_frame_dma_tasklet);
return 0;
err:
@@ -1173,7 +1172,7 @@ static int ks8842_probe(struct platform_device *pdev)
adapter->dma_tx.channel = -1;
}
- tasklet_init(&adapter->tasklet, ks8842_tasklet, (unsigned long)netdev);
+ tasklet_setup(&adapter->tasklet, ks8842_tasklet);
spin_lock_init(&adapter->lock);
netdev->netdev_ops = &ks8842_netdev_ops;
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index bb646b6..5130507 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -5159,9 +5159,9 @@ static int dev_rcv_special(struct dev_info *hw_priv)
return received;
}
-static void rx_proc_task(unsigned long data)
+static void rx_proc_task(struct tasklet_struct *t)
{
- struct dev_info *hw_priv = (struct dev_info *) data;
+ struct dev_info *hw_priv = from_tasklet(hw_priv, t, rx_tasklet);
struct ksz_hw *hw = &hw_priv->hw;
if (!hw->enabled)
@@ -5181,9 +5181,9 @@ static void rx_proc_task(unsigned long data)
}
}
-static void tx_proc_task(unsigned long data)
+static void tx_proc_task(struct tasklet_struct *t)
{
- struct dev_info *hw_priv = (struct dev_info *) data;
+ struct dev_info *hw_priv = from_tasklet(hw_priv, t, tx_tasklet);
struct ksz_hw *hw = &hw_priv->hw;
hw_ack_intr(hw, KS884X_INT_TX_MASK);
@@ -5436,10 +5436,8 @@ static int prepare_hardware(struct net_device *dev)
rc = request_irq(dev->irq, netdev_intr, IRQF_SHARED, dev->name, dev);
if (rc)
return rc;
- tasklet_init(&hw_priv->rx_tasklet, rx_proc_task,
- (unsigned long) hw_priv);
- tasklet_init(&hw_priv->tx_tasklet, tx_proc_task,
- (unsigned long) hw_priv);
+ tasklet_setup(&hw_priv->rx_tasklet, rx_proc_task);
+ tasklet_setup(&hw_priv->tx_tasklet, tx_proc_task);
hw->promiscuous = 0;
hw->all_multi = 0;
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index d171b51..f59f96f 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -923,10 +923,10 @@ static void rx_irq(struct net_device *ndev)
spin_unlock_irqrestore(&info->lock, flags);
}
-static void rx_action(unsigned long _dev)
+static void rx_action(struct tasklet_struct *t)
{
- struct net_device *ndev = (void *)_dev;
- struct ns83820 *dev = PRIV(ndev);
+ struct ns83820 *dev = from_tasklet(dev, t, rx_tasklet);
+ struct net_device *ndev = dev->ndev;
rx_irq(ndev);
writel(ihr, dev->base + IHR);
@@ -1924,7 +1924,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev,
SET_NETDEV_DEV(ndev, &pci_dev->dev);
INIT_WORK(&dev->tq_refill, queue_refill);
- tasklet_init(&dev->rx_tasklet, rx_action, (unsigned long)ndev);
+ tasklet_setup(&dev->rx_tasklet, rx_action);
err = pci_enable_device(pci_dev);
if (err) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 21ea226..b150da43 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2287,9 +2287,9 @@ static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
return budget;
}
-static void nfp_ctrl_poll(unsigned long arg)
+static void nfp_ctrl_poll(struct tasklet_struct *t)
{
- struct nfp_net_r_vector *r_vec = (void *)arg;
+ struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);
spin_lock(&r_vec->lock);
nfp_net_tx_complete(r_vec->tx_ring, 0);
@@ -2337,8 +2337,7 @@ static void nfp_net_vecs_init(struct nfp_net *nn)
__skb_queue_head_init(&r_vec->queue);
spin_lock_init(&r_vec->lock);
- tasklet_init(&r_vec->tasklet, nfp_ctrl_poll,
- (unsigned long)r_vec);
+ tasklet_setup(&r_vec->tasklet, nfp_ctrl_poll);
tasklet_disable(&r_vec->tasklet);
}
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 4075f5e..a6861df 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -787,9 +787,9 @@ static irqreturn_t nixge_rx_irq(int irq, void *_ndev)
return IRQ_HANDLED;
}
-static void nixge_dma_err_handler(unsigned long data)
+static void nixge_dma_err_handler(struct tasklet_struct *t)
{
- struct nixge_priv *lp = (struct nixge_priv *)data;
+ struct nixge_priv *lp = from_tasklet(lp, t, dma_err_tasklet);
struct nixge_hw_dma_bd *cur_p;
struct nixge_tx_skb *tx_skb;
u32 cr, i;
@@ -879,8 +879,7 @@ static int nixge_open(struct net_device *ndev)
phy_start(phy);
/* Enable tasklets for Axi DMA error handling */
- tasklet_init(&priv->dma_err_tasklet, nixge_dma_err_handler,
- (unsigned long)priv);
+ tasklet_setup(&priv->dma_err_tasklet, nixge_dma_err_handler);
napi_enable(&priv->napi);
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index f34b25a..a20cb8a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -572,7 +572,7 @@ struct qed_hwfn {
struct qed_consq *p_consq;
/* Slow-Path definitions */
- struct tasklet_struct *sp_dpc;
+ struct tasklet_struct sp_dpc;
bool b_sp_dpc_enabled;
struct qed_ptt *p_main_ptt;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index f8c5a86..578935f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1216,9 +1216,9 @@ static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn,
barrier();
}
-void qed_int_sp_dpc(unsigned long hwfn_cookie)
+void qed_int_sp_dpc(struct tasklet_struct *t)
{
- struct qed_hwfn *p_hwfn = (struct qed_hwfn *)hwfn_cookie;
+ struct qed_hwfn *p_hwfn = from_tasklet(p_hwfn, t, sp_dpc);
struct qed_pi_info *pi_info = NULL;
struct qed_sb_attn_info *sb_attn;
struct qed_sb_info *sb_info;
@@ -2285,34 +2285,14 @@ u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn)
static void qed_int_sp_dpc_setup(struct qed_hwfn *p_hwfn)
{
- tasklet_init(p_hwfn->sp_dpc,
- qed_int_sp_dpc, (unsigned long)p_hwfn);
+ tasklet_setup(&p_hwfn->sp_dpc, qed_int_sp_dpc);
p_hwfn->b_sp_dpc_enabled = true;
}
-static int qed_int_sp_dpc_alloc(struct qed_hwfn *p_hwfn)
-{
- p_hwfn->sp_dpc = kmalloc(sizeof(*p_hwfn->sp_dpc), GFP_KERNEL);
- if (!p_hwfn->sp_dpc)
- return -ENOMEM;
-
- return 0;
-}
-
-static void qed_int_sp_dpc_free(struct qed_hwfn *p_hwfn)
-{
- kfree(p_hwfn->sp_dpc);
- p_hwfn->sp_dpc = NULL;
-}
-
int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
int rc = 0;
- rc = qed_int_sp_dpc_alloc(p_hwfn);
- if (rc)
- return rc;
-
rc = qed_int_sp_sb_alloc(p_hwfn, p_ptt);
if (rc)
return rc;
@@ -2326,7 +2306,6 @@ void qed_int_free(struct qed_hwfn *p_hwfn)
{
qed_int_sp_sb_free(p_hwfn);
qed_int_sb_attn_free(p_hwfn);
- qed_int_sp_dpc_free(p_hwfn);
}
void qed_int_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 86809d7..c5550e9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -140,7 +140,7 @@ int qed_int_sb_release(struct qed_hwfn *p_hwfn,
* @param p_hwfn - pointer to hwfn
*
*/
-void qed_int_sp_dpc(unsigned long hwfn_cookie);
+void qed_int_sp_dpc(struct tasklet_struct *t);
/**
* @brief qed_int_get_num_sbs - get the number of status
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 5b149ce..f2ab2b0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -734,7 +734,7 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance)
/* Slowpath interrupt */
if (unlikely(status & 0x1)) {
- tasklet_schedule(hwfn->sp_dpc);
+ tasklet_schedule(&hwfn->sp_dpc);
status &= ~0x1;
rc = IRQ_HANDLED;
}
@@ -780,7 +780,7 @@ int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
id, cdev->pdev->bus->number,
PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id);
rc = request_irq(cdev->int_params.msix_table[id].vector,
- qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc);
+ qed_msix_sp_int, 0, hwfn->name, &hwfn->sp_dpc);
} else {
unsigned long flags = 0;
@@ -812,8 +812,8 @@ static void qed_slowpath_tasklet_flush(struct qed_hwfn *p_hwfn)
* enable function makes this sequence a flush-like operation.
*/
if (p_hwfn->b_sp_dpc_enabled) {
- tasklet_disable(p_hwfn->sp_dpc);
- tasklet_enable(p_hwfn->sp_dpc);
+ tasklet_disable(&p_hwfn->sp_dpc);
+ tasklet_enable(&p_hwfn->sp_dpc);
}
}
@@ -842,7 +842,7 @@ static void qed_slowpath_irq_free(struct qed_dev *cdev)
break;
synchronize_irq(cdev->int_params.msix_table[i].vector);
free_irq(cdev->int_params.msix_table[i].vector,
- cdev->hwfns[i].sp_dpc);
+ &cdev->hwfns[i].sp_dpc);
}
} else {
if (QED_LEADING_HWFN(cdev)->b_int_requested)
@@ -861,11 +861,11 @@ static int qed_nic_stop(struct qed_dev *cdev)
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
if (p_hwfn->b_sp_dpc_enabled) {
- tasklet_disable(p_hwfn->sp_dpc);
+ tasklet_disable(&p_hwfn->sp_dpc);
p_hwfn->b_sp_dpc_enabled = false;
DP_VERBOSE(cdev, NETIF_MSG_IFDOWN,
"Disabled sp tasklet [hwfn %d] at %p\n",
- i, p_hwfn->sp_dpc);
+ i, &p_hwfn->sp_dpc);
}
}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 29b9c72..fc49088 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -658,11 +658,10 @@ int qlcnic_83xx_cam_lock(struct qlcnic_adapter *adapter)
void qlcnic_83xx_cam_unlock(struct qlcnic_adapter *adapter)
{
void __iomem *addr;
- u32 val;
struct qlcnic_hardware_context *ahw = adapter->ahw;
addr = ahw->pci_base0 + QLC_83XX_SEM_UNLOCK_FUNC(ahw->pci_func);
- val = readl(addr);
+ readl(addr);
}
void qlcnic_83xx_read_crb(struct qlcnic_adapter *adapter, char *buf,
diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c
index 91b60c5..1fd08a0 100644
--- a/drivers/net/ethernet/silan/sc92031.c
+++ b/drivers/net/ethernet/silan/sc92031.c
@@ -301,6 +301,7 @@ struct sc92031_priv {
/* for dev->get_stats */
long rx_value;
+ struct net_device *ndev;
};
/* I don't know which registers can be safely read; however, I can guess
@@ -829,10 +830,10 @@ static void _sc92031_link_tasklet(struct net_device *dev)
}
}
-static void sc92031_tasklet(unsigned long data)
+static void sc92031_tasklet(struct tasklet_struct *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct sc92031_priv *priv = netdev_priv(dev);
+ struct sc92031_priv *priv = from_tasklet(priv, t, tasklet);
+ struct net_device *dev = priv->ndev;
void __iomem *port_base = priv->port_base;
u32 intr_status, intr_mask;
@@ -1108,7 +1109,7 @@ static void sc92031_poll_controller(struct net_device *dev)
disable_irq(irq);
if (sc92031_interrupt(irq, dev) != IRQ_NONE)
- sc92031_tasklet((unsigned long)dev);
+ sc92031_tasklet(&priv->tasklet);
enable_irq(irq);
}
#endif
@@ -1443,10 +1444,11 @@ static int sc92031_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev->ethtool_ops = &sc92031_ethtool_ops;
priv = netdev_priv(dev);
+ priv->ndev = dev;
spin_lock_init(&priv->lock);
priv->port_base = port_base;
priv->pdev = pdev;
- tasklet_init(&priv->tasklet, sc92031_tasklet, (unsigned long)dev);
+ tasklet_setup(&priv->tasklet, sc92031_tasklet);
/* Fudge tasklet count so the call to sc92031_enable_interrupts at
* sc92031_open will work correctly */
tasklet_disable_nosync(&priv->tasklet);
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 4492715..f6b73af 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -535,10 +535,10 @@ static inline void smc_rcv(struct net_device *dev)
/*
* This is called to actually send a packet to the chip.
*/
-static void smc_hardware_send_pkt(unsigned long data)
+static void smc_hardware_send_pkt(struct tasklet_struct *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct smc_local *lp = netdev_priv(dev);
+ struct smc_local *lp = from_tasklet(lp, t, tx_task);
+ struct net_device *dev = lp->dev;
void __iomem *ioaddr = lp->base;
struct sk_buff *skb;
unsigned int packet_no, len;
@@ -688,7 +688,7 @@ smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
* Allocation succeeded: push packet to the chip's own memory
* immediately.
*/
- smc_hardware_send_pkt((unsigned long)dev);
+ smc_hardware_send_pkt(&lp->tx_task);
}
return NETDEV_TX_OK;
@@ -1964,7 +1964,7 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr,
dev->netdev_ops = &smc_netdev_ops;
dev->ethtool_ops = &smc_ethtool_ops;
- tasklet_init(&lp->tx_task, smc_hardware_send_pkt, (unsigned long)dev);
+ tasklet_setup(&lp->tx_task, smc_hardware_send_pkt);
INIT_WORK(&lp->phy_configure, smc_phy_configure);
lp->dev = dev;
lp->mii.phy_id_mask = 0x1f;
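
The acenic, sc92031 and smc91x conversions above all follow the same pattern: embed the tasklet in the driver's private struct, add a back-pointer to the net_device where needed, and recover the private struct in the callback with from_tasklet(), which boils down to a container_of() lookup. A minimal userspace sketch of that recovery (hypothetical struct names, not part of the patch):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct tasklet_struct { int state; };		/* stand-in for the kernel type */

struct fake_priv {				/* hypothetical driver private data */
	int rx_value;
	struct tasklet_struct tasklet;		/* embedded, as in sc92031_priv */
};

static void fake_tasklet(struct tasklet_struct *t)
{
	/* from_tasklet(priv, t, tasklet) expands to this container_of() */
	struct fake_priv *priv = container_of(t, struct fake_priv, tasklet);

	printf("recovered priv, rx_value=%d\n", priv->rx_value);
}

int main(void)
{
	struct fake_priv priv = { .rx_value = 42 };

	fake_tasklet(&priv.tasklet);		/* callback only sees the tasklet pointer */
	return 0;
}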
diff --git a/include/net/sock.h b/include/net/sock.h
index 7dd3051..eaa5cac 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -845,7 +845,6 @@ enum sock_flags {
SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
- SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
SOCK_MEMALLOC, /* VM depends on this socket for swapping */
SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */
SOCK_FASYNC, /* fasync() active */
@@ -1526,7 +1525,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
- sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
sk_wmem_queued_add(sk, -skb->truesize);
sk_mem_uncharge(sk, skb->truesize);
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e85d564..852f0d7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1414,6 +1414,8 @@ static inline int tcp_full_space(const struct sock *sk)
return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
+void tcp_cleanup_rbuf(struct sock *sk, int copied);
+
/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
* SO_RCVLOWAT constraint, since we are receiving skbs with too small
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 3b67d59..e70c901 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -579,7 +579,7 @@ TRACE_EVENT(rxrpc_client,
__entry->channel = channel;
__entry->usage = conn ? atomic_read(&conn->usage) : -2;
__entry->op = op;
- __entry->cid = conn->proto.cid;
+ __entry->cid = conn ? conn->proto.cid : 0;
),
TP_printk("C=%08x h=%2d %s i=%08x u=%d",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 57a5688..d3781b6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1527,7 +1527,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
-static void tcp_cleanup_rbuf(struct sock *sk, int copied)
+void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3658ad8..50834e7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5332,12 +5332,6 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
return true;
}
-/* When incoming ACK allowed to free some skb from write_queue,
- * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
- * on the exit from tcp input handler.
- *
- * PROBLEM: sndbuf expansion does not work well with largesend.
- */
static void tcp_new_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -5352,16 +5346,13 @@ static void tcp_new_space(struct sock *sk)
static void tcp_check_space(struct sock *sk)
{
- if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
- sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
- /* pairs with tcp_poll() */
- smp_mb();
- if (sk->sk_socket &&
- test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- tcp_new_space(sk);
- if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
- tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
- }
+ /* pairs with tcp_poll() */
+ smp_mb();
+ if (sk->sk_socket &&
+ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+ tcp_new_space(sk);
+ if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+ tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ab79d36..386978d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1682,7 +1682,6 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
skb->truesize -= delta_truesize;
sk_wmem_queued_add(sk, -delta_truesize);
sk_mem_uncharge(sk, delta_truesize);
- sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
}
/* Any change of skb->len requires recalculation of tso factor. */
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 0a6a15f..056986c 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -22,6 +22,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+ SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
+ SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
+ SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
+ SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW),
+ SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index d7de340..937a177 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -15,6 +15,11 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
+ MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
+ MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
+ MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */
+ MPTCP_MIB_NODSSWINDOW, /* Segments not in MPTCP windows */
+ MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2c208d2..6947f4f 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -23,8 +23,6 @@ static int pm_nl_pernet_id;
struct mptcp_pm_addr_entry {
struct list_head list;
- unsigned int flags;
- int ifindex;
struct mptcp_addr_info addr;
struct rcu_head rcu;
};
@@ -119,7 +117,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
rcu_read_lock();
spin_lock_bh(&msk->join_list_lock);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
+ if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
continue;
/* avoid any address already in use by subflows and
@@ -150,7 +148,7 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
* can lead to additional addresses not being announced.
*/
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
+ if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
if (i++ == pos) {
ret = entry;
@@ -210,8 +208,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
msk->pm.subflows++;
check_work_pending(msk);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, local->ifindex,
- &local->addr, &remote);
+ __mptcp_subflow_connect(sk, &local->addr, &remote);
spin_lock_bh(&msk->pm.lock);
return;
}
@@ -257,13 +254,13 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
local.family = remote.family;
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect((struct sock *)msk, 0, &local, &remote);
+ __mptcp_subflow_connect((struct sock *)msk, &local, &remote);
spin_lock_bh(&msk->pm.lock);
}
static bool address_use_port(struct mptcp_pm_addr_entry *entry)
{
- return (entry->flags &
+ return (entry->addr.flags &
(MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) ==
MPTCP_PM_ADDR_FLAG_SIGNAL;
}
@@ -293,9 +290,9 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
goto out;
}
- if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
+ if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
pernet->add_addr_signal_max++;
- if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
+ if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
pernet->local_addr_max++;
entry->addr.id = pernet->next_id++;
@@ -345,8 +342,9 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
if (!entry)
return -ENOMEM;
- entry->flags = 0;
entry->addr = skc_local;
+ entry->addr.ifindex = 0;
+ entry->addr.flags = 0;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
kfree(entry);
@@ -460,14 +458,17 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
entry->addr.addr.s_addr = nla_get_in_addr(tb[addr_addr]);
skip_family:
- if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX])
- entry->ifindex = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
+ if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
+ u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
+
+ entry->addr.ifindex = val;
+ }
if (tb[MPTCP_PM_ADDR_ATTR_ID])
entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
- entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ entry->addr.flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
return 0;
}
@@ -535,9 +536,9 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
ret = -EINVAL;
goto out;
}
- if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
+ if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
pernet->add_addr_signal_max--;
- if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
+ if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
pernet->local_addr_max--;
pernet->addrs--;
@@ -593,10 +594,10 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id))
goto nla_put_failure;
- if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags))
+ if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->addr.flags))
goto nla_put_failure;
- if (entry->ifindex &&
- nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex))
+ if (entry->addr.ifindex &&
+ nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->addr.ifindex))
goto nla_put_failure;
if (addr->family == AF_INET &&
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6831962..ef0dd2f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -32,6 +32,8 @@ struct mptcp6_sock {
#endif
struct mptcp_skb_cb {
+ u64 map_seq;
+ u64 end_seq;
u32 offset;
};
@@ -110,10 +112,159 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
return 0;
}
-static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
- struct sk_buff *skb,
- unsigned int offset, size_t copy_len)
+static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
{
+ sk_drops_add(sk, skb);
+ __kfree_skb(skb);
+}
+
+static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
+ struct sk_buff *from)
+{
+ bool fragstolen;
+ int delta;
+
+ if (MPTCP_SKB_CB(from)->offset ||
+ !skb_try_coalesce(to, from, &fragstolen, &delta))
+ return false;
+
+ pr_debug("colesced seq %llx into %llx new len %d new end seq %llx",
+ MPTCP_SKB_CB(from)->map_seq, MPTCP_SKB_CB(to)->map_seq,
+ to->len, MPTCP_SKB_CB(from)->end_seq);
+ MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
+ kfree_skb_partial(from, fragstolen);
+ atomic_add(delta, &sk->sk_rmem_alloc);
+ sk_mem_charge(sk, delta);
+ return true;
+}
+
+static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
+ struct sk_buff *from)
+{
+ if (MPTCP_SKB_CB(from)->map_seq != MPTCP_SKB_CB(to)->end_seq)
+ return false;
+
+ return mptcp_try_coalesce((struct sock *)msk, to, from);
+}
+
+/* "inspired" by tcp_data_queue_ofo(), main differences:
+ * - use mptcp seqs
+ * - don't cope with sacks
+ */
+static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct rb_node **p, *parent;
+ u64 seq, end_seq, max_seq;
+ struct sk_buff *skb1;
+
+ seq = MPTCP_SKB_CB(skb)->map_seq;
+ end_seq = MPTCP_SKB_CB(skb)->end_seq;
+ max_seq = tcp_space(sk);
+ max_seq = max_seq > 0 ? max_seq + msk->ack_seq : msk->ack_seq;
+
+ pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq,
+ RB_EMPTY_ROOT(&msk->out_of_order_queue));
+ if (after64(seq, max_seq)) {
+ /* out of window */
+ mptcp_drop(sk, skb);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW);
+ return;
+ }
+
+ p = &msk->out_of_order_queue.rb_node;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOQUEUE);
+ if (RB_EMPTY_ROOT(&msk->out_of_order_queue)) {
+ rb_link_node(&skb->rbnode, NULL, p);
+ rb_insert_color(&skb->rbnode, &msk->out_of_order_queue);
+ msk->ooo_last_skb = skb;
+ goto end;
+ }
+
+ /* with 2 subflows, adding at end of ooo queue is quite likely
+ * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
+ */
+ if (mptcp_ooo_try_coalesce(msk, msk->ooo_last_skb, skb)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOMERGE);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOQUEUETAIL);
+ return;
+ }
+
+ /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
+ if (!before64(seq, MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOQUEUETAIL);
+ parent = &msk->ooo_last_skb->rbnode;
+ p = &parent->rb_right;
+ goto insert;
+ }
+
+ /* Find place to insert this segment. Handle overlaps on the way. */
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ skb1 = rb_to_skb(parent);
+ if (before64(seq, MPTCP_SKB_CB(skb1)->map_seq)) {
+ p = &parent->rb_left;
+ continue;
+ }
+ if (before64(seq, MPTCP_SKB_CB(skb1)->end_seq)) {
+ if (!after64(end_seq, MPTCP_SKB_CB(skb1)->end_seq)) {
+ /* All the bits are present. Drop. */
+ mptcp_drop(sk, skb);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
+ return;
+ }
+ if (after64(seq, MPTCP_SKB_CB(skb1)->map_seq)) {
+ /* partial overlap:
+ * | skb |
+ * | skb1 |
+ * continue traversing
+ */
+ } else {
+ /* skb's seq == skb1's seq and skb covers skb1.
+ * Replace skb1 with skb.
+ */
+ rb_replace_node(&skb1->rbnode, &skb->rbnode,
+ &msk->out_of_order_queue);
+ mptcp_drop(sk, skb1);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
+ goto merge_right;
+ }
+ } else if (mptcp_ooo_try_coalesce(msk, skb1, skb)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFOMERGE);
+ return;
+ }
+ p = &parent->rb_right;
+ }
+
+insert:
+ /* Insert segment into RB tree. */
+ rb_link_node(&skb->rbnode, parent, p);
+ rb_insert_color(&skb->rbnode, &msk->out_of_order_queue);
+
+merge_right:
+ /* Remove other segments covered by skb. */
+ while ((skb1 = skb_rb_next(skb)) != NULL) {
+ if (before64(end_seq, MPTCP_SKB_CB(skb1)->end_seq))
+ break;
+ rb_erase(&skb1->rbnode, &msk->out_of_order_queue);
+ mptcp_drop(sk, skb1);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
+ }
+ /* If there is no skb after us, we are the last_skb ! */
+ if (!skb1)
+ msk->ooo_last_skb = skb;
+
+end:
+ skb_condense(skb);
+ skb_set_owner_r(skb, sk);
+}
+
+static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
+ struct sk_buff *skb, unsigned int offset,
+ size_t copy_len)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = (struct sock *)msk;
struct sk_buff *tail;
@@ -121,24 +272,36 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
skb_ext_reset(skb);
skb_orphan(skb);
- msk->ack_seq += copy_len;
- tail = skb_peek_tail(&sk->sk_receive_queue);
- if (offset == 0 && tail) {
- bool fragstolen;
- int delta;
+ /* the skb map_seq accounts for the skb offset:
+ * mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq
+ * value
+ */
+ MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow);
+ MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len;
+ MPTCP_SKB_CB(skb)->offset = offset;
- if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
- kfree_skb_partial(skb, fragstolen);
- atomic_add(delta, &sk->sk_rmem_alloc);
- sk_mem_charge(sk, delta);
- return;
- }
+ if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
+ /* in sequence */
+ msk->ack_seq += copy_len;
+ tail = skb_peek_tail(&sk->sk_receive_queue);
+ if (tail && mptcp_try_coalesce(sk, tail, skb))
+ return true;
+
+ skb_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ return true;
+ } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
+ mptcp_data_queue_ofo(msk, skb);
+ return false;
}
- skb_set_owner_r(skb, sk);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- MPTCP_SKB_CB(skb)->offset = offset;
+ /* old data, keep it simple and drop the whole pkt, the sender
+ * will retransmit it as needed.
+ */
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
+ mptcp_drop(sk, skb);
+ return false;
}
static void mptcp_stop_timer(struct sock *sk)
@@ -149,27 +312,6 @@ static void mptcp_stop_timer(struct sock *sk)
mptcp_sk(sk)->timer_ival = 0;
}
-/* both sockets must be locked */
-static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
- struct sock *ssk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
- u64 dsn = mptcp_subflow_get_mapped_dsn(subflow);
-
- /* revalidate data sequence number.
- *
- * mptcp_subflow_data_available() is usually called
- * without msk lock. Its unlikely (but possible)
- * that msk->ack_seq has been advanced since the last
- * call found in-sequence data.
- */
- if (likely(dsn == msk->ack_seq))
- return true;
-
- subflow->data_avail = 0;
- return mptcp_subflow_data_available(ssk);
-}
-
static void mptcp_check_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -313,11 +455,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
struct tcp_sock *tp;
bool done = false;
- if (!mptcp_subflow_dsn_valid(msk, ssk)) {
- *bytes = 0;
- return false;
- }
-
+ pr_debug("msk=%p ssk=%p", msk, ssk);
tp = tcp_sk(ssk);
do {
u32 map_remaining, offset;
@@ -355,9 +493,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
if (tp->urg_data)
done = true;
- __mptcp_move_skb(msk, ssk, skb, offset, len);
+ if (__mptcp_move_skb(msk, ssk, skb, offset, len))
+ moved += len;
seq += len;
- moved += len;
if (WARN_ON_ONCE(map_remaining < len))
break;
@@ -376,20 +514,56 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
}
} while (more_data_avail);
- *bytes = moved;
-
- /* If the moves have caught up with the DATA_FIN sequence number
- * it's time to ack the DATA_FIN and change socket state, but
- * this is not a good place to change state. Let the workqueue
- * do it.
- */
- if (mptcp_pending_data_fin(sk, NULL) &&
- schedule_work(&msk->work))
- sock_hold(sk);
+ *bytes += moved;
+ if (moved)
+ tcp_cleanup_rbuf(ssk, moved);
return done;
}
+static bool mptcp_ofo_queue(struct mptcp_sock *msk)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct sk_buff *skb, *tail;
+ bool moved = false;
+ struct rb_node *p;
+ u64 end_seq;
+
+ p = rb_first(&msk->out_of_order_queue);
+ pr_debug("msk=%p empty=%d", msk, RB_EMPTY_ROOT(&msk->out_of_order_queue));
+ while (p) {
+ skb = rb_to_skb(p);
+ if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq))
+ break;
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, &msk->out_of_order_queue);
+
+ if (unlikely(!after64(MPTCP_SKB_CB(skb)->end_seq,
+ msk->ack_seq))) {
+ mptcp_drop(sk, skb);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
+ continue;
+ }
+
+ end_seq = MPTCP_SKB_CB(skb)->end_seq;
+ tail = skb_peek_tail(&sk->sk_receive_queue);
+ if (!tail || !mptcp_ooo_try_coalesce(msk, tail, skb)) {
+ int delta = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
+
+ /* skip overlapping data, if any */
+ pr_debug("uncoalesced seq=%llx ack seq=%llx delta=%d",
+ MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq,
+ delta);
+ MPTCP_SKB_CB(skb)->offset += delta;
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ }
+ msk->ack_seq = end_seq;
+ moved = true;
+ }
+ return moved;
+}
+
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
@@ -405,8 +579,19 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
return false;
/* must re-check after taking the lock */
- if (!READ_ONCE(sk->sk_lock.owned))
+ if (!READ_ONCE(sk->sk_lock.owned)) {
__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+ mptcp_ofo_queue(msk);
+
+ /* If the moves have caught up with the DATA_FIN sequence number
+ * it's time to ack the DATA_FIN and change socket state, but
+ * this is not a good place to change state. Let the workqueue
+ * do it.
+ */
+ if (mptcp_pending_data_fin(sk, NULL) &&
+ schedule_work(&msk->work))
+ sock_hold(sk);
+ }
spin_unlock_bh(&sk->sk_lock.slock);
@@ -415,9 +600,17 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
void mptcp_data_ready(struct sock *sk, struct sock *ssk)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(sk);
+ bool wake;
- set_bit(MPTCP_DATA_READY, &msk->flags);
+ /* move_skbs_to_msk below can legitimately clear the data_avail flag,
+ * but we will later need to properly wake the reader, so cache its
+ * value
+ */
+ wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
+ if (wake)
+ set_bit(MPTCP_DATA_READY, &msk->flags);
if (atomic_read(&sk->sk_rmem_alloc) < READ_ONCE(sk->sk_rcvbuf) &&
move_skbs_to_msk(msk, ssk))
@@ -438,7 +631,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
move_skbs_to_msk(msk, ssk);
}
wake:
- sk->sk_data_ready(sk);
+ if (wake)
+ sk->sk_data_ready(sk);
}
static void __mptcp_flush_join_list(struct mptcp_sock *msk)
@@ -472,7 +666,7 @@ void mptcp_data_acked(struct sock *sk)
{
mptcp_reset_timer(sk);
- if ((!sk_stream_is_writeable(sk) ||
+ if ((!test_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags) ||
(inet_sk_state_load(sk) != TCP_ESTABLISHED)) &&
schedule_work(&mptcp_sk(sk)->work))
sock_hold(sk);
@@ -567,6 +761,20 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
put_page(dfrag->page);
}
+static bool mptcp_is_writeable(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+
+ if (!sk_stream_is_writeable((struct sock *)msk))
+ return false;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (sk_stream_is_writeable(subflow->tcp_sock))
+ return true;
+ }
+ return false;
+}
+
static void mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -609,8 +817,15 @@ static void mptcp_clean_una(struct sock *sk)
sk_mem_reclaim_partial(sk);
/* Only wake up writers if a subflow is ready */
- if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
+ if (mptcp_is_writeable(msk)) {
+ set_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags);
+ smp_mb__after_atomic();
+
+ /* set SEND_SPACE before sk_stream_write_space clears
+ * NOSPACE
+ */
sk_stream_write_space(sk);
+ }
}
}
@@ -801,60 +1016,128 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
return ret;
}
-static void mptcp_nospace(struct mptcp_sock *msk, struct socket *sock)
+static void mptcp_nospace(struct mptcp_sock *msk)
{
+ struct mptcp_subflow_context *subflow;
+
clear_bit(MPTCP_SEND_SPACE, &msk->flags);
smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
- /* enables sk->write_space() callbacks */
- set_bit(SOCK_NOSPACE, &sock->flags);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ struct socket *sock = READ_ONCE(ssk->sk_socket);
+
+ /* enables ssk->write_space() callbacks */
+ if (sock)
+ set_bit(SOCK_NOSPACE, &sock->flags);
+ }
}
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
+static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
+ if (subflow->request_join && !subflow->fully_established)
+ return false;
+
+ /* only send if our side has not closed yet */
+ return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
+}
+
+#define MPTCP_SEND_BURST_SIZE ((1 << 16) - \
+ sizeof(struct tcphdr) - \
+ MAX_TCP_OPTION_SPACE - \
+ sizeof(struct ipv6hdr) - \
+ sizeof(struct frag_hdr))
+
+struct subflow_send_info {
+ struct sock *ssk;
+ u64 ratio;
+};
+
+static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
+ u32 *sndbuf)
+{
+ struct subflow_send_info send_info[2];
struct mptcp_subflow_context *subflow;
- struct sock *backup = NULL;
+ int i, nr_active = 0;
+ struct sock *ssk;
+ u64 ratio;
+ u32 pace;
- sock_owned_by_me((const struct sock *)msk);
+ sock_owned_by_me((struct sock *)msk);
+ *sndbuf = 0;
if (!mptcp_ext_cache_refill(msk))
return NULL;
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- if (!sk_stream_memory_free(ssk)) {
- struct socket *sock = ssk->sk_socket;
-
- if (sock)
- mptcp_nospace(msk, sock);
-
+ if (__mptcp_check_fallback(msk)) {
+ if (!msk->first)
return NULL;
- }
-
- if (subflow->backup) {
- if (!backup)
- backup = ssk;
-
- continue;
- }
-
- return ssk;
+ *sndbuf = msk->first->sk_sndbuf;
+ return sk_stream_memory_free(msk->first) ? msk->first : NULL;
}
- return backup;
+ /* re-use last subflow, if the burst allows that */
+ if (msk->last_snd && msk->snd_burst > 0 &&
+ sk_stream_memory_free(msk->last_snd) &&
+ mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
+ mptcp_for_each_subflow(msk, subflow) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf);
+ }
+ return msk->last_snd;
+ }
+
+ /* pick the subflow with the lower wmem/wspace ratio */
+ for (i = 0; i < 2; ++i) {
+ send_info[i].ssk = NULL;
+ send_info[i].ratio = -1;
+ }
+ mptcp_for_each_subflow(msk, subflow) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ if (!mptcp_subflow_active(subflow))
+ continue;
+
+ nr_active += !subflow->backup;
+ *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf);
+ if (!sk_stream_memory_free(subflow->tcp_sock))
+ continue;
+
+ pace = READ_ONCE(ssk->sk_pacing_rate);
+ if (!pace)
+ continue;
+
+ ratio = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32,
+ pace);
+ if (ratio < send_info[subflow->backup].ratio) {
+ send_info[subflow->backup].ssk = ssk;
+ send_info[subflow->backup].ratio = ratio;
+ }
+ }
+
+ pr_debug("msk=%p nr_active=%d ssk=%p:%lld backup=%p:%lld",
+ msk, nr_active, send_info[0].ssk, send_info[0].ratio,
+ send_info[1].ssk, send_info[1].ratio);
+
+ /* pick the best backup if no other subflow is active */
+ if (!nr_active)
+ send_info[0].ssk = send_info[1].ssk;
+
+ if (send_info[0].ssk) {
+ msk->last_snd = send_info[0].ssk;
+ msk->snd_burst = min_t(int, MPTCP_SEND_BURST_SIZE,
+ sk_stream_wspace(msk->last_snd));
+ return msk->last_snd;
+ }
+ return NULL;
}
-static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
+static void ssk_check_wmem(struct mptcp_sock *msk)
{
- struct socket *sock;
-
- if (likely(sk_stream_is_writeable(ssk)))
- return;
-
- sock = READ_ONCE(ssk->sk_socket);
- if (sock)
- mptcp_nospace(msk, sock);
+ if (unlikely(!mptcp_is_writeable(msk)))
+ mptcp_nospace(msk);
}
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
@@ -864,6 +1147,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct page_frag *pfrag;
size_t copied = 0;
struct sock *ssk;
+ u32 sndbuf;
bool tx_ok;
long timeo;
@@ -890,7 +1174,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
__mptcp_flush_join_list(msk);
- ssk = mptcp_subflow_get_send(msk);
+ ssk = mptcp_subflow_get_send(msk, &sndbuf);
while (!sk_stream_memory_free(sk) ||
!ssk ||
!mptcp_page_frag_refill(ssk, pfrag)) {
@@ -907,19 +1191,25 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
mptcp_reset_timer(sk);
}
+ mptcp_nospace(msk);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
goto out;
mptcp_clean_una(sk);
- ssk = mptcp_subflow_get_send(msk);
+ ssk = mptcp_subflow_get_send(msk, &sndbuf);
if (list_empty(&msk->conn_list)) {
ret = -ENOTCONN;
goto out;
}
}
+ /* do auto tuning */
+ if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
+ sndbuf > READ_ONCE(sk->sk_sndbuf))
+ WRITE_ONCE(sk->sk_sndbuf, sndbuf);
+
pr_debug("conn_list->subflow=%p", ssk);
lock_sock(ssk);
@@ -936,6 +1226,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
break;
}
+ /* burst can be negative, we will try to move to the next subflow
+ * at selection time, if possible.
+ */
+ msk->snd_burst -= ret;
copied += ret;
tx_ok = msg_data_left(msg);
@@ -945,7 +1239,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!sk_stream_memory_free(ssk) ||
!mptcp_page_frag_refill(ssk, pfrag) ||
!mptcp_ext_cache_refill(msk)) {
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
tcp_push(ssk, msg->msg_flags, mss_now,
tcp_sk(ssk)->nonagle, size_goal);
mptcp_set_timeout(sk, ssk);
@@ -993,9 +1286,9 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
mptcp_reset_timer(sk);
}
- ssk_check_wmem(msk, ssk);
release_sock(ssk);
out:
+ ssk_check_wmem(msk);
release_sock(sk);
return copied ? : ret;
}
@@ -1133,10 +1426,14 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
*/
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk;
+ bool slow;
ssk = mptcp_subflow_tcp_sock(subflow);
+ slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
tcp_sk(ssk)->window_clamp = window_clamp;
+ tcp_cleanup_rbuf(ssk, 1);
+ unlock_sock_fast(ssk, slow);
}
}
}
@@ -1152,6 +1449,11 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
unsigned int moved = 0;
bool done;
+ /* avoid looping forever below on racing close */
+ if (((struct sock *)msk)->sk_state == TCP_CLOSE)
+ return false;
+
+ __mptcp_flush_join_list(msk);
do {
struct sock *ssk = mptcp_subflow_recv_lookup(msk);
@@ -1163,7 +1465,11 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
release_sock(ssk);
} while (!done);
- return moved > 0;
+ if (mptcp_ofo_queue(msk) || moved > 0) {
+ mptcp_check_data_fin((struct sock *)msk);
+ return true;
+ }
+ return false;
}
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
@@ -1259,6 +1565,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
set_bit(MPTCP_DATA_READY, &msk->flags);
}
out_err:
+ pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
+ msk, test_bit(MPTCP_DATA_READY, &msk->flags),
+ skb_queue_empty(&sk->sk_receive_queue), copied);
mptcp_rcv_space_adjust(msk, copied);
release_sock(sk);
@@ -1309,9 +1618,15 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
sock_owned_by_me((const struct sock *)msk);
+ if (__mptcp_check_fallback(msk))
+ return msk->first;
+
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ if (!mptcp_subflow_active(subflow))
+ continue;
+
/* still data outstanding at TCP level? Don't retransmit. */
if (!tcp_write_queue_empty(ssk))
return NULL;
@@ -1472,6 +1787,7 @@ static int __mptcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&msk->rtx_queue);
__set_bit(MPTCP_SEND_SPACE, &msk->flags);
INIT_WORK(&msk->work, mptcp_worker);
+ msk->out_of_order_queue = RB_ROOT;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
@@ -1505,7 +1821,7 @@ static int mptcp_init_sock(struct sock *sk)
sk_sockets_allocated_inc(sk);
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[2];
+ sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
return 0;
}
@@ -1811,6 +2127,7 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ skb_rbtree_purge(&msk->out_of_order_queue);
mptcp_token_destroy(msk);
if (msk->cached_ext)
__skb_ext_put(msk->cached_ext);
@@ -2286,13 +2603,13 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sock, wait);
state = inet_sk_state_load(sk);
+ pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN)
return mptcp_check_readable(msk);
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
- if (sk_stream_is_writeable(sk) &&
- test_bit(MPTCP_SEND_SPACE, &msk->flags))
+ if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
mask |= EPOLLOUT | EPOLLWRNORM;
}
if (sk->sk_shutdown & RCV_SHUTDOWN)
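
The new mptcp_subflow_get_send() above schedules data on the subflow with the lowest queued-bytes-to-pacing-rate ratio, reusing the last subflow while the send burst lasts; with the headers subtracted in MPTCP_SEND_BURST_SIZE (20-byte TCP header, 40 bytes of TCP option space, 40-byte IPv6 header, 8-byte fragment header) the burst works out to 65428 bytes on typical builds. A self-contained sketch of the ratio computation, using made-up subflow numbers (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the per-subflow fields the scheduler reads. */
struct subflow_sample {
	const char *name;
	uint32_t wmem_queued;	/* bytes already queued on the subflow */
	uint32_t pacing_rate;	/* bytes per second */
};

/* Same shape as the patch: ratio = (wmem_queued << 32) / pacing_rate.
 * A lower ratio means the subflow drains its backlog sooner. */
static uint64_t send_ratio(const struct subflow_sample *s)
{
	if (!s->pacing_rate)
		return UINT64_MAX;	/* the patch skips subflows without a pacing rate */
	return ((uint64_t)s->wmem_queued << 32) / s->pacing_rate;
}

int main(void)
{
	struct subflow_sample subflows[] = {
		{ "ssk0", 64 * 1024, 12500000 },	/* ~100 Mbit/s link */
		{ "ssk1", 16 * 1024,  1250000 },	/* ~10 Mbit/s link */
	};
	const struct subflow_sample *best = NULL;
	unsigned int i;

	for (i = 0; i < 2; i++) {
		printf("%s ratio=%llu\n", subflows[i].name,
		       (unsigned long long)send_ratio(&subflows[i]));
		if (!best || send_ratio(&subflows[i]) < send_ratio(best))
			best = &subflows[i];
	}
	printf("selected %s\n", best->name);	/* ssk0: less backlog per unit of rate */
	return 0;
}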
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 60b27d4..493bd2c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -140,6 +140,8 @@ struct mptcp_addr_info {
sa_family_t family;
__be16 port;
u8 id;
+ u8 flags;
+ int ifindex;
union {
struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -194,6 +196,8 @@ struct mptcp_sock {
u64 write_seq;
u64 ack_seq;
u64 rcv_data_fin_seq;
+ struct sock *last_snd;
+ int snd_burst;
atomic64_t snd_una;
unsigned long timer_ival;
u32 token;
@@ -204,6 +208,8 @@ struct mptcp_sock {
bool snd_data_fin_enable;
spinlock_t join_list_lock;
struct work_struct work;
+ struct sk_buff *ooo_last_skb;
+ struct rb_root out_of_order_queue;
struct list_head conn_list;
struct list_head rtx_queue;
struct list_head join_list;
@@ -268,6 +274,12 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
return (struct mptcp_subflow_request_sock *)rsk;
}
+enum mptcp_data_avail {
+ MPTCP_SUBFLOW_NODATA,
+ MPTCP_SUBFLOW_DATA_AVAIL,
+ MPTCP_SUBFLOW_OOO_DATA
+};
+
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
@@ -292,10 +304,10 @@ struct mptcp_subflow_context {
map_valid : 1,
mpc_map : 1,
backup : 1,
- data_avail : 1,
rx_eof : 1,
use_64bit_ack : 1, /* Set when we received a 64-bit DSN */
can_ack : 1; /* only after processing the remote a key */
+ enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -350,8 +362,7 @@ bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
/* called with sk socket lock held */
-int __mptcp_subflow_connect(struct sock *sk, int ifindex,
- const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
@@ -464,12 +475,12 @@ static inline bool before64(__u64 seq1, __u64 seq2)
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
-static inline bool __mptcp_check_fallback(struct mptcp_sock *msk)
+static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk)
{
return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}
-static inline bool mptcp_check_fallback(struct sock *sk)
+static inline bool mptcp_check_fallback(const struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
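
The out-of-order queue code above orders skbs by 64-bit data sequence numbers using the before64()/after64() helpers from protocol.h. A small userspace sketch of the signed-difference comparison idea they rely on (example values are arbitrary, not taken from the patch):

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Same idea as the kernel's before64()/after64(): compare 64-bit sequence
 * numbers via a signed difference so that wraparound is handled. */
static bool before64(uint64_t seq1, uint64_t seq2)
{
	return (int64_t)(seq1 - seq2) < 0;
}

static bool after64(uint64_t seq1, uint64_t seq2)
{
	return before64(seq2, seq1);
}

int main(void)
{
	uint64_t ack_seq = UINT64_MAX - 10;	/* receive level, about to wrap */
	uint64_t map_seq = 5;			/* mapping that already wrapped */

	/* map_seq is logically newer despite being numerically smaller */
	printf("after64(map_seq, ack_seq)  = %d\n", after64(map_seq, ack_seq));
	printf("before64(map_seq, ack_seq) = %d\n", before64(map_seq, ack_seq));
	return 0;
}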
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e8cac26..a2ae308 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -20,6 +20,7 @@
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
+#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"
@@ -434,6 +435,7 @@ static void mptcp_sock_destruct(struct sock *sk)
sock_orphan(sk);
}
+ skb_rbtree_purge(&mptcp_sk(sk)->out_of_order_queue);
mptcp_token_destroy(mptcp_sk(sk));
inet_sock_destruct(sk);
}
@@ -804,16 +806,25 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
return MAPPING_OK;
}
-static int subflow_read_actor(read_descriptor_t *desc,
- struct sk_buff *skb,
- unsigned int offset, size_t len)
+static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
+ unsigned int limit)
{
- size_t copy_len = min(desc->count, len);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
+ u32 incr;
- desc->count -= copy_len;
+ incr = limit >= skb->len ? skb->len + fin : limit;
- pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
- return copy_len;
+ pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
+ subflow->map_subflow_seq);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
+ tcp_sk(ssk)->copied_seq += incr;
+ if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
+ sk_eat_skb(ssk, skb);
+ if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
+ subflow->map_valid = 0;
+ if (incr)
+ tcp_cleanup_rbuf(ssk, incr);
}
static bool subflow_check_data_avail(struct sock *ssk)
@@ -825,13 +836,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
+ if (!skb_peek(&ssk->sk_receive_queue))
+ subflow->data_avail = 0;
if (subflow->data_avail)
return true;
msk = mptcp_sk(subflow->conn);
for (;;) {
- u32 map_remaining;
- size_t delta;
u64 ack_seq;
u64 old_ack;
@@ -849,6 +860,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
subflow->ssn_offset;
+ subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
return true;
}
@@ -876,42 +888,18 @@ static bool subflow_check_data_avail(struct sock *ssk)
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
ack_seq);
- if (ack_seq == old_ack)
+ if (ack_seq == old_ack) {
+ subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
break;
+ } else if (after64(ack_seq, old_ack)) {
+ subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
+ break;
+ }
/* only accept in-sequence mapping. Old values are spurious
- * retransmission; we can hit "future" values on active backup
- * subflow switch, we relay on retransmissions to get
- * in-sequence data.
- * Cuncurrent subflows support will require subflow data
- * reordering
+ * retransmission
*/
- map_remaining = subflow->map_data_len -
- mptcp_subflow_get_map_offset(subflow);
- if (before64(ack_seq, old_ack))
- delta = min_t(size_t, old_ack - ack_seq, map_remaining);
- else
- delta = min_t(size_t, ack_seq - old_ack, map_remaining);
-
- /* discard mapped data */
- pr_debug("discarding %zu bytes, current map len=%d", delta,
- map_remaining);
- if (delta) {
- read_descriptor_t desc = {
- .count = delta,
- };
- int ret;
-
- ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
- if (ret < 0) {
- ssk->sk_err = -ret;
- goto fatal;
- }
- if (ret < delta)
- return false;
- if (delta == map_remaining)
- subflow->map_valid = 0;
- }
+ mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
}
return true;
@@ -922,13 +910,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
ssk->sk_error_report(ssk);
tcp_set_state(ssk, TCP_CLOSE);
tcp_send_active_reset(ssk, GFP_ATOMIC);
+ subflow->data_avail = 0;
return false;
}
bool mptcp_subflow_data_available(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct sk_buff *skb;
/* check if current mapping is still valid */
if (subflow->map_valid &&
@@ -941,15 +929,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
subflow->map_data_len);
}
- if (!subflow_check_data_avail(sk)) {
- subflow->data_avail = 0;
- return false;
- }
-
- skb = skb_peek(&sk->sk_receive_queue);
- subflow->data_avail = skb &&
- before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
- return subflow->data_avail;
+ return subflow_check_data_avail(sk);
}
/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
@@ -996,8 +976,10 @@ static void subflow_write_space(struct sock *sk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
- sk_stream_write_space(sk);
- if (sk_stream_is_writeable(sk)) {
+ if (!sk_stream_is_writeable(sk))
+ return;
+
+ if (sk_stream_is_writeable(parent)) {
set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
smp_mb__after_atomic();
/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
@@ -1056,8 +1038,7 @@ static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
#endif
}
-int __mptcp_subflow_connect(struct sock *sk, int ifindex,
- const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1101,7 +1082,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
if (loc->family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- ssk->sk_bound_dev_if = ifindex;
+ ssk->sk_bound_dev_if = loc->ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
goto failed;
@@ -1112,7 +1093,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
subflow->remote_token = remote_token;
subflow->local_id = local_id;
subflow->request_join = 1;
- subflow->request_bkup = 1;
+ subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index af6c93e..2d5d5fb 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -93,12 +93,15 @@
/*
Assumptions:
- - if device has no dev->hard_header routine, it adds and removes ll header
- inside itself. In this case ll header is invisible outside of device,
- but higher levels still should reserve dev->hard_header_len.
- Some devices are enough clever to reallocate skb, when header
- will not fit to reserved space (tunnel), another ones are silly
- (PPP).
+ - If the device has no dev->header_ops, there is no LL header visible
+ above the device. In this case, its hard_header_len should be 0.
+ The device may prepend its own header internally. In this case, its
+ needed_headroom should be set to the space needed for it to add its
+ internal header.
+ For example, a WiFi driver pretending to be an Ethernet driver should
+ set its hard_header_len to be the Ethernet header length, and set its
+ needed_headroom to be (the real WiFi header length - the fake Ethernet
+ header length).
- packet socket receives packets with pulled ll header,
so that SOCK_RAW should push it back.
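
The rewritten af_packet.c comment above sets the rule for devices without header_ops: report hard_header_len as the link-layer header userspace should see, and needed_headroom as the extra space the driver prepends internally. A toy calculation for the WiFi-pretending-to-be-Ethernet case the comment mentions (the 802.11 header length below is an assumed example value, not from the patch):

#include <stdio.h>

int main(void)
{
	int eth_hlen = 14;		/* the fake Ethernet header exposed to the stack */
	int wifi_hdr_len = 30;		/* assumed real 802.11 header the driver prepends */

	int hard_header_len = eth_hlen;
	int needed_headroom = wifi_hdr_len - eth_hlen;

	printf("hard_header_len=%d needed_headroom=%d\n",
	       hard_header_len, needed_headroom);
	return 0;
}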
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index cd5a80b..19f7143 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -395,7 +395,7 @@ struct rxrpc_bundle {
unsigned int debug_id;
bool try_upgrade; /* True if the bundle is attempting upgrade */
bool alloc_conn; /* True if someone's getting a conn */
- unsigned short alloc_error; /* Error from last conn allocation */
+ short alloc_error; /* Error from last conn allocation */
spinlock_t channel_lock;
struct rb_node local_node; /* Node in local->client_conns */
struct list_head waiting_calls; /* Calls waiting for channels */
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 0e4e187..78c845a 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -433,7 +433,6 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
if (!rxrpc_may_reuse_conn(old)) {
if (old)
trace_rxrpc_client(old, -1, rxrpc_client_replace);
- candidate->bundle = rxrpc_get_bundle(bundle);
candidate->bundle_shift = shift;
bundle->conns[i] = candidate;
for (j = 0; j < RXRPC_MAXCALLS; j++)
@@ -724,8 +723,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
/* Paired with the write barrier in rxrpc_activate_one_channel(). */
smp_rmb();
-out:
+out_put_bundle:
rxrpc_put_bundle(bundle);
+out:
_leave(" = %d", ret);
return ret;
@@ -742,7 +742,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
rxrpc_disconnect_client_call(bundle, call);
- goto out;
+ goto out_put_bundle;
}
/*
@@ -1111,6 +1111,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
conn = list_entry(graveyard.next,
struct rxrpc_connection, cache_link);
list_del_init(&conn->cache_link);
+ rxrpc_unbundle_conn(conn);
rxrpc_put_connection(conn);
}
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index aa254ae..00bb158 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -5,7 +5,8 @@
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+ simult_flows.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
new file mode 100755
index 0000000..0d88225
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -0,0 +1,293 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+sec=$(date +%s)
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+ns3="ns3-$rndh"
+capture=false
+ksft_skip=4
+timeout=30
+test_cnt=1
+ret=0
+bail=0
+
+usage() {
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-d: debug this script"
+}
+
+cleanup()
+{
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+ rm -f "$capout"
+
+ local netns
+ for netns in "$ns1" "$ns2" "$ns3";do
+ ip netns del $netns
+ done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+# "$ns1" ns2 ns3
+# ns1eth1 ns2eth1 ns2eth3 ns3eth1
+# netem
+# ns1eth2 ns2eth2
+# netem
+
+setup()
+{
+ large=$(mktemp)
+ small=$(mktemp)
+ sout=$(mktemp)
+ cout=$(mktemp)
+ capout=$(mktemp)
+ size=$((2048 * 4096))
+ dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
+ dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
+
+ trap cleanup EXIT
+
+ for i in "$ns1" "$ns2" "$ns3";do
+ ip netns add $i || exit $ksft_skip
+ ip -net $i link set lo up
+ done
+
+ ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+ ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
+ ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"
+
+ ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
+ ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
+ ip -net "$ns1" link set ns1eth1 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.1.2
+ ip -net "$ns1" route add default via dead:beef:1::2
+
+ ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ ip -net "$ns1" link set ns1eth2 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.2.2 metric 101
+ ip -net "$ns1" route add default via dead:beef:2::2 metric 101
+
+ ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
+ ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+ ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
+
+ ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ ip -net "$ns2" link set ns2eth1 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
+ ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
+ ip -net "$ns2" link set ns2eth2 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
+ ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
+ ip -net "$ns2" link set ns2eth3 up mtu 1500
+ ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
+ ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
+
+ ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
+ ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
+ ip -net "$ns3" link set ns3eth1 up mtu 1500
+ ip -net "$ns3" route add default via 10.0.3.2
+ ip -net "$ns3" route add default via dead:beef:3::2
+
+ ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+}
+
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
+do_transfer()
+{
+ local cin=$1
+ local sin=$2
+ local max_time=$3
+ local port
+ port=$((10000+$test_cnt))
+ test_cnt=$((test_cnt+1))
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ local addr_port
+ addr_port=$(printf "%s:%d" ${connect_addr} ${port})
+
+ if $capture; then
+ local capuser
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ local capfile="${rndh}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+ ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
+
+ ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
+
+ sleep 1
+ fi
+
+ ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+ local spid=$!
+
+ wait_local_port_listen "${ns3}" "${port}"
+
+ local start
+ start=$(date +%s%3N)
+ ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+ local cpid=$!
+
+ wait $cpid
+ local retc=$?
+ wait $spid
+ local rets=$?
+
+ local stop
+ stop=$(date +%s%3N)
+
+ if $capture; then
+ sleep 1
+ kill ${cappid_listener}
+ kill ${cappid_connector}
+ fi
+
+ local duration
+ duration=$((stop-start))
+
+ cmp $sin $cout > /dev/null 2>&1
+ local cmps=$?
+ cmp $cin $sout > /dev/null 2>&1
+ local cmpc=$?
+
+ printf "%16s" "$duration max $max_time "
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
+ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \
+ [ $duration -lt $max_time ]; then
+ echo "[ OK ]"
+ cat "$capout"
+ return 0
+ fi
+
+ echo " [ fail ]"
+ echo "client exit code $retc, server $rets" 1>&2
+ echo "\nnetns ${ns3} socket stat for $port:" 1>&2
+ ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
+ echo "\nnetns ${ns1} socket stat for $port:" 1>&2
+ ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+ ls -l $sin $cout
+ ls -l $cin $sout
+
+ cat "$capout"
+ return 1
+}
+
+run_test()
+{
+ local rate1=$1
+ local rate2=$2
+ local delay1=$3
+ local delay2=$4
+ local lret
+ local dev
+ shift 4
+ local msg=$*
+
+ [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
+ [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
+
+ for dev in ns1eth1 ns1eth2; do
+ tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ for dev in ns2eth1 ns2eth2; do
+ tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+ # time is measured in ms
+ local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) ))
+
+ # mptcp_connect will do some sleeps to allow the mp_join handshake
+ # completion
+ time=$((time + 1350))
+
+ printf "%-50s" "$msg"
+ do_transfer $small $large $((time * 11 / 10))
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+
+ printf "%-50s" "$msg - reverse direction"
+ do_transfer $large $small $((time * 11 / 10))
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+}
+
+while getopts "bcdh" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "b")
+ bail=1
+ ;;
+ "c")
+ capture=true
+ ;;
+ "d")
+ set -x
+ ;;
+ "?")
+ usage $0
+ exit 1
+ ;;
+ esac
+done
+
+setup
+run_test 10 10 0 0 "balanced bwidth"
+run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+
+# we still need some additional infrastructure to pass the following test-cases
+# run_test 30 10 0 0 "unbalanced bwidth"
+# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+exit $ret
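
The new simult_flows.sh selftest sizes its timeout from the aggregate link rate: expected transfer time in ms is size * 8 * 1000 / ((rate1 + rate2) * 1024 * 1024), plus a 1350 ms allowance for the MP_JOIN handshake, and each run must finish within 110% of that. A standalone check of the arithmetic for the "balanced bwidth" case (illustrative only, not part of the patch):

#include <stdio.h>

int main(void)
{
	long long size = 2048LL * 4096;		/* bytes transferred, as in setup() */
	int rate1 = 10, rate2 = 10;		/* Mbit/s, the "balanced bwidth" case */

	long long time_ms = size * 8 * 1000 /
			    ((long long)(rate1 + rate2) * 1024 * 1024);
	time_ms += 1350;			/* MP_JOIN handshake allowance */

	printf("expected %lld ms, timeout %lld ms\n", time_ms, time_ms * 11 / 10);
	return 0;
}

With the values above this prints 4550 ms expected and a 5005 ms timeout, matching the margins the script allows itself.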