Merge branch 'xsa' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Merge xen fixes from Juergen Gross:
"Fixes for two issues related to Xen and malicious guests:
- Guest can force the netback driver to hog large amounts of memory
- Denial of Service in other guests due to event storms"
* 'xsa' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen/netback: don't queue unlimited number of packages
xen/netback: fix rx queue stall detection
xen/console: harden hvc_xen against event channel storms
xen/netfront: harden netfront against event channel storms
xen/blkfront: harden blkfront against event channel storms
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8e3983e4..286cf1a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1512,9 +1512,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
unsigned long flags;
struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
struct blkfront_info *info = rinfo->dev_info;
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
return IRQ_HANDLED;
+ }
spin_lock_irqsave(&rinfo->ring_lock, flags);
again:
@@ -1530,6 +1533,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
unsigned long id;
unsigned int op;
+ eoiflag = 0;
+
RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
id = bret.id;
@@ -1646,6 +1651,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ xen_irq_lateeoi(irq, eoiflag);
+
return IRQ_HANDLED;
err:
@@ -1653,6 +1660,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ /* No EOI in order to avoid further interrupts. */
+
pr_alert("%s disabled for further use\n", info->gd->disk_name);
return IRQ_HANDLED;
}
@@ -1692,8 +1701,8 @@ static int setup_blkring(struct xenbus_device *dev,
if (err)
goto fail;
- err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
- "blkif", rinfo);
+ err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt,
+ 0, "blkif", rinfo);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_evtchn_to_irqhandler failed");
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 4a16d6e..d9dea48 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
unsigned int rx_queue_max;
unsigned int rx_queue_len;
unsigned long last_rx_time;
+ unsigned int rx_slots_needed;
bool stalled;
struct xenvif_copy_state rx_copy;
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index accc991..dbac4c0 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -33,28 +33,36 @@
#include <xen/xen.h>
#include <xen/events.h>
+/*
+ * Update the needed ring page slots for the first SKB queued.
+ * Note that any call sequence outside the RX thread calling this function
+ * needs to wake up the RX thread via a call of xenvif_kick_thread()
+ * afterwards in order to avoid a race with putting the thread to sleep.
+ */
+static void xenvif_update_needed_slots(struct xenvif_queue *queue,
+ const struct sk_buff *skb)
+{
+ unsigned int needed = 0;
+
+ if (skb) {
+ needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
+ if (skb_is_gso(skb))
+ needed++;
+ if (skb->sw_hash)
+ needed++;
+ }
+
+ WRITE_ONCE(queue->rx_slots_needed, needed);
+}
+
static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
RING_IDX prod, cons;
- struct sk_buff *skb;
- int needed;
- unsigned long flags;
+ unsigned int needed;
- spin_lock_irqsave(&queue->rx_queue.lock, flags);
-
- skb = skb_peek(&queue->rx_queue);
- if (!skb) {
- spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+ needed = READ_ONCE(queue->rx_slots_needed);
+ if (!needed)
return false;
- }
-
- needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
- if (skb_is_gso(skb))
- needed++;
- if (skb->sw_hash)
- needed++;
-
- spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
do {
prod = queue->rx.sring->req_prod;
@@ -80,13 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
spin_lock_irqsave(&queue->rx_queue.lock, flags);
- __skb_queue_tail(&queue->rx_queue, skb);
-
- queue->rx_queue_len += skb->len;
- if (queue->rx_queue_len > queue->rx_queue_max) {
+ if (queue->rx_queue_len >= queue->rx_queue_max) {
struct net_device *dev = queue->vif->dev;
netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
+ kfree_skb(skb);
+ queue->vif->dev->stats.rx_dropped++;
+ } else {
+ if (skb_queue_empty(&queue->rx_queue))
+ xenvif_update_needed_slots(queue, skb);
+
+ __skb_queue_tail(&queue->rx_queue, skb);
+
+ queue->rx_queue_len += skb->len;
}
spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
@@ -100,6 +114,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
skb = __skb_dequeue(&queue->rx_queue);
if (skb) {
+ xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue));
+
queue->rx_queue_len -= skb->len;
if (queue->rx_queue_len < queue->rx_queue_max) {
struct netdev_queue *txq;
@@ -134,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
break;
xenvif_rx_dequeue(queue);
kfree_skb(skb);
+ queue->vif->dev->stats.rx_dropped++;
}
}
@@ -487,27 +504,31 @@ void xenvif_rx_action(struct xenvif_queue *queue)
xenvif_rx_copy_flush(queue);
}
-static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
+static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue)
{
RING_IDX prod, cons;
prod = queue->rx.sring->req_prod;
cons = queue->rx.req_cons;
+ return prod - cons;
+}
+
+static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue)
+{
+ unsigned int needed = READ_ONCE(queue->rx_slots_needed);
+
return !queue->stalled &&
- prod - cons < 1 &&
+ xenvif_rx_queue_slots(queue) < needed &&
time_after(jiffies,
queue->last_rx_time + queue->vif->stall_timeout);
}
static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
- RING_IDX prod, cons;
+ unsigned int needed = READ_ONCE(queue->rx_slots_needed);
- prod = queue->rx.sring->req_prod;
- cons = queue->rx.req_cons;
-
- return queue->stalled && prod - cons >= 1;
+ return queue->stalled && xenvif_rx_queue_slots(queue) >= needed;
}
bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 911f439..d514d96 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -148,6 +148,9 @@ struct netfront_queue {
grant_ref_t gref_rx_head;
grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
+ unsigned int rx_rsp_unconsumed;
+ spinlock_t rx_cons_lock;
+
struct page_pool *page_pool;
struct xdp_rxq_info xdp_rxq;
};
@@ -376,12 +379,13 @@ static int xennet_open(struct net_device *dev)
return 0;
}
-static void xennet_tx_buf_gc(struct netfront_queue *queue)
+static bool xennet_tx_buf_gc(struct netfront_queue *queue)
{
RING_IDX cons, prod;
unsigned short id;
struct sk_buff *skb;
bool more_to_do;
+ bool work_done = false;
const struct device *dev = &queue->info->netdev->dev;
BUG_ON(!netif_carrier_ok(queue->info->netdev));
@@ -398,6 +402,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
struct xen_netif_tx_response txrsp;
+ work_done = true;
+
RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
if (txrsp.status == XEN_NETIF_RSP_NULL)
continue;
@@ -441,11 +447,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
xennet_maybe_wake_tx(queue);
- return;
+ return work_done;
err:
queue->info->broken = true;
dev_alert(dev, "Disabled for further use\n");
+
+ return work_done;
}
struct xennet_gnttab_make_txreq {
@@ -834,6 +842,16 @@ static int xennet_close(struct net_device *dev)
return 0;
}
+static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&queue->rx_cons_lock, flags);
+ queue->rx.rsp_cons = val;
+ queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
+ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+}
+
static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
grant_ref_t ref)
{
@@ -885,7 +903,7 @@ static int xennet_get_extras(struct netfront_queue *queue,
xennet_move_rx_slot(queue, skb, ref);
} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
- queue->rx.rsp_cons = cons;
+ xennet_set_rx_rsp_cons(queue, cons);
return err;
}
@@ -1039,7 +1057,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
}
if (unlikely(err))
- queue->rx.rsp_cons = cons + slots;
+ xennet_set_rx_rsp_cons(queue, cons + slots);
return err;
}
@@ -1093,7 +1111,8 @@ static int xennet_fill_frags(struct netfront_queue *queue,
__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
}
if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
- queue->rx.rsp_cons = ++cons + skb_queue_len(list);
+ xennet_set_rx_rsp_cons(queue,
+ ++cons + skb_queue_len(list));
kfree_skb(nskb);
return -ENOENT;
}
@@ -1106,7 +1125,7 @@ static int xennet_fill_frags(struct netfront_queue *queue,
kfree_skb(nskb);
}
- queue->rx.rsp_cons = cons;
+ xennet_set_rx_rsp_cons(queue, cons);
return 0;
}
@@ -1229,7 +1248,9 @@ static int xennet_poll(struct napi_struct *napi, int budget)
if (unlikely(xennet_set_skb_gso(skb, gso))) {
__skb_queue_head(&tmpq, skb);
- queue->rx.rsp_cons += skb_queue_len(&tmpq);
+ xennet_set_rx_rsp_cons(queue,
+ queue->rx.rsp_cons +
+ skb_queue_len(&tmpq));
goto err;
}
}
@@ -1253,7 +1274,8 @@ static int xennet_poll(struct napi_struct *napi, int budget)
__skb_queue_tail(&rxq, skb);
- i = ++queue->rx.rsp_cons;
+ i = queue->rx.rsp_cons + 1;
+ xennet_set_rx_rsp_cons(queue, i);
work_done++;
}
if (need_xdp_flush)
@@ -1417,40 +1439,79 @@ static int xennet_set_features(struct net_device *dev,
return 0;
}
-static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
+static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
{
- struct netfront_queue *queue = dev_id;
unsigned long flags;
- if (queue->info->broken)
- return IRQ_HANDLED;
+ if (unlikely(queue->info->broken))
+ return false;
spin_lock_irqsave(&queue->tx_lock, flags);
- xennet_tx_buf_gc(queue);
+ if (xennet_tx_buf_gc(queue))
+ *eoi = 0;
spin_unlock_irqrestore(&queue->tx_lock, flags);
+ return true;
+}
+
+static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
+{
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+ if (likely(xennet_handle_tx(dev_id, &eoiflag)))
+ xen_irq_lateeoi(irq, eoiflag);
+
return IRQ_HANDLED;
}
+static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
+{
+ unsigned int work_queued;
+ unsigned long flags;
+
+ if (unlikely(queue->info->broken))
+ return false;
+
+ spin_lock_irqsave(&queue->rx_cons_lock, flags);
+ work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
+ if (work_queued > queue->rx_rsp_unconsumed) {
+ queue->rx_rsp_unconsumed = work_queued;
+ *eoi = 0;
+ } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
+ const struct device *dev = &queue->info->netdev->dev;
+
+ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+ dev_alert(dev, "RX producer index going backwards\n");
+ dev_alert(dev, "Disabled for further use\n");
+ queue->info->broken = true;
+ return false;
+ }
+ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+
+ if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
+ napi_schedule(&queue->napi);
+
+ return true;
+}
+
static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
{
- struct netfront_queue *queue = dev_id;
- struct net_device *dev = queue->info->netdev;
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
- if (queue->info->broken)
- return IRQ_HANDLED;
-
- if (likely(netif_carrier_ok(dev) &&
- RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
- napi_schedule(&queue->napi);
+ if (likely(xennet_handle_rx(dev_id, &eoiflag)))
+ xen_irq_lateeoi(irq, eoiflag);
return IRQ_HANDLED;
}
static irqreturn_t xennet_interrupt(int irq, void *dev_id)
{
- xennet_tx_interrupt(irq, dev_id);
- xennet_rx_interrupt(irq, dev_id);
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+ if (xennet_handle_tx(dev_id, &eoiflag) &&
+ xennet_handle_rx(dev_id, &eoiflag))
+ xen_irq_lateeoi(irq, eoiflag);
+
return IRQ_HANDLED;
}
@@ -1768,9 +1829,10 @@ static int setup_netfront_single(struct netfront_queue *queue)
if (err < 0)
goto fail;
- err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
- xennet_interrupt,
- 0, queue->info->netdev->name, queue);
+ err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
+ xennet_interrupt, 0,
+ queue->info->netdev->name,
+ queue);
if (err < 0)
goto bind_fail;
queue->rx_evtchn = queue->tx_evtchn;
@@ -1798,18 +1860,18 @@ static int setup_netfront_split(struct netfront_queue *queue)
snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
"%s-tx", queue->name);
- err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
- xennet_tx_interrupt,
- 0, queue->tx_irq_name, queue);
+ err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
+ xennet_tx_interrupt, 0,
+ queue->tx_irq_name, queue);
if (err < 0)
goto bind_tx_fail;
queue->tx_irq = err;
snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
"%s-rx", queue->name);
- err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
- xennet_rx_interrupt,
- 0, queue->rx_irq_name, queue);
+ err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
+ xennet_rx_interrupt, 0,
+ queue->rx_irq_name, queue);
if (err < 0)
goto bind_rx_fail;
queue->rx_irq = err;
@@ -1911,6 +1973,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
spin_lock_init(&queue->tx_lock);
spin_lock_init(&queue->rx_lock);
+ spin_lock_init(&queue->rx_cons_lock);
timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 71e0dd2..ebaf750 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -37,6 +37,8 @@ struct xencons_info {
struct xenbus_device *xbdev;
struct xencons_interface *intf;
unsigned int evtchn;
+ XENCONS_RING_IDX out_cons;
+ unsigned int out_cons_same;
struct hvc_struct *hvc;
int irq;
int vtermno;
@@ -138,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
XENCONS_RING_IDX cons, prod;
int recv = 0;
struct xencons_info *xencons = vtermno_to_xencons(vtermno);
+ unsigned int eoiflag = 0;
+
if (xencons == NULL)
return -EINVAL;
intf = xencons->intf;
@@ -157,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
mb(); /* read ring before consuming */
intf->in_cons = cons;
- notify_daemon(xencons);
+ /*
+ * When to mark interrupt having been spurious:
+ * - there was no new data to be read, and
+ * - the backend did not consume some output bytes, and
+ * - the previous round with no read data didn't see consumed bytes
+ * (we might have a race with an interrupt being in flight while
+ * updating xencons->out_cons, so account for that by allowing one
+ * round without any visible reason)
+ */
+ if (intf->out_cons != xencons->out_cons) {
+ xencons->out_cons = intf->out_cons;
+ xencons->out_cons_same = 0;
+ }
+ if (recv) {
+ notify_daemon(xencons);
+ } else if (xencons->out_cons_same++ > 1) {
+ eoiflag = XEN_EOI_FLAG_SPURIOUS;
+ }
+
+ xen_irq_lateeoi(xencons->irq, eoiflag);
+
return recv;
}
@@ -386,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev,
if (ret)
return ret;
info->evtchn = evtchn;
- irq = bind_evtchn_to_irq(evtchn);
+ irq = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn);
if (irq < 0)
return irq;
info->irq = irq;
@@ -551,7 +575,7 @@ static int __init xen_hvc_init(void)
return r;
info = vtermno_to_xencons(HVC_COOKIE);
- info->irq = bind_evtchn_to_irq(info->evtchn);
+ info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn);
}
if (info->irq < 0)
info->irq = 0; /* NO_IRQ */
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index a78704a..46d9295 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1251,6 +1251,12 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn)
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
+
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
diff --git a/include/xen/events.h b/include/xen/events.h
index c204262..344081e 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -17,6 +17,7 @@ struct xenbus_device;
unsigned xen_evtchn_nr_channels(void);
int bind_evtchn_to_irq(evtchn_port_t evtchn);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
irq_handler_t handler,
unsigned long irqflags, const char *devname,