block: defer timeouts to a workqueue
Timer context is not very useful for drivers to perform any meaningful abort
action from. So instead of calling the driver from this useless context
defer it to a workqueue as soon as possible.
Note that while a delayed_work item would seem the right thing here I didn't
dare to use it due to the magic in blk_add_timer that pokes deep into timer
internals. But maybe this encourages Tejun to add a sensible API for that to
the workqueue API and we'll all be fine in the end :)
Contains a major update from Keith Bush:
"This patch removes synchronizing the timeout work so that the timer can
start a freeze on its own queue. The timer enters the queue, so timer
context can only start a freeze, but not wait for frozen."
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 93a4e19..9cb2894 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -615,15 +615,19 @@
}
}
-static void blk_mq_rq_timer(unsigned long priv)
+static void blk_mq_timeout_work(struct work_struct *work)
{
- struct request_queue *q = (struct request_queue *)priv;
+ struct request_queue *q =
+ container_of(work, struct request_queue, timeout_work);
struct blk_mq_timeout_data data = {
.next = 0,
.next_set = 0,
};
int i;
+ if (blk_queue_enter(q, true))
+ return;
+
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
if (data.next_set) {
@@ -638,6 +642,7 @@
blk_mq_tag_idle(hctx);
}
}
+ blk_queue_exit(q);
}
/*
@@ -2015,7 +2020,7 @@
hctxs[i]->queue_num = i;
}
- setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
+ INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
q->nr_queues = nr_cpu_ids;