io_uring: don't burn CPU for iopoll on exit
First of all don't spin in io_ring_ctx_wait_and_kill() on iopoll.
Requests won't complete faster because of that, but only lengthen
io_uring_release().
The same goes for offloaded cleanup in io_ring_exit_work() -- it
already has waiting loop, don't do blocking active spinning.
For that, pass min=0 into io_iopoll_[try_]reap_events(), so it won't
actively spin. Leave the function if io_do_iopoll() there can't
complete a request to sleep in io_ring_exit_work().
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 38bf423..4c9a494 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2055,7 +2055,7 @@ static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
* We can't just wait for polled events to come to us, we have to actively
* find and complete them.
*/
-static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
+static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
{
if (!(ctx->flags & IORING_SETUP_IOPOLL))
return;
@@ -2064,8 +2064,11 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
while (!list_empty(&ctx->poll_list)) {
unsigned int nr_events = 0;
- io_do_iopoll(ctx, &nr_events, 1);
+ io_do_iopoll(ctx, &nr_events, 0);
+ /* let it sleep and repeat later if can't complete a request */
+ if (nr_events == 0)
+ break;
/*
* Ensure we allow local-to-the-cpu processing to take place,
* in this case we need to ensure that we reap all events.
@@ -7648,7 +7651,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
ctx->sqo_mm = NULL;
}
- io_iopoll_reap_events(ctx);
io_sqe_buffer_unregister(ctx);
io_sqe_files_unregister(ctx);
io_eventfd_unregister(ctx);
@@ -7715,11 +7717,8 @@ static int io_remove_personalities(int id, void *p, void *data)
static void io_ring_exit_work(struct work_struct *work)
{
- struct io_ring_ctx *ctx;
-
- ctx = container_of(work, struct io_ring_ctx, exit_work);
- if (ctx->rings)
- io_cqring_overflow_flush(ctx, true);
+ struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
+ exit_work);
/*
* If we're doing polled IO and end up having requests being
@@ -7727,11 +7726,11 @@ static void io_ring_exit_work(struct work_struct *work)
* we're waiting for refs to drop. We need to reap these manually,
* as nobody else will be looking for them.
*/
- while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) {
- io_iopoll_reap_events(ctx);
+ do {
if (ctx->rings)
io_cqring_overflow_flush(ctx, true);
- }
+ io_iopoll_try_reap_events(ctx);
+ } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
io_ring_ctx_free(ctx);
}
@@ -7747,10 +7746,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
if (ctx->io_wq)
io_wq_cancel_all(ctx->io_wq);
- io_iopoll_reap_events(ctx);
/* if we failed setting up the ctx, we might not have any rings */
if (ctx->rings)
io_cqring_overflow_flush(ctx, true);
+ io_iopoll_try_reap_events(ctx);
idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
INIT_WORK(&ctx->exit_work, io_ring_exit_work);
queue_work(system_wq, &ctx->exit_work);