io_uring: only wake up sq thread while current task is in io worker context
If IORING_SETUP_SQPOLL is enabled, sqes are either handled in sq thread
task context or in io worker task context. If current task context is sq
thread, we don't need to check whether should wake up sq thread.
io_iopoll_req_issued() calls wq_has_sleeper(), which has smp_mb() memory
barrier, before this patch, perf shows obvious overhead:
Samples: 481K of event 'cycles', Event count (approx.): 299807382878
Overhead Comma Shared Object Symbol
3.69% :9630 [kernel.vmlinux] [k] io_issue_sqe
With this patch, perf shows:
Samples: 482K of event 'cycles', Event count (approx.): 299929547283
Overhead Comma Shared Object Symbol
0.70% :4015 [kernel.vmlinux] [k] io_issue_sqe
It shows some obvious improvements.
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 67bf904..3617bde 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2712,7 +2712,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
* find it from a io_iopoll_getevents() thread before the issuer is done
* accessing the kiocb cookie.
*/
-static void io_iopoll_req_issued(struct io_kiocb *req)
+static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -2741,7 +2741,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
else
list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
- if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+ /*
+ * If IORING_SETUP_SQPOLL is enabled, sqes are either handled in sq thread
+ * task context or in io worker task context. If current task context is
+ * sq thread, we don't need to check whether should wake up sq thread.
+ */
+ if (in_async && (ctx->flags & IORING_SETUP_SQPOLL) &&
wq_has_sleeper(&ctx->sq_data->wait))
wake_up(&ctx->sq_data->wait);
}
@@ -6241,7 +6246,7 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
if (in_async)
mutex_lock(&ctx->uring_lock);
- io_iopoll_req_issued(req);
+ io_iopoll_req_issued(req, in_async);
if (in_async)
mutex_unlock(&ctx->uring_lock);