io_uring: pass in completion state to appropriate issue side handlers
Provide the completion state to the handlers that we know can complete
inline, so they can utilize this for batching completions.
Cap the max batch count at 32. This should be enough to provide a good
amortization of the cost of the lock+commit dance for completions, while
still being low enough not to cause any real latency issues for SQPOLL
applications.
Xuan Zhuo <xuanzhuo@linux.alibaba.com> reports that this changes his
profile from:
17.97% [kernel] [k] copy_user_generic_unrolled
13.92% [kernel] [k] io_commit_cqring
11.04% [kernel] [k] __io_cqring_fill_event
10.33% [kernel] [k] udp_recvmsg
5.94% [kernel] [k] skb_release_data
4.31% [kernel] [k] udp_rmem_release
2.68% [kernel] [k] __check_object_size
2.24% [kernel] [k] __slab_free
2.22% [kernel] [k] _raw_spin_lock_bh
2.21% [kernel] [k] kmem_cache_free
2.13% [kernel] [k] free_pcppages_bulk
1.83% [kernel] [k] io_submit_sqes
1.38% [kernel] [k] page_frag_free
1.31% [kernel] [k] inet_recvmsg
to
19.99% [kernel] [k] copy_user_generic_unrolled
11.63% [kernel] [k] skb_release_data
9.36% [kernel] [k] udp_rmem_release
8.64% [kernel] [k] udp_recvmsg
6.21% [kernel] [k] __slab_free
4.39% [kernel] [k] __check_object_size
3.64% [kernel] [k] free_pcppages_bulk
2.41% [kernel] [k] kmem_cache_free
2.00% [kernel] [k] io_submit_sqes
1.95% [kernel] [k] page_frag_free
1.54% [kernel] [k] io_put_req
[...]
0.07% [kernel] [k] io_commit_cqring
0.44% [kernel] [k] __io_cqring_fill_event
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 46241c1..6c9ca4f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1360,15 +1360,50 @@ static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
io_cqring_ev_posted(ctx);
}
-static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags)
+static void io_submit_flush_completions(struct io_comp_state *cs)
{
- io_cqring_add_event(req, res, cflags);
- io_put_req(req);
+ struct io_ring_ctx *ctx = cs->ctx;
+
+ spin_lock_irq(&ctx->completion_lock);
+ while (!list_empty(&cs->list)) {
+ struct io_kiocb *req;
+
+ req = list_first_entry(&cs->list, struct io_kiocb, list);
+ list_del(&req->list);
+ io_cqring_fill_event(req, req->result);
+ if (!(req->flags & REQ_F_LINK_HEAD)) {
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req(req);
+ } else {
+ spin_unlock_irq(&ctx->completion_lock);
+ io_put_req(req);
+ spin_lock_irq(&ctx->completion_lock);
+ }
+ }
+ io_commit_cqring(ctx);
+ spin_unlock_irq(&ctx->completion_lock);
+
+ io_cqring_ev_posted(ctx);
+ cs->nr = 0;
+}
+
+static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
+ struct io_comp_state *cs)
+{
+ if (!cs) {
+ io_cqring_add_event(req, res, cflags);
+ io_put_req(req);
+ } else {
+ req->result = res;
+ list_add_tail(&req->list, &cs->list);
+ if (++cs->nr >= 32)
+ io_submit_flush_completions(cs);
+ }
}
static void io_req_complete(struct io_kiocb *req, long res)
{
- __io_req_complete(req, res, 0);
+ __io_req_complete(req, res, 0, NULL);
}
static inline bool io_is_fallback_req(struct io_kiocb *req)
@@ -3179,14 +3214,14 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
/*
* IORING_OP_NOP just posts a completion event, nothing else.
*/
-static int io_nop(struct io_kiocb *req)
+static int io_nop(struct io_kiocb *req, struct io_comp_state *cs)
{
struct io_ring_ctx *ctx = req->ctx;
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- io_req_complete(req, 0);
+ __io_req_complete(req, 0, 0, cs);
return 0;
}
@@ -3408,7 +3443,8 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
return i;
}
-static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
+static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_provide_buf *p = &req->pbuf;
struct io_ring_ctx *ctx = req->ctx;
@@ -3427,7 +3463,7 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
io_ring_submit_lock(ctx, !force_nonblock);
if (ret < 0)
req_set_fail_links(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
}
@@ -3485,7 +3521,8 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
return i ? i : -ENOMEM;
}
-static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
+static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_provide_buf *p = &req->pbuf;
struct io_ring_ctx *ctx = req->ctx;
@@ -3514,7 +3551,7 @@ static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
io_ring_submit_unlock(ctx, !force_nonblock);
if (ret < 0)
req_set_fail_links(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
}
@@ -3545,7 +3582,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
#endif
}
-static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
+static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
#if defined(CONFIG_EPOLL)
struct io_epoll *ie = &req->epoll;
@@ -3557,7 +3595,7 @@ static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
if (ret < 0)
req_set_fail_links(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
#else
return -EOPNOTSUPP;
@@ -3702,7 +3740,8 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static int io_close(struct io_kiocb *req, bool force_nonblock)
+static int io_close(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_close *close = &req->close;
int ret;
@@ -3729,7 +3768,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
req_set_fail_links(req);
fput(close->put_file);
close->put_file = NULL;
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
}
@@ -3815,7 +3854,8 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
-static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
+static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
@@ -3864,11 +3904,12 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
req->flags &= ~REQ_F_NEED_CLEANUP;
if (ret < 0)
req_set_fail_links(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
}
-static int io_send(struct io_kiocb *req, bool force_nonblock)
+static int io_send(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct socket *sock;
int ret;
@@ -3906,7 +3947,7 @@ static int io_send(struct io_kiocb *req, bool force_nonblock)
if (ret < 0)
req_set_fail_links(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
}
@@ -4049,7 +4090,8 @@ static int io_recvmsg_prep(struct io_kiocb *req,
return ret;
}
-static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
+static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
@@ -4105,11 +4147,12 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
req->flags &= ~REQ_F_NEED_CLEANUP;
if (ret < 0)
req_set_fail_links(req);
- __io_req_complete(req, ret, cflags);
+ __io_req_complete(req, ret, cflags, cs);
return 0;
}
-static int io_recv(struct io_kiocb *req, bool force_nonblock)
+static int io_recv(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_buffer *kbuf = NULL;
struct socket *sock;
@@ -4161,7 +4204,7 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock)
req->flags &= ~REQ_F_NEED_CLEANUP;
if (ret < 0)
req_set_fail_links(req);
- __io_req_complete(req, ret, cflags);
+ __io_req_complete(req, ret, cflags, cs);
return 0;
}
@@ -4181,7 +4224,8 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static int io_accept(struct io_kiocb *req, bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_accept *accept = &req->accept;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -4200,7 +4244,7 @@ static int io_accept(struct io_kiocb *req, bool force_nonblock)
ret = -EINTR;
req_set_fail_links(req);
}
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
}
@@ -4224,7 +4268,8 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
&io->connect.address);
}
-static int io_connect(struct io_kiocb *req, bool force_nonblock)
+static int io_connect(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_async_ctx __io, *io;
unsigned file_flags;
@@ -4260,7 +4305,7 @@ static int io_connect(struct io_kiocb *req, bool force_nonblock)
out:
if (ret < 0)
req_set_fail_links(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
}
#else /* !CONFIG_NET */
@@ -5141,7 +5186,8 @@ static int io_files_update_prep(struct io_kiocb *req,
return 0;
}
-static int io_files_update(struct io_kiocb *req, bool force_nonblock)
+static int io_files_update(struct io_kiocb *req, bool force_nonblock,
+ struct io_comp_state *cs)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_uring_files_update up;
@@ -5159,7 +5205,7 @@ static int io_files_update(struct io_kiocb *req, bool force_nonblock)
if (ret < 0)
req_set_fail_links(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, ret, 0, cs);
return 0;
}
@@ -5360,7 +5406,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
switch (req->opcode) {
case IORING_OP_NOP:
- ret = io_nop(req);
+ ret = io_nop(req, cs);
break;
case IORING_OP_READV:
case IORING_OP_READ_FIXED:
@@ -5422,9 +5468,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
break;
}
if (req->opcode == IORING_OP_SENDMSG)
- ret = io_sendmsg(req, force_nonblock);
+ ret = io_sendmsg(req, force_nonblock, cs);
else
- ret = io_send(req, force_nonblock);
+ ret = io_send(req, force_nonblock, cs);
break;
case IORING_OP_RECVMSG:
case IORING_OP_RECV:
@@ -5434,9 +5480,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
break;
}
if (req->opcode == IORING_OP_RECVMSG)
- ret = io_recvmsg(req, force_nonblock);
+ ret = io_recvmsg(req, force_nonblock, cs);
else
- ret = io_recv(req, force_nonblock);
+ ret = io_recv(req, force_nonblock, cs);
break;
case IORING_OP_TIMEOUT:
if (sqe) {
@@ -5460,7 +5506,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_accept(req, force_nonblock);
+ ret = io_accept(req, force_nonblock, cs);
break;
case IORING_OP_CONNECT:
if (sqe) {
@@ -5468,7 +5514,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_connect(req, force_nonblock);
+ ret = io_connect(req, force_nonblock, cs);
break;
case IORING_OP_ASYNC_CANCEL:
if (sqe) {
@@ -5500,7 +5546,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_close(req, force_nonblock);
+ ret = io_close(req, force_nonblock, cs);
break;
case IORING_OP_FILES_UPDATE:
if (sqe) {
@@ -5508,7 +5554,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_files_update(req, force_nonblock);
+ ret = io_files_update(req, force_nonblock, cs);
break;
case IORING_OP_STATX:
if (sqe) {
@@ -5548,7 +5594,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_epoll_ctl(req, force_nonblock);
+ ret = io_epoll_ctl(req, force_nonblock, cs);
break;
case IORING_OP_SPLICE:
if (sqe) {
@@ -5564,7 +5610,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_provide_buffers(req, force_nonblock);
+ ret = io_provide_buffers(req, force_nonblock, cs);
break;
case IORING_OP_REMOVE_BUFFERS:
if (sqe) {
@@ -5572,7 +5618,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret)
break;
}
- ret = io_remove_buffers(req, force_nonblock);
+ ret = io_remove_buffers(req, force_nonblock, cs);
break;
case IORING_OP_TEE:
if (sqe) {
@@ -6006,33 +6052,6 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
-static void io_submit_flush_completions(struct io_comp_state *cs)
-{
- struct io_ring_ctx *ctx = cs->ctx;
-
- spin_lock_irq(&ctx->completion_lock);
- while (!list_empty(&cs->list)) {
- struct io_kiocb *req;
-
- req = list_first_entry(&cs->list, struct io_kiocb, list);
- list_del(&req->list);
- io_cqring_fill_event(req, req->result);
- if (!(req->flags & REQ_F_LINK_HEAD)) {
- req->flags |= REQ_F_COMP_LOCKED;
- io_put_req(req);
- } else {
- spin_unlock_irq(&ctx->completion_lock);
- io_put_req(req);
- spin_lock_irq(&ctx->completion_lock);
- }
- }
- io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
-
- io_cqring_ev_posted(ctx);
- cs->nr = 0;
-}
-
/*
* Batched submission is done, ensure local IO is flushed out.
*/