Skip to content

Commit

Permalink
io_uring: skip spinlocking for ->task_complete
Browse files Browse the repository at this point in the history
->task_complete was added to serialised CQE posting by doing it from
the task context only (or fallback wq when the task is dead), and now we
can use that to avoid taking ->completion_lock while filling CQ entries.
The patch skips spinlocking only in two spots,
__io_submit_flush_completions() and flushing in io_aux_cqe, it's safer
and covers all cases we care about. Extra care is taken to force taking
the lock while queueing overflow entries.

It fundamentally relies on SINGLE_ISSUER to have only one task posting
events. It also need to take into account overflowed CQEs, flushing of
which happens in the cq wait path, and so this implementation also needs
DEFER_TASKRUN to limit waiters. For the same reason we disable it for
SQPOLL, and for IOPOLL as it won't benefit from it in any case.
DEFER_TASKRUN, SQPOLL and IOPOLL requirement may be relaxed in the
future.

Signed-off-by: Pavel Begunkov <[email protected]>
Link: https://lore.kernel.org/r/2a8c91fd82cfcdcc1d2e5bac7051fe2c183bda73.1670384893.git.asml.silence@gmail.com
[axboe: modify to apply]
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
isilence authored and axboe committed Dec 7, 2022
1 parent 6d043ee commit f66f734
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 20 deletions.
71 changes: 52 additions & 19 deletions io_uring/io_uring.c
Original file line number Diff line number Diff line change
Expand Up @@ -584,21 +584,36 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
io_eventfd_flush_signal(ctx);
}

static inline void __io_cq_lock(struct io_ring_ctx *ctx)
__acquires(ctx->completion_lock)
{
if (!ctx->task_complete)
spin_lock(&ctx->completion_lock);
}

static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
{
if (!ctx->task_complete)
spin_unlock(&ctx->completion_lock);
}

/* keep it inlined for io_submit_flush_completions() */
static inline void io_cq_unlock_post_inline(struct io_ring_ctx *ctx)
static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
__releases(ctx->completion_lock)
{
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);

__io_cq_unlock(ctx);
io_commit_cqring_flush(ctx);
io_cqring_wake(ctx);
}

void io_cq_unlock_post(struct io_ring_ctx *ctx)
__releases(ctx->completion_lock)
{
io_cq_unlock_post_inline(ctx);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_commit_cqring_flush(ctx);
io_cqring_wake(ctx);
}

/* Returns true if there are no backlogged entries after the flush */
Expand Down Expand Up @@ -785,12 +800,13 @@ struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow)
return &rings->cqes[off];
}

static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
bool allow_overflow)
static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
u32 cflags)
{
struct io_uring_cqe *cqe;

lockdep_assert_held(&ctx->completion_lock);
if (!ctx->task_complete)
lockdep_assert_held(&ctx->completion_lock);

ctx->cq_extra++;

Expand All @@ -813,10 +829,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32
}
return true;
}

if (allow_overflow)
return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);

return false;
}

Expand All @@ -830,7 +842,17 @@ static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
for (i = 0; i < state->cqes_count; i++) {
struct io_uring_cqe *cqe = &state->cqes[i];

io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags, true);
if (!io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags)) {
if (ctx->task_complete) {
spin_lock(&ctx->completion_lock);
io_cqring_event_overflow(ctx, cqe->user_data,
cqe->res, cqe->flags, 0, 0);
spin_unlock(&ctx->completion_lock);
} else {
io_cqring_event_overflow(ctx, cqe->user_data,
cqe->res, cqe->flags, 0, 0);
}
}
}
state->cqes_count = 0;
}
Expand All @@ -841,7 +863,10 @@ static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u
bool filled;

io_cq_lock(ctx);
filled = io_fill_cqe_aux(ctx, user_data, res, cflags, allow_overflow);
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
if (!filled && allow_overflow)
filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);

io_cq_unlock_post(ctx);
return filled;
}
Expand All @@ -865,10 +890,10 @@ bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32
lockdep_assert_held(&ctx->uring_lock);

if (ctx->submit_state.cqes_count == length) {
io_cq_lock(ctx);
__io_cq_lock(ctx);
__io_flush_post_cqes(ctx);
/* no need to flush - flush is deferred */
io_cq_unlock(ctx);
__io_cq_unlock_post(ctx);
}

/* For defered completions this is not as strict as it is otherwise,
Expand Down Expand Up @@ -1403,18 +1428,26 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_wq_work_node *node, *prev;
struct io_submit_state *state = &ctx->submit_state;

io_cq_lock(ctx);
__io_cq_lock(ctx);
/* must come first to preserve CQE ordering in failure cases */
if (state->cqes_count)
__io_flush_post_cqes(ctx);
wq_list_for_each(node, prev, &state->compl_reqs) {
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);

if (!(req->flags & REQ_F_CQE_SKIP))
__io_fill_cqe_req(ctx, req);
if (!(req->flags & REQ_F_CQE_SKIP) &&
unlikely(!__io_fill_cqe_req(ctx, req))) {
if (ctx->task_complete) {
spin_lock(&ctx->completion_lock);
io_req_cqe_overflow(req);
spin_unlock(&ctx->completion_lock);
} else {
io_req_cqe_overflow(req);
}
}
}
io_cq_unlock_post_inline(ctx);
__io_cq_unlock_post(ctx);

if (!wq_list_empty(&ctx->submit_state.compl_reqs)) {
io_free_batch_list(ctx, state->compl_reqs.first);
Expand Down
10 changes: 9 additions & 1 deletion io_uring/io_uring.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
*/
cqe = io_get_cqe(ctx);
if (unlikely(!cqe))
return io_req_cqe_overflow(req);
return false;

trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
req->cqe.res, req->cqe.flags,
Expand All @@ -156,6 +156,14 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
return true;
}

static inline bool io_fill_cqe_req(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
if (likely(__io_fill_cqe_req(ctx, req)))
return true;
return io_req_cqe_overflow(req);
}

static inline void req_set_fail(struct io_kiocb *req)
{
req->flags |= REQ_F_FAIL;
Expand Down

0 comments on commit f66f734

Please sign in to comment.