Skip to content

Commit

Permalink
aio: fix reqs_available handling
Browse files Browse the repository at this point in the history
As reported by Dan Aloni, commit f8567a3 ("aio: fix aio request
leak when events are reaped by userspace") introduces a regression when
user code attempts to perform io_submit() with more events than are
available in the ring buffer.  Reverting that commit would reintroduce a
regression when user space event reaping is used.

Fixing this bug is a bit more involved than the previous attempts to fix
this regression.  Since we do not have a single point at which we can
count events as being reaped by user space and io_getevents(), we have
to track event completion by looking at the number of events left in the
event ring.  So long as there are as many events in the ring buffer as
there have been completion events generate, we cannot call
put_reqs_available().  The code to check for this is now placed in
refill_reqs_available().

A test program from Dan and modified by me for verifying this bug is available
at http://www.kvack.org/~bcrl/20140824-aio_bug.c .

Reported-by: Dan Aloni <[email protected]>
Signed-off-by: Benjamin LaHaise <[email protected]>
Acked-by: Dan Aloni <[email protected]>
Cc: Kent Overstreet <[email protected]>
Cc: Mateusz Guzik <[email protected]>
Cc: Petr Matousek <[email protected]>
Cc: [email protected]      # v3.16 and anything that f8567a3 was backported to
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
bcrl authored and torvalds committed Aug 24, 2014
1 parent 451fd72 commit d856f32
Showing 1 changed file with 73 additions and 4 deletions.
77 changes: 73 additions & 4 deletions fs/aio.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ struct kioctx {

struct {
unsigned tail;
unsigned completed_events;
spinlock_t completion_lock;
} ____cacheline_aligned_in_smp;

Expand Down Expand Up @@ -857,6 +858,68 @@ static bool get_reqs_available(struct kioctx *ctx)
return ret;
}

/* refill_reqs_available
* Updates the reqs_available reference counts used for tracking the
* number of free slots in the completion ring. This can be called
* from aio_complete() (to optimistically update reqs_available) or
* from aio_get_req() (the we're out of events case). It must be
* called holding ctx->completion_lock.
*/
static void refill_reqs_available(struct kioctx *ctx, unsigned head,
unsigned tail)
{
unsigned events_in_ring, completed;

/* Clamp head since userland can write to it. */
head %= ctx->nr_events;
if (head <= tail)
events_in_ring = tail - head;
else
events_in_ring = ctx->nr_events - (head - tail);

completed = ctx->completed_events;
if (events_in_ring < completed)
completed -= events_in_ring;
else
completed = 0;

if (!completed)
return;

ctx->completed_events -= completed;
put_reqs_available(ctx, completed);
}

/* user_refill_reqs_available
* Called to refill reqs_available when aio_get_req() encounters an
* out of space in the completion ring.
*/
static void user_refill_reqs_available(struct kioctx *ctx)
{
spin_lock_irq(&ctx->completion_lock);
if (ctx->completed_events) {
struct aio_ring *ring;
unsigned head;

/* Access of ring->head may race with aio_read_events_ring()
* here, but that's okay since whether we read the old version
* or the new version, and either will be valid. The important
* part is that head cannot pass tail since we prevent
* aio_complete() from updating tail by holding
* ctx->completion_lock. Even if head is invalid, the check
* against ctx->completed_events below will make sure we do the
* safe/right thing.
*/
ring = kmap_atomic(ctx->ring_pages[0]);
head = ring->head;
kunmap_atomic(ring);

refill_reqs_available(ctx, head, ctx->tail);
}

spin_unlock_irq(&ctx->completion_lock);
}

/* aio_get_req
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
Expand All @@ -865,8 +928,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
{
struct kiocb *req;

if (!get_reqs_available(ctx))
return NULL;
if (!get_reqs_available(ctx)) {
user_refill_reqs_available(ctx);
if (!get_reqs_available(ctx))
return NULL;
}

req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
if (unlikely(!req))
Expand Down Expand Up @@ -925,8 +991,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
unsigned tail, pos, head;
unsigned long flags;
unsigned tail, pos;

/*
* Special case handling for sync iocbs:
Expand Down Expand Up @@ -987,10 +1053,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
ctx->tail = tail;

ring = kmap_atomic(ctx->ring_pages[0]);
head = ring->head;
ring->tail = tail;
kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]);

ctx->completed_events++;
if (ctx->completed_events > 1)
refill_reqs_available(ctx, head, tail);
spin_unlock_irqrestore(&ctx->completion_lock, flags);

pr_debug("added to ring %p at [%u]\n", iocb, tail);
Expand All @@ -1005,7 +1075,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)

/* everything turned out well, dispose of the aiocb. */
kiocb_free(iocb);
put_reqs_available(ctx, 1);

/*
* We have to order our ring_info tail store above and test
Expand Down

0 comments on commit d856f32

Please sign in to comment.