Skip to content

Commit

Permalink
fs: split generic and aio kiocb
Browse files Browse the repository at this point in the history
Most callers in the kernel want to perform synchronous file I/O, but
still have to bloat the stack with a full struct kiocb.  Split out
the parts needed in filesystem code from those in the aio code, and
only allocate those needed to pass down arguments on the stack.  The
aio code embeds the generic iocb in the one it allocates and can
easily get back to it by using container_of.

Also add a ->ki_complete method to struct kiocb, this is used to call
into the aio code and thus removes the dependency on aio for filesystems
implementing asynchronous operations.  It will also allow other callers
to substitute their own completion callback.

We also add a new ->ki_flags field to work around the nasty layering
violation recently introduced in commit 5e33f6 ("usb: gadget: ffs: add
eventfd notification about ffs events").

Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Al Viro <[email protected]>
  • Loading branch information
Christoph Hellwig authored and Al Viro committed Mar 13, 2015
1 parent 599bd19 commit 04b2fa9
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 77 deletions.
5 changes: 3 additions & 2 deletions drivers/usb/gadget/function/f_fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -655,9 +655,10 @@ static void ffs_user_copy_worker(struct work_struct *work)
unuse_mm(io_data->mm);
}

aio_complete(io_data->kiocb, ret, ret);
io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);

if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd)
if (io_data->ffs->ffs_eventfd &&
!(io_data->kiocb->ki_flags & IOCB_EVENTFD))
eventfd_signal(io_data->ffs->ffs_eventfd, 1);

usb_ep_free_request(io_data->ep, io_data->req);
Expand Down
5 changes: 3 additions & 2 deletions drivers/usb/gadget/legacy/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ static void ep_user_copy_worker(struct work_struct *work)
ret = -EFAULT;

/* completing the iocb can drop the ctx and mm, don't touch mm after */
aio_complete(iocb, ret, ret);
iocb->ki_complete(iocb, ret, ret);

kfree(priv->buf);
kfree(priv->to_free);
Expand Down Expand Up @@ -497,7 +497,8 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
kfree(priv);
iocb->private = NULL;
/* aio_complete() reports bytes-transferred _and_ faults */
aio_complete(iocb, req->actual ? req->actual : req->status,

iocb->ki_complete(iocb, req->actual ? req->actual : req->status,
req->status);
} else {
/* ep_copy_to_user() won't report both; we hide some faults */
Expand Down
94 changes: 65 additions & 29 deletions fs/aio.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,38 @@ struct kioctx {
unsigned id;
};

/*
* We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
* cancelled or completed (this makes a certain amount of sense because
* successful cancellation - io_cancel() - does deliver the completion to
* userspace).
*
* And since most things don't implement kiocb cancellation and we'd really like
* kiocb completion to be lockless when possible, we use ki_cancel to
* synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
* with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
*/
#define KIOCB_CANCELLED ((void *) (~0ULL))

/*
 * Per-request state private to the aio code.
 *
 * The generic struct kiocb that is passed around outside of aio is embedded
 * as ->common; aio code given a struct kiocb pointer recovers the enclosing
 * aio_kiocb with container_of(iocb, struct aio_kiocb, common).
 */
struct aio_kiocb {
/* generic iocb (ki_filp, ki_pos, ki_complete, ki_flags live here) */
struct kiocb common;

/* the kioctx this request is associated with */
struct kioctx *ki_ctx;
/*
 * Cancellation callback.  kiocb_cancel() swaps this to KIOCB_CANCELLED
 * with cmpxchg() before invoking the previous value.
 */
kiocb_cancel_fn *ki_cancel;

/* user-space iocb pointer; reported as event->obj on completion */
struct iocb __user *ki_user_iocb; /* user's aiocb */
/* copied from iocb->aio_data; reported as event->data on completion */
__u64 ki_user_data; /* user's data for completion */

/* entry on ctx->active_reqs while the request can be cancelled */
struct list_head ki_list; /* the aio core uses this
* for cancellation */

/*
* If the aio_resfd field of the userspace iocb is not zero,
* this is the underlying eventfd context to deliver events to.
* The reference is dropped with eventfd_ctx_put() in kiocb_free().
*/
struct eventfd_ctx *ki_eventfd;
};

/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
unsigned long aio_nr; /* current system wide number of aio requests */
Expand Down Expand Up @@ -220,7 +252,7 @@ static int __init aio_setup(void)
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");

kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);

pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
Expand Down Expand Up @@ -480,8 +512,9 @@ static int aio_setup_ring(struct kioctx *ctx)
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)

void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;

Expand All @@ -496,7 +529,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);

static int kiocb_cancel(struct kiocb *kiocb)
static int kiocb_cancel(struct aio_kiocb *kiocb)
{
kiocb_cancel_fn *old, *cancel;

Expand All @@ -514,7 +547,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
} while (cancel != old);

return cancel(kiocb);
return cancel(&kiocb->common);
}

static void free_ioctx(struct work_struct *work)
Expand Down Expand Up @@ -550,13 +583,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
static void free_ioctx_users(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, users);
struct kiocb *req;
struct aio_kiocb *req;

spin_lock_irq(&ctx->ctx_lock);

while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
struct kiocb, ki_list);
struct aio_kiocb, ki_list);

list_del_init(&req->ki_list);
kiocb_cancel(req);
Expand Down Expand Up @@ -932,9 +965,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
*/
static inline struct kiocb *aio_get_req(struct kioctx *ctx)
static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
struct kiocb *req;
struct aio_kiocb *req;

if (!get_reqs_available(ctx)) {
user_refill_reqs_available(ctx);
Expand All @@ -955,10 +988,10 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
return NULL;
}

static void kiocb_free(struct kiocb *req)
static void kiocb_free(struct aio_kiocb *req)
{
if (req->ki_filp)
fput(req->ki_filp);
if (req->common.ki_filp)
fput(req->common.ki_filp);
if (req->ki_eventfd != NULL)
eventfd_ctx_put(req->ki_eventfd);
kmem_cache_free(kiocb_cachep, req);
Expand Down Expand Up @@ -994,8 +1027,9 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
void aio_complete(struct kiocb *iocb, long res, long res2)
static void aio_complete(struct kiocb *kiocb, long res, long res2)
{
struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
Expand All @@ -1009,7 +1043,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
* ref, no other paths have a way to get another ref
* - the sync task helpfully left a reference to itself in the iocb
*/
BUG_ON(is_sync_kiocb(iocb));
BUG_ON(is_sync_kiocb(kiocb));

if (iocb->ki_list.next) {
unsigned long flags;
Expand All @@ -1035,7 +1069,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;

event->obj = (u64)(unsigned long)iocb->ki_obj.user;
event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
event->data = iocb->ki_user_data;
event->res = res;
event->res2 = res2;
Expand All @@ -1044,7 +1078,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);

pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
res, res2);

/* after flagging the request as done, we
Expand Down Expand Up @@ -1091,7 +1125,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)

percpu_ref_put(&ctx->reqs);
}
EXPORT_SYMBOL(aio_complete);

/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
Expand Down Expand Up @@ -1480,7 +1513,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
struct iocb *iocb, bool compat)
{
struct kiocb *req;
struct aio_kiocb *req;
ssize_t ret;

/* enforce forwards compatibility on users */
Expand All @@ -1503,11 +1536,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;

req->ki_filp = fget(iocb->aio_fildes);
if (unlikely(!req->ki_filp)) {
req->common.ki_filp = fget(iocb->aio_fildes);
if (unlikely(!req->common.ki_filp)) {
ret = -EBADF;
goto out_put_req;
}
req->common.ki_pos = iocb->aio_offset;
req->common.ki_complete = aio_complete;
req->common.ki_flags = 0;

if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
Expand All @@ -1522,6 +1558,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_eventfd = NULL;
goto out_put_req;
}

req->common.ki_flags |= IOCB_EVENTFD;
}

ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
Expand All @@ -1530,11 +1568,10 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
}

req->ki_obj.user = user_iocb;
req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb->aio_data;
req->ki_pos = iocb->aio_offset;

ret = aio_run_iocb(req, iocb->aio_lio_opcode,
ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
(char __user *)(unsigned long)iocb->aio_buf,
iocb->aio_nbytes,
compat);
Expand Down Expand Up @@ -1623,20 +1660,19 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
/* lookup_kiocb
* Finds a given iocb for cancellation.
*/
static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
u32 key)
static struct aio_kiocb *
lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
{
struct list_head *pos;
struct aio_kiocb *kiocb;

assert_spin_locked(&ctx->ctx_lock);

if (key != KIOCB_KEY)
return NULL;

/* TODO: use a hash or array, this sucks. */
list_for_each(pos, &ctx->active_reqs) {
struct kiocb *kiocb = list_kiocb(pos);
if (kiocb->ki_obj.user == iocb)
list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
if (kiocb->ki_user_iocb == iocb)
return kiocb;
}
return NULL;
Expand All @@ -1656,7 +1692,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
{
struct kioctx *ctx;
struct kiocb *kiocb;
struct aio_kiocb *kiocb;
u32 key;
int ret;

Expand Down
4 changes: 2 additions & 2 deletions fs/direct-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
ret = err;
}

aio_complete(dio->iocb, ret, 0);
dio->iocb->ki_complete(dio->iocb, ret, 0);
}

kmem_cache_free(dio_cache, dio);
Expand Down Expand Up @@ -1056,7 +1056,7 @@ static inline int drop_refcount(struct dio *dio)
* operation. AIO can if it was a broken operation described above or
* in fact if all the bios race to complete before we get here. In
* that case dio_complete() translates the EIOCBQUEUED into the proper
* return code that the caller will hand to aio_complete().
* return code that the caller will hand to ->ki_complete().
*
* This is managed by the bio_lock instead of being an atomic_t so that
* completion paths can drop their ref and use the remaining count to
Expand Down
2 changes: 1 addition & 1 deletion fs/fuse/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
spin_unlock(&fc->lock);
}

aio_complete(io->iocb, res, 0);
io->iocb->ki_complete(io->iocb, res, 0);
kfree(io);
}
}
Expand Down
2 changes: 1 addition & 1 deletion fs/nfs/direct.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
aio_complete(dreq->iocb, res, 0);
dreq->iocb->ki_complete(dreq->iocb, res, 0);
}

complete_all(&dreq->completion);
Expand Down
Loading

0 comments on commit 04b2fa9

Please sign in to comment.