Skip to content

Commit

Permalink
xfs: optimize bio handling in the buffer writeback path
Browse files Browse the repository at this point in the history
This patch implements two closely related changes:  First it embeds
a bio the ioend structure so that we don't have to allocate one
separately.  Second it uses the block layer bio chaining mechanism
to chain additional bios off this first one if needed instead of
manually accounting for multiple bio completions in the ioend
structure.  Together this removes a memory allocation per ioend and
greatly simplifies the ioend setup and I/O completion path.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Brian Foster <[email protected]>
Signed-off-by: Dave Chinner <[email protected]>
  • Loading branch information
Christoph Hellwig authored and dchinner committed Apr 5, 2016
1 parent 37992c1 commit 0e51a8e
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 165 deletions.
247 changes: 110 additions & 137 deletions fs/xfs/xfs_aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,27 +124,32 @@ xfs_finish_page_writeback(
*/
STATIC void
xfs_destroy_ioend(
struct xfs_ioend *ioend)
struct xfs_ioend *ioend,
int error)
{
struct inode *inode = ioend->io_inode;
int error = ioend->io_error;
struct bio *last = ioend->io_bio;
struct bio *bio, *next;

for (bio = ioend->io_bio_done; bio; bio = next) {
for (bio = &ioend->io_inline_bio; bio; bio = next) {
struct bio_vec *bvec;
int i;

next = bio->bi_private;
bio->bi_private = NULL;
/*
* For the last bio, bi_private points to the ioend, so we
* need to explicitly end the iteration here.
*/
if (bio == last)
next = NULL;
else
next = bio->bi_private;

/* walk each page on bio, ending page IO on them */
bio_for_each_segment_all(bvec, bio, i)
xfs_finish_page_writeback(inode, bvec, error);

bio_put(bio);
}

mempool_free(ioend, xfs_ioend_pool);
}

/*
Expand Down Expand Up @@ -218,7 +223,8 @@ xfs_setfilesize(

STATIC int
xfs_setfilesize_ioend(
struct xfs_ioend *ioend)
struct xfs_ioend *ioend,
int error)
{
struct xfs_inode *ip = XFS_I(ioend->io_inode);
struct xfs_trans *tp = ioend->io_append_trans;
Expand All @@ -232,53 +238,32 @@ xfs_setfilesize_ioend(
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);

/* we abort the update if there was an IO error */
if (ioend->io_error) {
if (error) {
xfs_trans_cancel(tp);
return ioend->io_error;
return error;
}

return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}

/*
* Schedule IO completion handling on the final put of an ioend.
*
* If there is no work to do we might as well call it a day and free the
* ioend right now.
*/
STATIC void
xfs_finish_ioend(
struct xfs_ioend *ioend)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;

if (ioend->io_type == XFS_IO_UNWRITTEN)
queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend);
}
}

/*
* IO write completion.
*/
STATIC void
xfs_end_io(
struct work_struct *work)
{
xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = 0;
struct xfs_ioend *ioend =
container_of(work, struct xfs_ioend, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = ioend->io_bio->bi_error;

/*
* Set an error if the mount has shut down and proceed with end I/O
* processing so it can perform whatever cleanups are necessary.
*/
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
ioend->io_error = -EIO;
error = -EIO;

/*
* For unwritten extents we need to issue transactions to convert a
Expand All @@ -288,50 +273,33 @@ xfs_end_io(
* on error.
*/
if (ioend->io_type == XFS_IO_UNWRITTEN) {
if (ioend->io_error)
if (error)
goto done;
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
} else if (ioend->io_append_trans) {
error = xfs_setfilesize_ioend(ioend);
error = xfs_setfilesize_ioend(ioend, error);
} else {
ASSERT(!xfs_ioend_is_append(ioend));
}

done:
if (error)
ioend->io_error = error;
xfs_destroy_ioend(ioend);
xfs_destroy_ioend(ioend, error);
}

/*
* Allocate and initialise an IO completion structure.
* We need to track unwritten extent write completion here initially.
* We'll need to extend this for updating the ondisk inode size later
* (vs. incore size).
*/
STATIC xfs_ioend_t *
xfs_alloc_ioend(
struct inode *inode,
unsigned int type)
STATIC void
xfs_end_bio(
struct bio *bio)
{
xfs_ioend_t *ioend;

ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
memset(ioend, 0, sizeof(*ioend));
struct xfs_ioend *ioend = bio->bi_private;
struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;

/*
* Set the count to 1 initially, which will prevent an I/O
* completion callback from happening before we have started
* all the I/O from calling the completion routine too early.
*/
atomic_set(&ioend->io_remaining, 1);
INIT_LIST_HEAD(&ioend->io_list);
ioend->io_type = type;
ioend->io_inode = inode;
INIT_WORK(&ioend->io_work, xfs_end_io);
spin_lock_init(&ioend->io_lock);
return ioend;
if (ioend->io_type == XFS_IO_UNWRITTEN)
queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend, bio->bi_error);
}

STATIC int
Expand Down Expand Up @@ -403,56 +371,6 @@ xfs_imap_valid(
offset < imap->br_startoff + imap->br_blockcount;
}

/*
* BIO completion handler for buffered IO.
*/
STATIC void
xfs_end_bio(
struct bio *bio)
{
struct xfs_ioend *ioend = bio->bi_private;
unsigned long flags;

bio->bi_private = NULL;
bio->bi_end_io = NULL;

spin_lock_irqsave(&ioend->io_lock, flags);
if (!ioend->io_error)
ioend->io_error = bio->bi_error;
if (!ioend->io_bio_done)
ioend->io_bio_done = bio;
else
ioend->io_bio_done_tail->bi_private = bio;
ioend->io_bio_done_tail = bio;
spin_unlock_irqrestore(&ioend->io_lock, flags);

xfs_finish_ioend(ioend);
}

STATIC void
xfs_submit_ioend_bio(
struct writeback_control *wbc,
xfs_ioend_t *ioend,
struct bio *bio)
{
atomic_inc(&ioend->io_remaining);
bio->bi_private = ioend;
bio->bi_end_io = xfs_end_bio;
submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
}

STATIC struct bio *
xfs_alloc_ioend_bio(
struct buffer_head *bh)
{
struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);

ASSERT(bio->bi_private == NULL);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
return bio;
}

STATIC void
xfs_start_buffer_writeback(
struct buffer_head *bh)
Expand Down Expand Up @@ -513,36 +431,95 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
STATIC int
xfs_submit_ioend(
struct writeback_control *wbc,
xfs_ioend_t *ioend,
struct xfs_ioend *ioend,
int status)
{
/* Reserve log space if we might write beyond the on-disk inode size. */
if (!status &&
ioend->io_bio && ioend->io_type != XFS_IO_UNWRITTEN &&
ioend->io_type != XFS_IO_UNWRITTEN &&
xfs_ioend_is_append(ioend) &&
!ioend->io_append_trans)
status = xfs_setfilesize_trans_alloc(ioend);

ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = xfs_end_bio;

/*
* If we are failing the IO now, just mark the ioend with an
* error and finish it. This will run IO completion immediately
* as there is only one reference to the ioend at this point in
* time.
*/
if (status) {
if (ioend->io_bio)
bio_put(ioend->io_bio);
ioend->io_error = status;
xfs_finish_ioend(ioend);
ioend->io_bio->bi_error = status;
bio_endio(ioend->io_bio);
return status;
}

xfs_submit_ioend_bio(wbc, ioend, ioend->io_bio);
ioend->io_bio = NULL;
xfs_finish_ioend(ioend);
submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
ioend->io_bio);
return 0;
}

static void
xfs_init_bio_from_bh(
struct bio *bio,
struct buffer_head *bh)
{
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
}

static struct xfs_ioend *
xfs_alloc_ioend(
struct inode *inode,
unsigned int type,
xfs_off_t offset,
struct buffer_head *bh)
{
struct xfs_ioend *ioend;
struct bio *bio;

bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
xfs_init_bio_from_bh(bio, bh);

ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
INIT_LIST_HEAD(&ioend->io_list);
ioend->io_type = type;
ioend->io_inode = inode;
ioend->io_size = 0;
ioend->io_offset = offset;
INIT_WORK(&ioend->io_work, xfs_end_io);
ioend->io_append_trans = NULL;
ioend->io_bio = bio;
return ioend;
}

/*
* Allocate a new bio, and chain the old bio to the new one.
*
* Note that we have to do perform the chaining in this unintuitive order
* so that the bi_private linkage is set up in the right direction for the
* traversal in xfs_destroy_ioend().
*/
static void
xfs_chain_bio(
struct xfs_ioend *ioend,
struct writeback_control *wbc,
struct buffer_head *bh)
{
struct bio *new;

new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
xfs_init_bio_from_bh(new, bh);

bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
ioend->io_bio);
ioend->io_bio = new;
}

/*
* Test to see if we've been building up a completion structure for
* earlier buffers -- if so, we try to append to this ioend if we
Expand All @@ -564,19 +541,15 @@ xfs_add_to_ioend(
offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
if (wpc->ioend)
list_add(&wpc->ioend->io_list, iolist);
wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type);
wpc->ioend->io_offset = offset;
wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
}

retry:
if (!wpc->ioend->io_bio)
wpc->ioend->io_bio = xfs_alloc_ioend_bio(bh);

if (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size) {
xfs_submit_ioend_bio(wbc, wpc->ioend, wpc->ioend->io_bio);
wpc->ioend->io_bio = NULL;
goto retry;
}
/*
* If the buffer doesn't fit into the bio we need to allocate a new
* one. This shouldn't happen more than once for a given buffer.
*/
while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
xfs_chain_bio(wpc->ioend, wbc, bh);

wpc->ioend->io_size += bh->b_size;
wpc->last_block = bh->b_blocknr;
Expand Down
Loading

0 comments on commit 0e51a8e

Please sign in to comment.