Skip to content

Commit

Permalink
Merge branch 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/viro/vfs

Pull lazytime mount option support from Al Viro:
 "Lazytime stuff from tytso"

* 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ext4: add optimization for the lazytime mount option
  vfs: add find_inode_nowait() function
  vfs: add support for a lazytime mount option
  • Loading branch information
torvalds committed Feb 18, 2015
2 parents 66dc830 + a26f499 commit 0389115
Show file tree
Hide file tree
Showing 15 changed files with 343 additions and 37 deletions.
70 changes: 68 additions & 2 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -4174,6 +4174,65 @@ static int ext4_inode_blocks_set(handle_t *handle,
return 0;
}

struct other_inode {
unsigned long orig_ino;
struct ext4_inode *raw_inode;
};

static int other_inode_match(struct inode * inode, unsigned long ino,
void *data)
{
struct other_inode *oi = (struct other_inode *) data;

if ((inode->i_ino != ino) ||
(inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
((inode->i_state & I_DIRTY_TIME) == 0))
return 0;
spin_lock(&inode->i_lock);
if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
(inode->i_state & I_DIRTY_TIME)) {
struct ext4_inode_info *ei = EXT4_I(inode);

inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
spin_unlock(&inode->i_lock);

spin_lock(&ei->i_raw_lock);
EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
ext4_inode_csum_set(inode, oi->raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
trace_ext4_other_inode_update_time(inode, oi->orig_ino);
return -1;
}
spin_unlock(&inode->i_lock);
return -1;
}

/*
* Opportunistically update the other time fields for other inodes in
* the same inode table block.
*/
static void ext4_update_other_inodes_time(struct super_block *sb,
unsigned long orig_ino, char *buf)
{
struct other_inode oi;
unsigned long ino;
int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
int inode_size = EXT4_INODE_SIZE(sb);

oi.orig_ino = orig_ino;
ino = orig_ino & ~(inodes_per_block - 1);
for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
if (ino == orig_ino)
continue;
oi.raw_inode = (struct ext4_inode *) buf;
(void) find_inode_nowait(sb, ino, other_inode_match, &oi);
}
}

/*
* Post the struct inode info into an on-disk inode location in the
* buffer-cache. This gobbles the caller's reference to the
Expand Down Expand Up @@ -4283,10 +4342,11 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize);
}
}

ext4_inode_csum_set(inode, raw_inode, ei);

spin_unlock(&ei->i_raw_lock);
if (inode->i_sb->s_flags & MS_LAZYTIME)
ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
bh->b_data);

BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
rc = ext4_handle_dirty_metadata(handle, NULL, bh);
Expand Down Expand Up @@ -4875,11 +4935,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
* If the inode is marked synchronous, we don't honour that here - doing
* so would cause a commit on atime updates, which we don't bother doing.
* We handle synchronous inodes at the highest possible level.
*
* If only the I_DIRTY_TIME flag is set, we can skip everything. If
* I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need
* to copy into the on-disk inode structure are the timestamp files.
*/
void ext4_dirty_inode(struct inode *inode, int flags)
{
handle_t *handle;

if (flags == I_DIRTY_TIME)
return;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle))
goto out;
Expand Down
10 changes: 10 additions & 0 deletions fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,7 @@ enum {
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Expand Down Expand Up @@ -1190,6 +1191,8 @@ static const match_table_t tokens = {
{Opt_dax, "dax"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_nodelalloc, "nodelalloc"},
{Opt_removed, "mblk_io_submit"},
{Opt_removed, "nomblk_io_submit"},
Expand Down Expand Up @@ -1448,6 +1451,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
case Opt_i_version:
sb->s_flags |= MS_I_VERSION;
return 1;
case Opt_lazytime:
sb->s_flags |= MS_LAZYTIME;
return 1;
case Opt_nolazytime:
sb->s_flags &= ~MS_LAZYTIME;
return 1;
}

for (m = ext4_mount_opts; m->token != Opt_err; m++)
Expand Down Expand Up @@ -5044,6 +5053,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
#endif

*flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
return 0;
Expand Down
62 changes: 51 additions & 11 deletions fs/fs-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,28 +253,41 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
return ret;
}

#define EXPIRE_DIRTY_ATIME 0x0001

/*
* Move expired (dirtied before work->older_than_this) dirty inodes from
* @delaying_queue to @dispatch_queue.
*/
static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue,
int flags,
struct wb_writeback_work *work)
{
unsigned long *older_than_this = NULL;
unsigned long expire_time;
LIST_HEAD(tmp);
struct list_head *pos, *node;
struct super_block *sb = NULL;
struct inode *inode;
int do_sb_sort = 0;
int moved = 0;

if ((flags & EXPIRE_DIRTY_ATIME) == 0)
older_than_this = work->older_than_this;
else if ((work->reason == WB_REASON_SYNC) == 0) {
expire_time = jiffies - (HZ * 86400);
older_than_this = &expire_time;
}
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
if (work->older_than_this &&
inode_dirtied_after(inode, *work->older_than_this))
if (older_than_this &&
inode_dirtied_after(inode, *older_than_this))
break;
list_move(&inode->i_wb_list, &tmp);
moved++;
if (flags & EXPIRE_DIRTY_ATIME)
set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
if (sb_is_blkdev_sb(inode->i_sb))
continue;
if (sb && sb != inode->i_sb)
Expand Down Expand Up @@ -315,9 +328,12 @@ static int move_expired_inodes(struct list_head *delaying_queue,
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
{
int moved;

assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
EXPIRE_DIRTY_ATIME, work);
trace_writeback_queue_io(wb, work, moved);
}

Expand Down Expand Up @@ -441,6 +457,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* updates after data IO completion.
*/
redirty_tail(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
list_move(&inode->i_wb_list, &wb->b_dirty_time);
} else {
/* The inode is clean. Remove from writeback lists. */
list_del_init(&inode->i_wb_list);
Expand Down Expand Up @@ -487,7 +505,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_lock(&inode->i_lock);

dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~I_DIRTY;
if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
(inode->i_state & I_DIRTY_TIME)) ||
(inode->i_state & I_DIRTY_TIME_EXPIRED)) {
dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
trace_writeback_lazytime(inode);
}
inode->i_state &= ~dirty;

/*
* Paired with smp_mb() in __mark_inode_dirty(). This allows
Expand All @@ -507,8 +531,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)

spin_unlock(&inode->i_lock);

if (dirty & I_DIRTY_TIME)
mark_inode_dirty_sync(inode);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
if (dirty & ~I_DIRTY_PAGES) {
int err = write_inode(inode, wbc);
if (ret == 0)
ret = err;
Expand Down Expand Up @@ -556,7 +582,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
* make sure inode is on some writeback list and leave it there unless
* we have completely cleaned the inode.
*/
if (!(inode->i_state & I_DIRTY) &&
if (!(inode->i_state & I_DIRTY_ALL) &&
(wbc->sync_mode != WB_SYNC_ALL ||
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
goto out;
Expand All @@ -571,7 +597,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
* If inode is clean, remove it from writeback lists. Otherwise don't
* touch it. See comment above for explanation.
*/
if (!(inode->i_state & I_DIRTY))
if (!(inode->i_state & I_DIRTY_ALL))
list_del_init(&inode->i_wb_list);
spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
Expand Down Expand Up @@ -713,7 +739,7 @@ static long writeback_sb_inodes(struct super_block *sb,
wrote += write_chunk - wbc.nr_to_write;
spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY))
if (!(inode->i_state & I_DIRTY_ALL))
wrote++;
requeue_inode(inode, wb, &wbc);
inode_sync_complete(inode);
Expand Down Expand Up @@ -1151,40 +1177,52 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
* page->mapping->host, so the page-dirtying time is recorded in the internal
* blockdev inode.
*/
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
void __mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block *sb = inode->i_sb;
struct backing_dev_info *bdi = NULL;
int dirtytime;

trace_writeback_mark_inode_dirty(inode, flags);

/*
* Don't do this for I_DIRTY_PAGES - that doesn't actually
* dirty the inode itself
*/
if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
trace_writeback_dirty_inode_start(inode, flags);

if (sb->s_op->dirty_inode)
sb->s_op->dirty_inode(inode, flags);

trace_writeback_dirty_inode(inode, flags);
}
if (flags & I_DIRTY_INODE)
flags &= ~I_DIRTY_TIME;
dirtytime = flags & I_DIRTY_TIME;

/*
* Paired with smp_mb() in __writeback_single_inode() for the
* following lockless i_state test. See there for details.
*/
smp_mb();

if ((inode->i_state & flags) == flags)
if (((inode->i_state & flags) == flags) ||
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;

if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode);

spin_lock(&inode->i_lock);
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
goto out_unlock_inode;
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;

if (flags & I_DIRTY_INODE)
inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags;

/*
Expand Down Expand Up @@ -1231,8 +1269,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}

inode->dirtied_when = jiffies;
list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
list_move(&inode->i_wb_list, dirtytime ?
&bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
spin_unlock(&bdi->wb.list_lock);
trace_writeback_dirty_inode_enqueue(inode);

if (wakeup_bdi)
bdi_wakeup_thread_delayed(bdi);
Expand Down
4 changes: 2 additions & 2 deletions fs/gfs2/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
{
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
int sync_state = inode->i_state & I_DIRTY;
int sync_state = inode->i_state & I_DIRTY_ALL;
struct gfs2_inode *ip = GFS2_I(inode);
int ret = 0, ret1 = 0;

Expand All @@ -667,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
if (!gfs2_is_jdata(ip))
sync_state &= ~I_DIRTY_PAGES;
if (datasync)
sync_state &= ~I_DIRTY_SYNC;
sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);

if (sync_state) {
ret = sync_inode_metadata(inode, 1);
Expand Down
Loading

0 comments on commit 0389115

Please sign in to comment.