Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/viro/vfs

Pull vfs updates from Al Viro:
 "In this one:

   - d_move fixes (Eric Biederman)

   - UFS fixes (me; locking is mostly sane now, a bunch of bugs in error
     handling ought to be fixed)

   - switch of sb_writers to percpu rwsem (Oleg Nesterov)

   - superblock scalability (Josef Bacik and Dave Chinner)

   - swapon(2) race fix (Hugh Dickins)"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (65 commits)
  vfs: Test for and handle paths that are unreachable from their mnt_root
  dcache: Reduce the scope of i_lock in d_splice_alias
  dcache: Handle escaped paths in prepend_path
  mm: fix potential data race in SyS_swapon
  inode: don't softlockup when evicting inodes
  inode: rename i_wb_list to i_io_list
  sync: serialise per-superblock sync operations
  inode: convert inode_sb_list_lock to per-sb
  inode: add hlist_fake to avoid the inode hash lock in evict
  writeback: plug writeback at a high level
  change sb_writers to use percpu_rw_semaphore
  shift percpu_counter_destroy() into destroy_super_work()
  percpu-rwsem: kill CONFIG_PERCPU_RWSEM
  percpu-rwsem: introduce percpu_rwsem_release() and percpu_rwsem_acquire()
  percpu-rwsem: introduce percpu_down_read_trylock()
  document rwsem_release() in sb_wait_write()
  fix the broken lockdep logic in __sb_start_write()
  introduce __sb_writers_{acquired,release}() helpers
  ufs_inode_get{frag,block}(): get rid of 'phys' argument
  ufs_getfrag_block(): tidy up a bit
  ...
  • Loading branch information
torvalds committed Sep 6, 2015
2 parents bd77966 + 397d425 commit 7d9071a
Show file tree
Hide file tree
Showing 29 changed files with 936 additions and 1,119 deletions.
1 change: 0 additions & 1 deletion arch/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ config KPROBES_ON_FTRACE

config UPROBES
def_bool n
select PERCPU_RWSEM
help
Uprobes is the user-space counterpart to kprobes: they
enable instrumentation applications (such as 'perf probe')
Expand Down
12 changes: 6 additions & 6 deletions fs/block_dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1769,7 +1769,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
struct inode *inode, *old_inode = NULL;

spin_lock(&inode_sb_list_lock);
spin_lock(&blockdev_superblock->s_inode_list_lock);
list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
struct address_space *mapping = inode->i_mapping;

Expand All @@ -1781,22 +1781,22 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
}
__iget(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_sb_list_lock);
spin_unlock(&blockdev_superblock->s_inode_list_lock);
/*
* We hold a reference to 'inode' so it couldn't have been
* removed from s_inodes list while we dropped the
* inode_sb_list_lock. We cannot iput the inode now as we can
* s_inode_list_lock. We cannot iput the inode now as we can
* be holding the last reference and we cannot iput it under
* inode_sb_list_lock. So we keep the reference and iput it
* s_inode_list_lock. So we keep the reference and iput it
* later.
*/
iput(old_inode);
old_inode = inode;

func(I_BDEV(inode), arg);

spin_lock(&inode_sb_list_lock);
spin_lock(&blockdev_superblock->s_inode_list_lock);
}
spin_unlock(&inode_sb_list_lock);
spin_unlock(&blockdev_superblock->s_inode_list_lock);
iput(old_inode);
}
8 changes: 2 additions & 6 deletions fs/btrfs/transaction.c
Original file line number Diff line number Diff line change
Expand Up @@ -1640,9 +1640,7 @@ static void do_async_commit(struct work_struct *work)
* Tell lockdep about it.
*/
if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_acquire_read(
&ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
__sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS);

current->journal_info = ac->newtrans;

Expand Down Expand Up @@ -1681,9 +1679,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
* async commit thread will be the one to unlock it.
*/
if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_release(
&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1, _THIS_IP_);
__sb_writers_release(root->fs_info->sb, SB_FREEZE_FS);

schedule_work(&ac->work);

Expand Down
14 changes: 10 additions & 4 deletions fs/dcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -2718,7 +2718,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
* This helper attempts to cope with remotely renamed directories
*
* It assumes that the caller is already holding
* dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
* dentry->d_parent->d_inode->i_mutex and rename_lock
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
Expand All @@ -2744,7 +2744,6 @@ static int __d_unalias(struct inode *inode,
__d_move(alias, dentry, false);
ret = 0;
out_err:
spin_unlock(&inode->i_lock);
if (m2)
mutex_unlock(m2);
if (m1)
Expand Down Expand Up @@ -2790,10 +2789,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
if (S_ISDIR(inode->i_mode)) {
struct dentry *new = __d_find_any_alias(inode);
if (unlikely(new)) {
/* The reference to new ensures it remains an alias */
spin_unlock(&inode->i_lock);
write_seqlock(&rename_lock);
if (unlikely(d_ancestor(new, dentry))) {
write_sequnlock(&rename_lock);
spin_unlock(&inode->i_lock);
dput(new);
new = ERR_PTR(-ELOOP);
pr_warn_ratelimited(
Expand All @@ -2812,7 +2812,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
} else {
__d_move(new, dentry, false);
write_sequnlock(&rename_lock);
spin_unlock(&inode->i_lock);
security_d_instantiate(new, inode);
}
iput(inode);
Expand Down Expand Up @@ -2926,6 +2925,13 @@ static int prepend_path(const struct path *path,

if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
/* Escaped? */
if (dentry != vfsmnt->mnt_root) {
bptr = *buffer;
blen = *buflen;
error = 3;
break;
}
/* Global root? */
if (mnt != parent) {
dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
Expand Down
10 changes: 6 additions & 4 deletions fs/drop_caches.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
struct inode *inode, *toput_inode = NULL;

spin_lock(&inode_sb_list_lock);
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
Expand All @@ -27,13 +27,15 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
}
__iget(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_sb_list_lock);
spin_unlock(&sb->s_inode_list_lock);

invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
spin_lock(&inode_sb_list_lock);

spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&inode_sb_list_lock);
spin_unlock(&sb->s_inode_list_lock);
iput(toput_inode);
}

Expand Down
72 changes: 43 additions & 29 deletions fs/fs-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ unsigned int dirtytime_expire_interval = 12 * 60 * 60;

static inline struct inode *wb_inode(struct list_head *head)
{
return list_entry(head, struct inode, i_wb_list);
return list_entry(head, struct inode, i_io_list);
}

/*
Expand Down Expand Up @@ -125,22 +125,22 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb)
}

/**
* inode_wb_list_move_locked - move an inode onto a bdi_writeback IO list
* inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
* @inode: inode to be moved
* @wb: target bdi_writeback
* @head: one of @wb->b_{dirty|io|more_io}
*
* Move @inode->i_wb_list to @list of @wb and set %WB_has_dirty_io.
* Move @inode->i_io_list to @head of @wb and set %WB_has_dirty_io.
* Returns %true if @inode is the first occupant of the !dirty_time IO
* lists; otherwise, %false.
*/
static bool inode_wb_list_move_locked(struct inode *inode,
static bool inode_io_list_move_locked(struct inode *inode,
struct bdi_writeback *wb,
struct list_head *head)
{
assert_spin_locked(&wb->list_lock);

list_move(&inode->i_wb_list, head);
list_move(&inode->i_io_list, head);

/* dirty_time doesn't count as dirty_io until expiration */
if (head != &wb->b_dirty_time)
Expand All @@ -151,19 +151,19 @@ static bool inode_wb_list_move_locked(struct inode *inode,
}

/**
* inode_wb_list_del_locked - remove an inode from its bdi_writeback IO list
* inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
* @inode: inode to be removed
* @wb: bdi_writeback @inode is being removed from
*
* Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
* clear %WB_has_dirty_io if all are empty afterwards.
*/
static void inode_wb_list_del_locked(struct inode *inode,
static void inode_io_list_del_locked(struct inode *inode,
struct bdi_writeback *wb)
{
assert_spin_locked(&wb->list_lock);

list_del_init(&inode->i_wb_list);
list_del_init(&inode->i_io_list);
wb_io_lists_depopulated(wb);
}

Expand Down Expand Up @@ -351,7 +351,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)

/*
* Once I_FREEING is visible under i_lock, the eviction path owns
* the inode and we shouldn't modify ->i_wb_list.
* the inode and we shouldn't modify ->i_io_list.
*/
if (unlikely(inode->i_state & I_FREEING))
goto skip_switch;
Expand Down Expand Up @@ -390,16 +390,16 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
* is always correct including from ->b_dirty_time. The transfer
* preserves @inode->dirtied_when ordering.
*/
if (!list_empty(&inode->i_wb_list)) {
if (!list_empty(&inode->i_io_list)) {
struct inode *pos;

inode_wb_list_del_locked(inode, old_wb);
inode_io_list_del_locked(inode, old_wb);
inode->i_wb = new_wb;
list_for_each_entry(pos, &new_wb->b_dirty, i_wb_list)
list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
if (time_after_eq(inode->dirtied_when,
pos->dirtied_when))
break;
inode_wb_list_move_locked(inode, new_wb, pos->i_wb_list.prev);
inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
} else {
inode->i_wb = new_wb;
}
Expand Down Expand Up @@ -961,12 +961,12 @@ void wb_start_background_writeback(struct bdi_writeback *wb)
/*
* Remove the inode from the writeback list it is on.
*/
void inode_wb_list_del(struct inode *inode)
void inode_io_list_del(struct inode *inode)
{
struct bdi_writeback *wb;

wb = inode_to_wb_and_lock_list(inode);
inode_wb_list_del_locked(inode, wb);
inode_io_list_del_locked(inode, wb);
spin_unlock(&wb->list_lock);
}

Expand All @@ -988,15 +988,15 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
if (time_before(inode->dirtied_when, tail->dirtied_when))
inode->dirtied_when = jiffies;
}
inode_wb_list_move_locked(inode, wb, &wb->b_dirty);
inode_io_list_move_locked(inode, wb, &wb->b_dirty);
}

/*
* requeue inode for re-scanning after bdi->b_io list is exhausted.
*/
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
inode_wb_list_move_locked(inode, wb, &wb->b_more_io);
inode_io_list_move_locked(inode, wb, &wb->b_more_io);
}

static void inode_sync_complete(struct inode *inode)
Expand Down Expand Up @@ -1055,7 +1055,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
if (older_than_this &&
inode_dirtied_after(inode, *older_than_this))
break;
list_move(&inode->i_wb_list, &tmp);
list_move(&inode->i_io_list, &tmp);
moved++;
if (flags & EXPIRE_DIRTY_ATIME)
set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
Expand All @@ -1078,7 +1078,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
list_for_each_prev_safe(pos, node, &tmp) {
inode = wb_inode(pos);
if (inode->i_sb == sb)
list_move(&inode->i_wb_list, dispatch_queue);
list_move(&inode->i_io_list, dispatch_queue);
}
}
out:
Expand Down Expand Up @@ -1232,10 +1232,10 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
redirty_tail(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
inode->dirtied_when = jiffies;
inode_wb_list_move_locked(inode, wb, &wb->b_dirty_time);
inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
} else {
/* The inode is clean. Remove from writeback lists. */
inode_wb_list_del_locked(inode, wb);
inode_io_list_del_locked(inode, wb);
}
}

Expand Down Expand Up @@ -1378,7 +1378,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
* touch it. See comment above for explanation.
*/
if (!(inode->i_state & I_DIRTY_ALL))
inode_wb_list_del_locked(inode, wb);
inode_io_list_del_locked(inode, wb);
spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
out:
Expand Down Expand Up @@ -1439,7 +1439,9 @@ static long writeback_sb_inodes(struct super_block *sb,
unsigned long start_time = jiffies;
long write_chunk;
long wrote = 0; /* count both pages and inodes */
struct blk_plug plug;

blk_start_plug(&plug);
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);

Expand Down Expand Up @@ -1537,6 +1539,7 @@ static long writeback_sb_inodes(struct super_block *sb,
break;
}
}
blk_finish_plug(&plug);
return wrote;
}

Expand Down Expand Up @@ -2088,7 +2091,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
else
dirty_list = &wb->b_dirty_time;

wakeup_bdi = inode_wb_list_move_locked(inode, wb,
wakeup_bdi = inode_io_list_move_locked(inode, wb,
dirty_list);

spin_unlock(&wb->list_lock);
Expand All @@ -2111,6 +2114,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
EXPORT_SYMBOL(__mark_inode_dirty);

/*
* The @s_sync_lock is used to serialise concurrent sync operations
* to avoid lock contention problems with concurrent wait_sb_inodes() calls.
* Concurrent callers will block on the s_sync_lock rather than doing contending
* walks. The queueing maintains sync(2) required behaviour as all the IO that
* has been issued up to the time this function is entered is guaranteed to be
* completed by the time we have gained the lock and waited for all IO that is
* in progress regardless of the order callers are granted the lock.
*/
static void wait_sb_inodes(struct super_block *sb)
{
struct inode *inode, *old_inode = NULL;
Expand All @@ -2121,7 +2133,8 @@ static void wait_sb_inodes(struct super_block *sb)
*/
WARN_ON(!rwsem_is_locked(&sb->s_umount));

spin_lock(&inode_sb_list_lock);
mutex_lock(&sb->s_sync_lock);
spin_lock(&sb->s_inode_list_lock);

/*
* Data integrity sync. Must wait for all pages under writeback,
Expand All @@ -2141,14 +2154,14 @@ static void wait_sb_inodes(struct super_block *sb)
}
__iget(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_sb_list_lock);
spin_unlock(&sb->s_inode_list_lock);

/*
* We hold a reference to 'inode' so it couldn't have been
* removed from s_inodes list while we dropped the
* inode_sb_list_lock. We cannot iput the inode now as we can
* s_inode_list_lock. We cannot iput the inode now as we can
* be holding the last reference and we cannot iput it under
* inode_sb_list_lock. So we keep the reference and iput it
* s_inode_list_lock. So we keep the reference and iput it
* later.
*/
iput(old_inode);
Expand All @@ -2158,10 +2171,11 @@ static void wait_sb_inodes(struct super_block *sb)

cond_resched();

spin_lock(&inode_sb_list_lock);
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&inode_sb_list_lock);
spin_unlock(&sb->s_inode_list_lock);
iput(old_inode);
mutex_unlock(&sb->s_sync_lock);
}

static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
Expand Down
Loading

0 comments on commit 7d9071a

Please sign in to comment.