Skip to content

Commit

Permalink
mm: prevent concurrent unmap_mapping_range() on the same inode
Browse files Browse the repository at this point in the history
Michael Leun reported that running parallel opens on a fuse filesystem
can trigger a "kernel BUG at mm/truncate.c:475".

Gurudas Pai reported the same bug on NFS.

The reason is that unmap_mapping_range() is not prepared for more than
one concurrent invocation per inode.  For example:

  thread1: going through a big range, stops in the middle of a vma and
     stores the restart address in vm_truncate_count.

  thread2: comes in with a small (e.g. single page) unmap request on
     the same vma, somewhere before restart_address, finds that the
     vma was already unmapped up to the restart address and happily
     returns without doing anything.

Another scenario would be two big unmap requests, both having to
restart the unmapping and each one setting vm_truncate_count to its
own value.  This could go on forever without any of them being able to
finish.

Truncate and hole punching already serialize with i_mutex.  Other
callers of unmap_mapping_range() do not, and it's difficult to get
i_mutex protection for all callers.  In particular ->d_revalidate(),
which calls invalidate_inode_pages2_range() in fuse, may be called
with or without i_mutex.

This patch adds a new mutex to 'struct address_space' to prevent
multiple concurrent unmap_mapping_range() calls on the same mapping.

[ We'll hopefully get rid of all this with the upcoming mm
  preemptibility series by Peter Zijlstra, the "mm: Remove i_mmap_mutex
  lockbreak" patch in particular.  But that is for 2.6.39 ]

Signed-off-by: Miklos Szeredi <[email protected]>
Reported-by: Michael Leun <[email protected]>
Reported-by: Gurudas Pai <[email protected]>
Tested-by: Gurudas Pai <[email protected]>
Acked-by: Hugh Dickins <[email protected]>
Cc: [email protected]
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Miklos Szeredi authored and torvalds committed Feb 24, 2011
1 parent 78794b2 commit 2aa1589
Show file tree
Hide file tree
Showing 10 changed files with 23 additions and 38 deletions.
9 changes: 1 addition & 8 deletions fs/gfs2/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
struct address_space *mapping = (struct address_space *)(gl + 1);

gfs2_init_glock_once(gl);
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
spin_lock_init(&mapping->tree_lock);
spin_lock_init(&mapping->i_mmap_lock);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
address_space_init_once(mapping);
}

/**
Expand Down
22 changes: 15 additions & 7 deletions fs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, i_callback);
}

/**
 * address_space_init_once - one-time initialisation of an address_space
 * @mapping: the address_space to initialise
 *
 * Zeroes the entire structure and then initialises every embedded lock,
 * list head and tree root, including the unmap_mutex that
 * unmap_mapping_range() takes to serialise unmapping on a mapping.
 */
void address_space_init_once(struct address_space *mapping)
{
	/* Must come first: everything below writes into the cleared struct. */
	memset(mapping, 0, sizeof *mapping);

	/* Locks. */
	spin_lock_init(&mapping->tree_lock);
	spin_lock_init(&mapping->i_mmap_lock);
	spin_lock_init(&mapping->private_lock);
	mutex_init(&mapping->unmap_mutex);

	/* Tree roots. */
	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);

	/* List heads. */
	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
	INIT_LIST_HEAD(&mapping->private_list);
}
EXPORT_SYMBOL(address_space_init_once);

/*
* These are initializations that only need to be done
* once, because the fields are idempotent across use
Expand All @@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_devices);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock);
spin_lock_init(&inode->i_data.i_mmap_lock);
INIT_LIST_HEAD(&inode->i_data.private_list);
spin_lock_init(&inode->i_data.private_lock);
INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
Expand Down
5 changes: 0 additions & 5 deletions fs/nilfs2/btnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,6 @@
#include "btnode.h"


/*
 * nilfs_btnode_cache_init_once - thin wrapper around
 * nilfs_mapping_init_once()
 * @btnc: address_space passed through unchanged
 *
 * Does no work of its own; it only forwards @btnc.
 */
void nilfs_btnode_cache_init_once(struct address_space *btnc)
{
nilfs_mapping_init_once(btnc);
}

/*
 * address_space operations table def_btnode_aops: only .sync_page is
 * populated (with block_sync_page); all other operations are left unset.
 */
static const struct address_space_operations def_btnode_aops = {
.sync_page = block_sync_page,
};
Expand Down
1 change: 0 additions & 1 deletion fs/nilfs2/btnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};

void nilfs_btnode_cache_init_once(struct address_space *);
void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
Expand Down
4 changes: 2 additions & 2 deletions fs/nilfs2/mdt.c
Original file line number Diff line number Diff line change
Expand Up @@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct backing_dev_info *bdi = inode->i_sb->s_bdi;

INIT_LIST_HEAD(&shadow->frozen_buffers);
nilfs_mapping_init_once(&shadow->frozen_data);
address_space_init_once(&shadow->frozen_data);
nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
nilfs_mapping_init_once(&shadow->frozen_btnodes);
address_space_init_once(&shadow->frozen_btnodes);
nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
mi->mi_shadow = shadow;
return 0;
Expand Down
13 changes: 0 additions & 13 deletions fs/nilfs2/page.c
Original file line number Diff line number Diff line change
Expand Up @@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
return nc;
}

/*
 * nilfs_mapping_init_once - one-time initialisation of an address_space
 * @mapping: the address_space to initialise
 *
 * Zeroes the entire structure and then initialises its embedded
 * spinlocks, tree roots and list heads.
 */
void nilfs_mapping_init_once(struct address_space *mapping)
{
	/* Must come first: everything below writes into the cleared struct. */
	memset(mapping, 0, sizeof *mapping);

	/* Spinlocks. */
	spin_lock_init(&mapping->tree_lock);
	spin_lock_init(&mapping->private_lock);
	spin_lock_init(&mapping->i_mmap_lock);

	/* Tree roots. */
	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);

	/* List heads. */
	INIT_LIST_HEAD(&mapping->private_list);
	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}

void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops)
Expand Down
1 change: 0 additions & 1 deletion fs/nilfs2/page.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_pages(struct address_space *);
void nilfs_mapping_init_once(struct address_space *mapping);
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops);
Expand Down
2 changes: 1 addition & 1 deletion fs/nilfs2/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
#ifdef CONFIG_NILFS_XATTR
init_rwsem(&ii->xattr_sem);
#endif
nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
address_space_init_once(&ii->i_btnode_cache);
ii->i_bmap = &ii->i_bmap_data;
inode_init_once(&ii->vfs_inode);
}
Expand Down
2 changes: 2 additions & 0 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,7 @@ struct address_space {
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
struct mutex unmap_mutex; /* to protect unmapping */
} __attribute__((aligned(sizeof(long))));
/*
* On most architectures that alignment is already the case; but
Expand Down Expand Up @@ -2225,6 +2226,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);

extern int inode_init_always(struct super_block *, struct inode *);
extern void inode_init_once(struct inode *);
extern void address_space_init_once(struct address_space *mapping);
extern void ihold(struct inode * inode);
extern void iput(struct inode *);
extern struct inode * igrab(struct inode *);
Expand Down
2 changes: 2 additions & 0 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -2648,6 +2648,7 @@ void unmap_mapping_range(struct address_space *mapping,
details.last_index = ULONG_MAX;
details.i_mmap_lock = &mapping->i_mmap_lock;

mutex_lock(&mapping->unmap_mutex);
spin_lock(&mapping->i_mmap_lock);

/* Protect against endless unmapping loops */
Expand All @@ -2664,6 +2665,7 @@ void unmap_mapping_range(struct address_space *mapping,
if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
spin_unlock(&mapping->i_mmap_lock);
mutex_unlock(&mapping->unmap_mutex);
}
EXPORT_SYMBOL(unmap_mapping_range);

Expand Down

0 comments on commit 2aa1589

Please sign in to comment.