Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "16 fixes"

* emailed patches from Andrew Morton <[email protected]>:
  coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping
  mm/kmemleak.c: fix unused-function warning
  init: initialize jump labels before command line option parsing
  kernel/watchdog_hld.c: hard lockup message should end with a newline
  kcov: improve CONFIG_ARCH_HAS_KCOV help text
  mm: fix inactive list balancing between NUMA nodes and cgroups
  mm/hotplug: treat CMA pages as unmovable
  proc: fixup proc-pid-vm test
  proc: fix map_files test on F29
  mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n
  mm/memory_hotplug: do not unlock after failing to take the device_hotplug_lock
  mm: swapoff: shmem_unuse() stop eviction without igrab()
  mm: swapoff: take notice of completion sooner
  mm: swapoff: remove too limiting SWAP_UNUSE_MAX_TRIES
  mm: swapoff: shmem_find_swap_entries() filter out other types
  slab: store tagged freelist for off-slab slabmgmt
torvalds committed Apr 19, 2019
2 parents b222e9a + 04f5866 commit 3ecafda
Showing 19 changed files with 151 additions and 104 deletions.
2 changes: 1 addition & 1 deletion drivers/base/memory.c
@@ -506,7 +506,7 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
 
     ret = lock_device_hotplug_sysfs();
     if (ret)
-        goto out;
+        return ret;
 
     nid = memory_add_physaddr_to_nid(phys_addr);
     ret = __add_memory(nid, phys_addr,
3 changes: 3 additions & 0 deletions drivers/infiniband/core/uverbs_main.c
@@ -993,6 +993,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
      * will only be one mm, so no big deal.
      */
     down_write(&mm->mmap_sem);
+    if (!mmget_still_valid(mm))
+        goto skip_mm;
     mutex_lock(&ufile->umap_lock);
     list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
                               list) {
@@ -1007,6 +1009,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
         vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
     }
     mutex_unlock(&ufile->umap_lock);
+skip_mm:
     up_write(&mm->mmap_sem);
     mmput(mm);
 }
18 changes: 18 additions & 0 deletions fs/proc/task_mmu.c
@@ -1143,6 +1143,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                 count = -EINTR;
                 goto out_mm;
             }
+            /*
+             * Avoid to modify vma->vm_flags
+             * without locked ops while the
+             * coredump reads the vm_flags.
+             */
+            if (!mmget_still_valid(mm)) {
+                /*
+                 * Silently return "count"
+                 * like if get_task_mm()
+                 * failed. FIXME: should this
+                 * function have returned
+                 * -ESRCH if get_task_mm()
+                 * failed like if
+                 * get_proc_task() fails?
+                 */
+                up_write(&mm->mmap_sem);
+                goto out_mm;
+            }
             for (vma = mm->mmap; vma; vma = vma->vm_next) {
                 vma->vm_flags &= ~VM_SOFTDIRTY;
                 vma_set_page_prot(vma);
9 changes: 9 additions & 0 deletions fs/userfaultfd.c
@@ -629,6 +629,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 
         /* the various vma->vm_userfaultfd_ctx still points to it */
         down_write(&mm->mmap_sem);
+        /* no task can run (and in turn coredump) yet */
+        VM_WARN_ON(!mmget_still_valid(mm));
         for (vma = mm->mmap; vma; vma = vma->vm_next)
             if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
                 vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -883,6 +885,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
      * taking the mmap_sem for writing.
      */
     down_write(&mm->mmap_sem);
+    if (!mmget_still_valid(mm))
+        goto skip_mm;
     prev = NULL;
     for (vma = mm->mmap; vma; vma = vma->vm_next) {
         cond_resched();
@@ -905,6 +909,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
         vma->vm_flags = new_flags;
         vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
     }
+skip_mm:
     up_write(&mm->mmap_sem);
     mmput(mm);
 wakeup:
@@ -1333,6 +1338,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
         goto out;
 
     down_write(&mm->mmap_sem);
+    if (!mmget_still_valid(mm))
+        goto out_unlock;
     vma = find_vma_prev(mm, start, &prev);
     if (!vma)
         goto out_unlock;
@@ -1520,6 +1527,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
         goto out;
 
     down_write(&mm->mmap_sem);
+    if (!mmget_still_valid(mm))
+        goto out_unlock;
     vma = find_vma_prev(mm, start, &prev);
     if (!vma)
         goto out_unlock;
21 changes: 21 additions & 0 deletions include/linux/sched/mm.h
@@ -49,6 +49,27 @@ static inline void mmdrop(struct mm_struct *mm)
         __mmdrop(mm);
 }
 
+/*
+ * This has to be called after a get_task_mm()/mmget_not_zero()
+ * followed by taking the mmap_sem for writing before modifying the
+ * vmas or anything the coredump pretends not to change from under it.
+ *
+ * NOTE: find_extend_vma() called from GUP context is the only place
+ * that can modify the "mm" (notably the vm_start/end) under mmap_sem
+ * for reading and outside the context of the process, so it is also
+ * the only case that holds the mmap_sem for reading that must call
+ * this function. Generally if the mmap_sem is hold for reading
+ * there's no need of this check after get_task_mm()/mmget_not_zero().
+ *
+ * This function can be obsoleted and the check can be removed, after
+ * the coredump code will hold the mmap_sem for writing before
+ * invoking the ->core_dump methods.
+ */
+static inline bool mmget_still_valid(struct mm_struct *mm)
+{
+    return likely(!mm->core_state);
+}
+
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
  * @mm: The address space to pin.
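The call pattern this comment documents, and which the userfaultfd and clear_refs hunks in this merge follow, goes roughly like the sketch below (the task variable and the out_unlock label are illustrative, not taken from the diff):

    struct mm_struct *mm = get_task_mm(task);

    if (!mm)
        return;
    down_write(&mm->mmap_sem);
    if (!mmget_still_valid(mm))
        goto out_unlock;    /* a coredump is in flight: leave the vmas alone */
    /* ... safe to modify vmas and vm_flags here ... */
out_unlock:
    up_write(&mm->mmap_sem);
    mmput(mm);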
1 change: 1 addition & 0 deletions include/linux/shmem_fs.h
@@ -21,6 +21,7 @@ struct shmem_inode_info {
     struct list_head    swaplist;       /* chain of maybes on swap */
     struct shared_policy    policy;     /* NUMA memory alloc policy */
     struct simple_xattrs    xattrs;     /* list of xattrs */
+    atomic_t        stop_eviction;      /* hold when working on inode */
     struct inode        vfs_inode;
 };
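The new stop_eviction count lets swapoff pin an inode it is scanning without igrab(), and eviction waits for the count to drop back to zero. A minimal sketch of the protocol, as used by the mm/shmem.c hunks further down:

    /* scanning side, shmem_unuse(): pin the inode before dropping the list mutex */
    atomic_inc(&info->stop_eviction);
    /* ... scan the inode's swap entries ... */
    if (atomic_dec_and_test(&info->stop_eviction))
        wake_up_var(&info->stop_eviction);

    /* evicting side, shmem_evict_inode(): wait until no scanner holds the inode */
    wait_var_event(&info->stop_eviction,
                   !atomic_read(&info->stop_eviction));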
4 changes: 2 additions & 2 deletions init/main.c
@@ -582,6 +582,8 @@ asmlinkage __visible void __init start_kernel(void)
     page_alloc_init();
 
     pr_notice("Kernel command line: %s\n", boot_command_line);
+    /* parameters may set static keys */
+    jump_label_init();
     parse_early_param();
     after_dashes = parse_args("Booting kernel",
                               static_command_line, __start___param,
@@ -591,8 +593,6 @@
     parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
                NULL, set_init_arg);
 
-    jump_label_init();
-
     /*
      * These use large bootmem allocations and must precede
      * kmem_cache_init()
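The move matters because an early parameter handler may flip a static key, which is safe only once jump_label_init() has run. A hypothetical handler of that kind (my_key and "my_opt" are invented for illustration):

    DEFINE_STATIC_KEY_FALSE(my_key);

    /* runs from parse_early_param(), hence after jump_label_init() now */
    static int __init my_opt_setup(char *str)
    {
        static_branch_enable(&my_key);
        return 0;
    }
    early_param("my_opt", my_opt_setup);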
3 changes: 2 additions & 1 deletion kernel/watchdog_hld.c
@@ -135,7 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event,
         if (__this_cpu_read(hard_watchdog_warn) == true)
             return;
 
-        pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+        pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
+                 this_cpu);
         print_modules();
         print_irqtrace_events(current);
         if (regs)
6 changes: 3 additions & 3 deletions lib/Kconfig.debug
@@ -753,9 +753,9 @@ endmenu # "Memory Debugging"
 config ARCH_HAS_KCOV
     bool
     help
-      KCOV does not have any arch-specific code, but currently it is enabled
-      only for x86_64. KCOV requires testing on other archs, and most likely
-      disabling of instrumentation for some early boot code.
+      An architecture should select this when it can successfully
+      build and run with CONFIG_KCOV. This typically requires
+      disabling instrumentation for some early boot code.
 
 config CC_HAS_SANCOV_TRACE_PC
     def_bool $(cc-option,-fsanitize-coverage=trace-pc)
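Under the new wording, an architecture advertises support from its own Kconfig rather than being listed here; a hypothetical arch entry would select the symbol like this:

    config MYARCH
        def_bool y
        select ARCH_HAS_KCOV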
2 changes: 2 additions & 0 deletions mm/kmemleak.c
@@ -1401,6 +1401,7 @@ static void scan_block(void *_start, void *_end,
 /*
  * Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency.
  */
+#ifdef CONFIG_SMP
 static void scan_large_block(void *start, void *end)
 {
     void *next;
@@ -1412,6 +1413,7 @@ static void scan_large_block(void *start, void *end)
         cond_resched();
     }
 }
+#endif
 
 /*
  * Scan a memory block corresponding to a kmemleak_object. A condition is
7 changes: 6 additions & 1 deletion mm/mmap.c
@@ -45,6 +45,7 @@
 #include <linux/moduleparam.h>
 #include <linux/pkeys.h>
 #include <linux/oom.h>
+#include <linux/sched/mm.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -2525,7 +2526,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
     vma = find_vma_prev(mm, addr, &prev);
     if (vma && (vma->vm_start <= addr))
         return vma;
-    if (!prev || expand_stack(prev, addr))
+    /* don't alter vm_end if the coredump is running */
+    if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
         return NULL;
     if (prev->vm_flags & VM_LOCKED)
         populate_vma_page_range(prev, addr, prev->vm_end, NULL);
@@ -2551,6 +2553,9 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
         return vma;
     if (!(vma->vm_flags & VM_GROWSDOWN))
         return NULL;
+    /* don't alter vm_start if the coredump is running */
+    if (!mmget_still_valid(mm))
+        return NULL;
     start = vma->vm_start;
     if (expand_stack(vma, addr))
         return NULL;
30 changes: 18 additions & 12 deletions mm/page_alloc.c
@@ -8005,7 +8005,10 @@ void *__init alloc_large_system_hash(const char *tablename,
 bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
              int migratetype, int flags)
 {
-    unsigned long pfn, iter, found;
+    unsigned long found;
+    unsigned long iter = 0;
+    unsigned long pfn = page_to_pfn(page);
+    const char *reason = "unmovable page";
 
     /*
      * TODO we could make this much more efficient by not checking every
@@ -8015,17 +8018,20 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
      * can still lead to having bootmem allocations in zone_movable.
      */
 
-    /*
-     * CMA allocations (alloc_contig_range) really need to mark isolate
-     * CMA pageblocks even when they are not movable in fact so consider
-     * them movable here.
-     */
-    if (is_migrate_cma(migratetype) &&
-            is_migrate_cma(get_pageblock_migratetype(page)))
-        return false;
+    if (is_migrate_cma_page(page)) {
+        /*
+         * CMA allocations (alloc_contig_range) really need to mark
+         * isolate CMA pageblocks even when they are not movable in fact
+         * so consider them movable here.
+         */
+        if (is_migrate_cma(migratetype))
+            return false;
+
+        reason = "CMA page";
+        goto unmovable;
+    }
 
-    pfn = page_to_pfn(page);
-    for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
+    for (found = 0; iter < pageblock_nr_pages; iter++) {
         unsigned long check = pfn + iter;
 
         if (!pfn_valid_within(check))
@@ -8105,7 +8111,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 unmovable:
     WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
     if (flags & REPORT_FAILURE)
-        dump_page(pfn_to_page(pfn+iter), "unmovable page");
+        dump_page(pfn_to_page(pfn + iter), reason);
     return true;
 }
58 changes: 27 additions & 31 deletions mm/shmem.c
@@ -1081,9 +1081,14 @@ static void shmem_evict_inode(struct inode *inode)
             }
             spin_unlock(&sbinfo->shrinklist_lock);
         }
-        if (!list_empty(&info->swaplist)) {
+        while (!list_empty(&info->swaplist)) {
+            /* Wait while shmem_unuse() is scanning this inode... */
+            wait_var_event(&info->stop_eviction,
+                           !atomic_read(&info->stop_eviction));
             mutex_lock(&shmem_swaplist_mutex);
-            list_del_init(&info->swaplist);
+            /* ...but beware of the race if we peeked too early */
+            if (!atomic_read(&info->stop_eviction))
+                list_del_init(&info->swaplist);
             mutex_unlock(&shmem_swaplist_mutex);
         }
     }
@@ -1099,10 +1104,11 @@ extern struct swap_info_struct *swap_info[];
 static int shmem_find_swap_entries(struct address_space *mapping,
                    pgoff_t start, unsigned int nr_entries,
                    struct page **entries, pgoff_t *indices,
-                   bool frontswap)
+                   unsigned int type, bool frontswap)
 {
     XA_STATE(xas, &mapping->i_pages, start);
     struct page *page;
+    swp_entry_t entry;
     unsigned int ret = 0;
 
     if (!nr_entries)
@@ -1116,13 +1122,12 @@ static int shmem_find_swap_entries(struct address_space *mapping,
         if (!xa_is_value(page))
             continue;
 
-        if (frontswap) {
-            swp_entry_t entry = radix_to_swp_entry(page);
-
-            if (!frontswap_test(swap_info[swp_type(entry)],
-                        swp_offset(entry)))
-                continue;
-        }
+        entry = radix_to_swp_entry(page);
+        if (swp_type(entry) != type)
+            continue;
+        if (frontswap &&
+            !frontswap_test(swap_info[type], swp_offset(entry)))
+            continue;
 
         indices[ret] = xas.xa_index;
         entries[ret] = page;
@@ -1194,7 +1199,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type,
 
         pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
                           pvec.pages, indices,
-                          frontswap);
+                          type, frontswap);
         if (pvec.nr == 0) {
             ret = 0;
             break;
@@ -1227,51 +1232,41 @@ int shmem_unuse(unsigned int type, bool frontswap,
         unsigned long *fs_pages_to_unuse)
 {
     struct shmem_inode_info *info, *next;
-    struct inode *inode;
-    struct inode *prev_inode = NULL;
     int error = 0;
 
     if (list_empty(&shmem_swaplist))
         return 0;
 
     mutex_lock(&shmem_swaplist_mutex);
-
-    /*
-     * The extra refcount on the inode is necessary to safely dereference
-     * p->next after re-acquiring the lock. New shmem inodes with swap
-     * get added to the end of the list and we will scan them all.
-     */
     list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
         if (!info->swapped) {
             list_del_init(&info->swaplist);
             continue;
         }
-
-        inode = igrab(&info->vfs_inode);
-        if (!inode)
-            continue;
-
+        /*
+         * Drop the swaplist mutex while searching the inode for swap;
+         * but before doing so, make sure shmem_evict_inode() will not
+         * remove placeholder inode from swaplist, nor let it be freed
+         * (igrab() would protect from unlink, but not from unmount).
+         */
+        atomic_inc(&info->stop_eviction);
         mutex_unlock(&shmem_swaplist_mutex);
-        if (prev_inode)
-            iput(prev_inode);
-        prev_inode = inode;
 
-        error = shmem_unuse_inode(inode, type, frontswap,
+        error = shmem_unuse_inode(&info->vfs_inode, type, frontswap,
                       fs_pages_to_unuse);
         cond_resched();
 
         mutex_lock(&shmem_swaplist_mutex);
         next = list_next_entry(info, swaplist);
         if (!info->swapped)
             list_del_init(&info->swaplist);
+        if (atomic_dec_and_test(&info->stop_eviction))
+            wake_up_var(&info->stop_eviction);
         if (error)
             break;
     }
     mutex_unlock(&shmem_swaplist_mutex);
 
-    if (prev_inode)
-        iput(prev_inode);
-
     return error;
 }
 
@@ -2238,6 +2233,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
         info = SHMEM_I(inode);
         memset(info, 0, (char *)inode - (char *)info);
         spin_lock_init(&info->lock);
+        atomic_set(&info->stop_eviction, 0);
         info->seals = F_SEAL_SEAL;
         info->flags = flags & VM_NORESERVE;
         INIT_LIST_HEAD(&info->shrinklist);