Skip to content

Commit

Permalink
Merge branch 'akpm' (patches from Andrew)
Browse files Browse the repository at this point in the history
Merge misc fixes from Andrew Morton:
 "13 fixes"

* emailed patches from Andrew Morton <[email protected]>:
  rbtree: include rcu.h
  scripts/faddr2line: fix error when addr2line output contains discriminator
  ocfs2: take inode cluster lock before moving reflinked inode from orphan dir
  mm, oom: fix concurrent munlock and oom reaper unmap, v3
  mm: migrate: fix double call of radix_tree_replace_slot()
  proc/kcore: don't bounds check against address 0
  mm: don't show nr_indirectly_reclaimable in /proc/vmstat
  mm: sections are not offlined during memory hotremove
  z3fold: fix reclaim lock-ups
  init: fix false positives in W+X checking
  lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit()
  KASAN: prohibit KASAN+STRUCTLEAK combination
  MAINTAINERS: update Shuah's email address
  • Loading branch information
torvalds committed May 12, 2018
2 parents 4bc8719 + 2075b16 commit f0ab773
Show file tree
Hide file tree
Showing 17 changed files with 164 additions and 87 deletions.
3 changes: 0 additions & 3 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c

CPU POWER MONITORING SUBSYSTEM
M: Thomas Renninger <[email protected]>
M: Shuah Khan <[email protected]>
M: Shuah Khan <[email protected]>
L: [email protected]
S: Maintained
Expand Down Expand Up @@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/
F: include/uapi/linux/sunrpc/

KERNEL SELFTEST FRAMEWORK
M: Shuah Khan <[email protected]>
M: Shuah Khan <[email protected]>
L: [email protected]
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
Expand Down Expand Up @@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c

USB OVER IP DRIVER
M: Valentina Manea <[email protected]>
M: Shuah Khan <[email protected]>
M: Shuah Khan <[email protected]>
L: [email protected]
S: Maintained
Expand Down
4 changes: 4 additions & 0 deletions arch/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
config GCC_PLUGIN_STRUCTLEAK
bool "Force initialization of variables containing userspace addresses"
depends on GCC_PLUGINS
# Currently STRUCTLEAK inserts initialization outside the live scope of
# variables from KASAN's point of view. This leads to false-positive
# KASAN reports. Prohibit this combination for now.
depends on !KASAN_EXTRA
help
This plugin zero-initializes any structures containing a
__user attribute. This can prevent some classes of information
Expand Down
14 changes: 12 additions & 2 deletions fs/ocfs2/refcounttree.c
Original file line number Diff line number Diff line change
Expand Up @@ -4250,10 +4250,11 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry, bool preserve)
{
int error;
int error, had_lock;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
struct ocfs2_lock_holder oh;

if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
Expand Down Expand Up @@ -4295,21 +4296,30 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
goto out;
}

had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
&oh);
if (had_lock < 0) {
error = had_lock;
mlog_errno(error);
goto out;
}

/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
&new_dentry->d_name);
if (error)
mlog_errno(error);
}
out:
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
if (error)
mlog_errno(error);
}
ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);

out:
if (new_orphan_inode) {
/*
* We need to open_unlock the inode no matter whether we
Expand Down
23 changes: 16 additions & 7 deletions fs/proc/kcore.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
struct list_head *head = (struct list_head *)arg;
struct kcore_list *ent;
struct page *p;

if (!pfn_valid(pfn))
return 1;

p = pfn_to_page(pfn);
if (!memmap_valid_within(pfn, p, page_zone(p)))
return 1;

ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;

/* Sanity check: Can happen in 32bit arch...maybe */
if (ent->addr < (unsigned long) __va(0))
if (!virt_addr_valid(ent->addr))
goto free_out;

/* cut not-mapped area. ....from ppc-32 code. */
if (ULONG_MAX - ent->addr < ent->size)
ent->size = ULONG_MAX - ent->addr;

/* cut when vmalloc() area is higher than direct-map area */
if (VMALLOC_START > (unsigned long)__va(0)) {
if (ent->addr > VMALLOC_START)
goto free_out;
/*
* We've already checked virt_addr_valid so we know this address
* is a valid pointer, therefore we can check against it to determine
* if we need to trim
*/
if (VMALLOC_START > ent->addr) {
if (VMALLOC_START - ent->addr < ent->size)
ent->size = VMALLOC_START - ent->addr;
}
Expand Down
2 changes: 2 additions & 0 deletions include/linux/oom.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
return 0;
}

void __oom_reap_task_mm(struct mm_struct *mm);

extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
Expand Down
1 change: 1 addition & 0 deletions include/linux/rbtree_augmented.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/rcupdate.h>

/*
* Please note - only struct rb_augment_callbacks and the prototypes for
Expand Down
1 change: 1 addition & 0 deletions include/linux/rbtree_latch.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/rcupdate.h>

struct latch_tree_node {
struct rb_node node[2];
Expand Down
7 changes: 7 additions & 0 deletions init/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata);
static void mark_readonly(void)
{
if (rodata_enabled) {
/*
* load_module() results in W+X mappings, which are cleaned up
* with call_rcu_sched(). Let's make sure that queued work is
* flushed so that we don't hit false positives looking for
* insecure pages which are W+X.
*/
rcu_barrier_sched();
mark_rodata_ro();
rodata_test();
} else
Expand Down
5 changes: 5 additions & 0 deletions kernel/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_sched(), but we don't want to slow down the success
* path, so use actual RCU here.
* Note that module_alloc() on most architectures creates W+X page
* mappings which won't be cleaned up until do_free_init() runs. Any
* code such as mark_rodata_ro() which depends on those mappings to
* be cleaned up needs to sync with the queued work, i.e. call
* rcu_barrier_sched().
*/
call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
Expand Down
7 changes: 6 additions & 1 deletion lib/find_bit_benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,12 @@ static int __init find_bit_test(void)
test_find_next_bit(bitmap, BITMAP_LEN);
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
test_find_first_bit(bitmap, BITMAP_LEN);

/*
* test_find_first_bit() may take some time, so
* traverse only part of the bitmap to avoid a soft lockup.
*/
test_find_first_bit(bitmap, BITMAP_LEN / 10);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);

pr_err("\nStart testing find_bit() with sparse bitmap\n");
Expand Down
4 changes: 1 addition & 3 deletions mm/migrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
int i;
int index = page_index(page);

for (i = 0; i < HPAGE_PMD_NR; i++) {
for (i = 1; i < HPAGE_PMD_NR; i++) {
pslot = radix_tree_lookup_slot(&mapping->i_pages,
index + i);
radix_tree_replace_slot(&mapping->i_pages, pslot,
newpage + i);
}
} else {
radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
}

/*
Expand Down
44 changes: 26 additions & 18 deletions mm/mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
/* mm's last user has gone, and it's about to be pulled down */
mmu_notifier_release(mm);

if (unlikely(mm_is_oom_victim(mm))) {
/*
* Manually reap the mm to free as much memory as possible.
* Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
* this mm from further consideration. Taking mm->mmap_sem for
* write after setting MMF_OOM_SKIP will guarantee that the oom
* reaper will not run on this mm again after mmap_sem is
* dropped.
*
* Nothing can be holding mm->mmap_sem here and the above call
* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
* __oom_reap_task_mm() will not block.
*
* This needs to be done before calling munlock_vma_pages_all(),
* which clears VM_LOCKED, otherwise the oom reaper cannot
* reliably test it.
*/
mutex_lock(&oom_lock);
__oom_reap_task_mm(mm);
mutex_unlock(&oom_lock);

set_bit(MMF_OOM_SKIP, &mm->flags);
down_write(&mm->mmap_sem);
up_write(&mm->mmap_sem);
}

if (mm->locked_vm) {
vma = mm->mmap;
while (vma) {
Expand All @@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use -1 here to ensure all VMAs in the mm are unmapped */
unmap_vmas(&tlb, vma, 0, -1);

if (unlikely(mm_is_oom_victim(mm))) {
/*
* Wait for oom_reap_task() to stop working on this
* mm. Because MMF_OOM_SKIP is already set before
* calling down_read(), oom_reap_task() will not run
* on this "mm" post up_write().
*
* mm_is_oom_victim() cannot be set from under us
* either because victim->mm is already set to NULL
* under task_lock before calling mmput and oom_mm is
* set not NULL by the OOM killer only if victim->mm
* is found not NULL while holding the task_lock.
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
down_write(&mm->mmap_sem);
up_write(&mm->mmap_sem);
}
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, 0, -1);

Expand Down
81 changes: 43 additions & 38 deletions mm/oom_kill.c
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
return false;
}


#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
Expand All @@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);

static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
void __oom_reap_task_mm(struct mm_struct *mm)
{
struct mmu_gather tlb;
struct vm_area_struct *vma;

/*
* Tell all users of get_user/copy_from_user etc... that the content
* is no longer stable. No barriers really needed because unmapping
* should imply barriers already and the reader would hit a page fault
* if it stumbled over reaped memory.
*/
set_bit(MMF_UNSTABLE, &mm->flags);

for (vma = mm->mmap ; vma; vma = vma->vm_next) {
if (!can_madv_dontneed_vma(vma))
continue;

/*
* Only anonymous pages have a good chance to be dropped
* without additional steps which we cannot afford as we
* are OOM already.
*
* We do not even care about fs backed pages because all
* which are reclaimable have already been reclaimed and
* we do not want to block exit_mmap by keeping mm ref
* count elevated without a good reason.
*/
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
const unsigned long start = vma->vm_start;
const unsigned long end = vma->vm_end;
struct mmu_gather tlb;

tlb_gather_mmu(&tlb, mm, start, end);
mmu_notifier_invalidate_range_start(mm, start, end);
unmap_page_range(&tlb, vma, start, end, NULL);
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
}
}

static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
bool ret = true;

/*
* We have to make sure to not race with the victim exit path
* and cause premature new oom victim selection:
* __oom_reap_task_mm exit_mm
* oom_reap_task_mm exit_mm
* mmget_not_zero
* mmput
* atomic_dec_and_test
Expand Down Expand Up @@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)

trace_start_task_reaping(tsk->pid);

/*
* Tell all users of get_user/copy_from_user etc... that the content
* is no longer stable. No barriers really needed because unmapping
* should imply barriers already and the reader would hit a page fault
* if it stumbled over reaped memory.
*/
set_bit(MMF_UNSTABLE, &mm->flags);

for (vma = mm->mmap ; vma; vma = vma->vm_next) {
if (!can_madv_dontneed_vma(vma))
continue;
__oom_reap_task_mm(mm);

/*
* Only anonymous pages have a good chance to be dropped
* without additional steps which we cannot afford as we
* are OOM already.
*
* We do not even care about fs backed pages because all
* which are reclaimable have already been reclaimed and
* we do not want to block exit_mmap by keeping mm ref
* count elevated without a good reason.
*/
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
const unsigned long start = vma->vm_start;
const unsigned long end = vma->vm_end;

tlb_gather_mmu(&tlb, mm, start, end);
mmu_notifier_invalidate_range_start(mm, start, end);
unmap_page_range(&tlb, vma, start, end, NULL);
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
}
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
Expand All @@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
struct mm_struct *mm = tsk->signal->oom_mm;

/* Retry the down_read_trylock(mmap_sem) a few times */
while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
schedule_timeout_idle(HZ/10);

if (attempts <= MAX_OOM_REAP_RETRIES ||
test_bit(MMF_OOM_SKIP, &mm->flags))
goto done;


pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
debug_show_all_locks();
Expand Down
2 changes: 1 addition & 1 deletion mm/sparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
unsigned long pfn;

for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
unsigned long section_nr = pfn_to_section_nr(start_pfn);
unsigned long section_nr = pfn_to_section_nr(pfn);
struct mem_section *ms;

/*
Expand Down
Loading

0 comments on commit f0ab773

Please sign in to comment.