Skip to content

Commit

Permalink
mm: fix Committed_AS underflow on large NR_CPUS environment
Browse files Browse the repository at this point in the history
The Committed_AS field can underflow in certain situations:

>         # while true; do cat /proc/meminfo  | grep _AS; sleep 1; done | uniq -c
>               1 Committed_AS: 18446744073709323392 kB
>              11 Committed_AS: 18446744073709455488 kB
>               6 Committed_AS:    35136 kB
>               5 Committed_AS: 18446744073709454400 kB
>               7 Committed_AS:    35904 kB
>               3 Committed_AS: 18446744073709453248 kB
>               2 Committed_AS:    34752 kB
>               9 Committed_AS: 18446744073709453248 kB
>               8 Committed_AS:    34752 kB
>               3 Committed_AS: 18446744073709320960 kB
>               7 Committed_AS: 18446744073709454080 kB
>               3 Committed_AS: 18446744073709320960 kB
>               5 Committed_AS: 18446744073709454080 kB
>               6 Committed_AS: 18446744073709320960 kB

Because NR_CPUS can be greater than 1000 and meminfo_proc_show() does
not check for underflow.

But NR_CPUS proportional isn't good calculation.  In general,
possibility of lock contention is proportional to the number of online
cpus, not theorical maximum cpus (NR_CPUS).

The current kernel has generic percpu-counter stuff.  using it is right
way.  it makes code simplify and percpu_counter_read_positive() don't
make underflow issue.

Reported-by: Dave Hansen <[email protected]>
Signed-off-by: KOSAKI Motohiro <[email protected]>
Cc: Eric B Munson <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: <[email protected]>		[All kernel versions]
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
kosaki authored and torvalds committed May 2, 2009
1 parent 0763ed2 commit 00a62ce
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 65 deletions.
2 changes: 1 addition & 1 deletion fs/proc/meminfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#define K(x) ((x) << (PAGE_SHIFT - 10))
si_meminfo(&i);
si_swapinfo(&i);
committed = atomic_long_read(&vm_committed_space);
committed = percpu_counter_read_positive(&vm_committed_as);
allowed = ((totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100) + total_swap_pages;

Expand Down
9 changes: 3 additions & 6 deletions include/linux/mman.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,18 @@

#ifdef __KERNEL__
#include <linux/mm.h>
#include <linux/percpu_counter.h>

#include <asm/atomic.h>

extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern atomic_long_t vm_committed_space;
extern struct percpu_counter vm_committed_as;

#ifdef CONFIG_SMP
extern void vm_acct_memory(long pages);
#else
static inline void vm_acct_memory(long pages)
{
atomic_long_add(pages, &vm_committed_space);
percpu_counter_add(&vm_committed_as, pages);
}
#endif

static inline void vm_unacct_memory(long pages)
{
Expand Down
12 changes: 6 additions & 6 deletions mm/mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
struct percpu_counter vm_committed_as;

/*
* Check that a process has enough memory to allocate a new virtual
Expand Down Expand Up @@ -179,11 +179,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
if (mm)
allowed -= mm->total_vm / 32;

/*
* cast `allowed' as a signed long because vm_committed_space
* sometimes has a negative value
*/
if (atomic_long_read(&vm_committed_space) < (long)allowed)
if (percpu_counter_read_positive(&vm_committed_as) < allowed)
return 0;
error:
vm_unacct_memory(pages);
Expand Down Expand Up @@ -2481,4 +2477,8 @@ void mm_drop_all_locks(struct mm_struct *mm)
*/
void __init mmap_init(void)
{
int ret;

ret = percpu_counter_init(&vm_committed_as, 0);
VM_BUG_ON(ret);
}
13 changes: 7 additions & 6 deletions mm/nommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
struct percpu_counter vm_committed_as;
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
Expand Down Expand Up @@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
*/
void __init mmap_init(void)
{
int ret;

ret = percpu_counter_init(&vm_committed_as, 0);
VM_BUG_ON(ret);
vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
}

Expand Down Expand Up @@ -1847,12 +1851,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
if (mm)
allowed -= mm->total_vm / 32;

/*
* cast `allowed' as a signed long because vm_committed_space
* sometimes has a negative value
*/
if (atomic_long_read(&vm_committed_space) < (long)allowed)
if (percpu_counter_read_positive(&vm_committed_as) < allowed)
return 0;

error:
vm_unacct_memory(pages);

Expand Down
46 changes: 0 additions & 46 deletions mm/swap.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,

EXPORT_SYMBOL(pagevec_lookup_tag);

#ifdef CONFIG_SMP
/*
* We tolerate a little inaccuracy to avoid ping-ponging the counter between
* CPUs
*/
#define ACCT_THRESHOLD max(16, NR_CPUS * 2)

static DEFINE_PER_CPU(long, committed_space);

void vm_acct_memory(long pages)
{
long *local;

preempt_disable();
local = &__get_cpu_var(committed_space);
*local += pages;
if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
atomic_long_add(*local, &vm_committed_space);
*local = 0;
}
preempt_enable();
}

#ifdef CONFIG_HOTPLUG_CPU

/* Drop the CPU's cached committed space back into the central pool. */
static int cpu_swap_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
long *committed;

committed = &per_cpu(committed_space, (long)hcpu);
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
atomic_long_add(*committed, &vm_committed_space);
*committed = 0;
drain_cpu_pagevecs((long)hcpu);
}
return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
#endif /* CONFIG_SMP */

/*
* Perform any setup for the swap system
*/
Expand All @@ -554,7 +511,4 @@ void __init swap_setup(void)
* Right now other parts of the system means that we
* _really_ don't want to cluster much more
*/
#ifdef CONFIG_HOTPLUG_CPU
hotcpu_notifier(cpu_swap_callback, 0);
#endif
}

0 comments on commit 00a62ce

Please sign in to comment.