mm: adjust vm_committed_as_batch according to vm overcommit policy

While checking a performance change for the will-it-scale scalability mmap test
[1], we found very high contention on the spinlock of the percpu counter
'vm_committed_as':

    94.14%     0.35%  [kernel.kallsyms]         [k] _raw_spin_lock_irqsave
    48.21% _raw_spin_lock_irqsave;percpu_counter_add_batch;__vm_enough_memory;mmap_region;do_mmap;
    45.91% _raw_spin_lock_irqsave;percpu_counter_add_batch;__do_munmap;

This heavy lock contention is not always necessary.  'vm_committed_as' only
needs to be very precise when the strict OVERCOMMIT_NEVER policy is set, and
it is that precision which requires a rather small batch number for the
percpu counter.
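
For context, here is a simplified sketch (exports and minor details elided)
of the percpu_counter batching logic in lib/percpu_counter.c around this
kernel version; it shows why the batch size directly bounds how often the
shared spinlock is taken: a CPU only takes the lock once its local delta
reaches 'batch':

    void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
                                  s32 batch)
    {
            s64 count;

            preempt_disable();
            count = __this_cpu_read(*fbc->counters) + amount;
            if (count >= batch || count <= -batch) {
                    unsigned long flags;

                    /* slow path: fold the local delta into the shared count */
                    raw_spin_lock_irqsave(&fbc->lock, flags);
                    fbc->count += count;
                    __this_cpu_sub(*fbc->counters, count - amount);
                    raw_spin_unlock_irqrestore(&fbc->lock, flags);
            } else {
                    /* fast path: purely per-CPU update, no lock taken */
                    this_cpu_add(*fbc->counters, amount);
            }
            preempt_enable();
    }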

So keep the 'batch' number unchanged for the strict OVERCOMMIT_NEVER policy,
and lift it to 64X for the OVERCOMMIT_ALWAYS and OVERCOMMIT_GUESS policies.
Also add a sysctl handler to readjust it when the policy is reconfigured.
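
As a worked example on a hypothetical machine (not one of the tested
platforms) with 16 CPUs and 16GB of RAM, i.e. 4194304 4KB pages,
mm_compute_batch() would yield:

    OVERCOMMIT_NEVER:           4194304 / 16 / 256 = 1024 pages
    OVERCOMMIT_ALWAYS/GUESS:    4194304 / 16 / 4   = 65536 pages (64X)

with both results bounded below by max_t(s32, num_present_cpus()*2, 32) and
above by INT_MAX.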

A benchmark with the same testcase as [1] shows a 53% improvement on an
8C/16T desktop and a 2097% (20X) improvement on a 4S/72C/144T server.  We
tested on the 0day test platforms (servers, desktops and laptops), and 80%+
of the platforms show improvements with that test.  Whether a platform
improves depends on whether the test's mmap size exceeds the computed batch
number: commit charges smaller than the batch stay in the per-CPU counters
and never touch the spinlock.

With only a 16X lift, 1/3 of the platforms show improvements, though a
larger batch should help mmap/munmap usage generally, as Michal Hocko
mentioned:

: I believe that there are non-synthetic workloads which would benefit from
: a larger batch.  E.g.  large in-memory databases which do large mmaps
: during startups from multiple threads.

[1] https://lore.kernel.org/lkml/20200305062138.GI5972@shao2-debian/

Signed-off-by: Feng Tang <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Qian Cai <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Dennis Zhou <[email protected]>
Cc: Haiyang Zhang <[email protected]>
Cc: kernel test robot <[email protected]>
Cc: "K. Y. Srinivasan" <[email protected]>
Cc: Tejun Heo <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
ftang1 authored and torvalds committed Aug 7, 2020
1 parent 0a4954a commit 56f3547

Showing 5 changed files with 64 additions and 7 deletions.
2 changes: 2 additions & 0 deletions include/linux/mm.h
@@ -206,6 +206,8 @@ int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
 int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
+int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))

4 changes: 4 additions & 0 deletions include/linux/mman.h
@@ -57,8 +57,12 @@ extern struct percpu_counter vm_committed_as;
 
 #ifdef CONFIG_SMP
 extern s32 vm_committed_as_batch;
+extern void mm_compute_batch(int overcommit_policy);
 #else
 #define vm_committed_as_batch 0
+static inline void mm_compute_batch(int overcommit_policy)
+{
+}
 #endif
 
 unsigned long vm_memory_committed(void);

2 changes: 1 addition & 1 deletion kernel/sysctl.c
@@ -2671,7 +2671,7 @@ static struct ctl_table vm_table[] = {
 		.data		= &sysctl_overcommit_memory,
 		.maxlen		= sizeof(sysctl_overcommit_memory),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= overcommit_policy_handler,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},

22 changes: 16 additions & 6 deletions mm/mm_init.c
@@ -13,6 +13,7 @@
 #include <linux/memory.h>
 #include <linux/notifier.h>
 #include <linux/sched.h>
+#include <linux/mman.h>
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
@@ -144,14 +145,23 @@ EXPORT_SYMBOL_GPL(mm_kobj);
 #ifdef CONFIG_SMP
 s32 vm_committed_as_batch = 32;
 
-static void __meminit mm_compute_batch(void)
+void mm_compute_batch(int overcommit_policy)
 {
 	u64 memsized_batch;
 	s32 nr = num_present_cpus();
 	s32 batch = max_t(s32, nr*2, 32);
-
-	/* batch size set to 0.4% of (total memory/#cpus), or max int32 */
-	memsized_batch = min_t(u64, (totalram_pages()/nr)/256, 0x7fffffff);
+	unsigned long ram_pages = totalram_pages();
+
+	/*
+	 * For policy OVERCOMMIT_NEVER, set batch size to 0.4% of
+	 * (total memory/#cpus), and lift it to 25% for other policies
+	 * to ease the possible lock contention for percpu_counter
+	 * vm_committed_as, while the max limit is INT_MAX
+	 */
+	if (overcommit_policy == OVERCOMMIT_NEVER)
+		memsized_batch = min_t(u64, ram_pages/nr/256, INT_MAX);
+	else
+		memsized_batch = min_t(u64, ram_pages/nr/4, INT_MAX);
 
 	vm_committed_as_batch = max_t(s32, memsized_batch, batch);
 }
@@ -162,7 +172,7 @@ static int __meminit mm_compute_batch_notifier(struct notifier_block *self,
 	switch (action) {
 	case MEM_ONLINE:
 	case MEM_OFFLINE:
-		mm_compute_batch();
+		mm_compute_batch(sysctl_overcommit_memory);
 	default:
 		break;
 	}
@@ -176,7 +186,7 @@ static struct notifier_block compute_batch_nb __meminitdata = {
 
 static int __init mm_compute_batch_init(void)
 {
-	mm_compute_batch();
+	mm_compute_batch(sysctl_overcommit_memory);
 	register_hotmemory_notifier(&compute_batch_nb);
 
 	return 0;

41 changes: 41 additions & 0 deletions mm/util.c
@@ -746,6 +746,47 @@ int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer,
 	return ret;
 }
 
+static void sync_overcommit_as(struct work_struct *dummy)
+{
+	percpu_counter_sync(&vm_committed_as);
+}
+
+int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+	int new_policy;
+	int ret;
+
+	/*
+	 * The deviation of sync_overcommit_as could be big with loose policy
+	 * like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to
+	 * strict OVERCOMMIT_NEVER, we need to reduce the deviation to comply
+	 * with the strict "NEVER", and to avoid a possible race condition
+	 * (even though the user usually won't switch to OVERCOMMIT_NEVER
+	 * frequently), the switch is done in the following order:
+	 *	1. changing the batch
+	 *	2. sync percpu count on each CPU
+	 *	3. switch the policy
+	 */
+	if (write) {
+		t = *table;
+		t.data = &new_policy;
+		ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+		if (ret)
+			return ret;
+
+		mm_compute_batch(new_policy);
+		if (new_policy == OVERCOMMIT_NEVER)
+			schedule_on_each_cpu(sync_overcommit_as);
+		sysctl_overcommit_memory = new_policy;
+	} else {
+		ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	}
+
+	return ret;
+}
+
 int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos)
 {

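For reference, percpu_counter_sync(), added in the same patch series, folds
only the calling CPU's local delta into the shared count, which is why the
handler runs sync_overcommit_as() on every CPU via schedule_on_each_cpu()
rather than calling it once.  A simplified sketch of its logic:

    void percpu_counter_sync(struct percpu_counter *fbc)
    {
            unsigned long flags;
            s64 count;

            raw_spin_lock_irqsave(&fbc->lock, flags);
            /* move this CPU's pending delta into the shared counter */
            count = __this_cpu_read(*fbc->counters);
            fbc->count += count;
            __this_cpu_sub(*fbc->counters, count);
            raw_spin_unlock_irqrestore(&fbc->lock, flags);
    }

With the handler wired up, e.g. 'echo 2 > /proc/sys/vm/overcommit_memory'
(OVERCOMMIT_NEVER) first shrinks the batch, then drains every CPU's pending
delta, and only then flips the policy, so the strict accounting never starts
from a stale global count.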
