Skip to content

Commit

Permalink
mm: memcontrol: use vmalloc fallback for large kmem memcg arrays
Browse files Browse the repository at this point in the history
For quick per-memcg indexing, slab caches and list_lru structures
maintain linear arrays of descriptors.  As the number of concurrent
memory cgroups in the system goes up, this requires large contiguous
allocations (8k cgroups = order-5, 16k cgroups = order-6 etc.) for every
existing slab cache and list_lru, which can easily fail on loaded
systems.  E.g.:

  mkdir: page allocation failure: order:5, mode:0x14040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null)
  CPU: 1 PID: 6399 Comm: mkdir Not tainted 4.13.0-mm1-00065-g720bbe532b7c-dirty #481
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014
  Call Trace:
   ? __alloc_pages_direct_compact+0x4c/0x110
   __alloc_pages_nodemask+0xf50/0x1430
   alloc_pages_current+0x60/0xc0
   kmalloc_order_trace+0x29/0x1b0
   __kmalloc+0x1f4/0x320
   memcg_update_all_list_lrus+0xca/0x2e0
   mem_cgroup_css_alloc+0x612/0x670
   cgroup_apply_control_enable+0x19e/0x360
   cgroup_mkdir+0x322/0x490
   kernfs_iop_mkdir+0x55/0x80
   vfs_mkdir+0xd0/0x120
   SyS_mkdirat+0x6c/0xe0
   SyS_mkdir+0x14/0x20
   entry_SYSCALL_64_fastpath+0x18/0xad
  Mem-Info:
  active_anon:2965 inactive_anon:19 isolated_anon:0
   active_file:100270 inactive_file:98846 isolated_file:0
   unevictable:0 dirty:0 writeback:0 unstable:0
   slab_reclaimable:7328 slab_unreclaimable:16402
   mapped:771 shmem:52 pagetables:278 bounce:0
   free:13718 free_pcp:0 free_cma:0

This output is from an artificial reproducer, but we have repeatedly
observed order-7 failures in production in the Facebook fleet.  These
systems become useless as they cannot run more jobs, even though there
is plenty of memory to allocate 128 individual pages.

Use kvmalloc and kvzalloc to fall back to vmalloc space if these arrays
prove too large to be allocated physically contiguously.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Johannes Weiner <[email protected]>
Reviewed-by: Josef Bacik <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Acked-by: Vladimir Davydov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
hnaz authored and torvalds committed Oct 4, 2017
1 parent 3181c38 commit f80c7da
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 13 deletions.
12 changes: 6 additions & 6 deletions mm/list_lru.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,12 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
int size = memcg_nr_cache_ids;

nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL);
if (!nlru->memcg_lrus)
return -ENOMEM;

if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
kfree(nlru->memcg_lrus);
kvfree(nlru->memcg_lrus);
return -ENOMEM;
}

Expand All @@ -340,7 +340,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
kfree(nlru->memcg_lrus);
kvfree(nlru->memcg_lrus);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
Expand All @@ -351,12 +351,12 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
BUG_ON(old_size > new_size);

old = nlru->memcg_lrus;
new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL);
if (!new)
return -ENOMEM;

if (__memcg_init_list_lru_node(new, old_size, new_size)) {
kfree(new);
kvfree(new);
return -ENOMEM;
}

Expand All @@ -373,7 +373,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
nlru->memcg_lrus = new;
spin_unlock_irq(&nlru->lock);

kfree(old);
kvfree(old);
return 0;
}

Expand Down
22 changes: 15 additions & 7 deletions mm/slab_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ static int init_memcg_params(struct kmem_cache *s,
if (!memcg_nr_cache_ids)
return 0;

arr = kzalloc(sizeof(struct memcg_cache_array) +
memcg_nr_cache_ids * sizeof(void *),
GFP_KERNEL);
arr = kvzalloc(sizeof(struct memcg_cache_array) +
memcg_nr_cache_ids * sizeof(void *),
GFP_KERNEL);
if (!arr)
return -ENOMEM;

Expand All @@ -178,15 +178,23 @@ static int init_memcg_params(struct kmem_cache *s,
static void destroy_memcg_params(struct kmem_cache *s)
{
if (is_root_cache(s))
kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

/*
 * RCU callback that frees a retired memcg_cache_array.
 *
 * @rcu: the rcu_head embedded in the array being released; container_of()
 *       recovers the enclosing struct memcg_cache_array from it.
 *
 * The array is released with kvfree() to match the kvzalloc() calls that
 * allocate these arrays in init_memcg_params() and update_memcg_params().
 * update_memcg_params() queues this callback through call_rcu() in place of
 * the earlier kfree_rcu().
 */
static void free_memcg_params(struct rcu_head *rcu)
{
struct memcg_cache_array *old;

old = container_of(rcu, struct memcg_cache_array, rcu);
kvfree(old);
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
struct memcg_cache_array *old, *new;

new = kzalloc(sizeof(struct memcg_cache_array) +
new_array_size * sizeof(void *), GFP_KERNEL);
new = kvzalloc(sizeof(struct memcg_cache_array) +
new_array_size * sizeof(void *), GFP_KERNEL);
if (!new)
return -ENOMEM;

Expand All @@ -198,7 +206,7 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size)

rcu_assign_pointer(s->memcg_params.memcg_caches, new);
if (old)
kfree_rcu(old, rcu);
call_rcu(&old->rcu, free_memcg_params);
return 0;
}

Expand Down

0 comments on commit f80c7da

Please sign in to comment.