Skip to content

Commit

Permalink
percpu: rework memcg accounting
Browse files Browse the repository at this point in the history
The current implementation of memcg accounting for percpu memory is
based on the idea of having two separate sets of chunks for accounted
and non-accounted memory. This approach has the advantage of not
wasting any extra memory on memcg data for non-accounted chunks;
however, it complicates the code and leads to a higher number of
chunks due to lower chunk utilization.

Instead of having two chunk types, it's possible to declare all* chunks
memcg-aware unless kernel memory accounting is disabled globally
by a boot option. The size of objcg_array is usually small in
comparison to the chunks themselves (it obviously depends on the number
of CPUs), so even if some chunks have no accounted allocations, the
memory waste isn't significant and will likely be compensated for by
higher chunk utilization. Also, over time more and more percpu
allocations will likely become accounted.

* The first chunk is initialized before the memory cgroup subsystem,
  so we don't know for sure whether we need to allocate obj_cgroups
  for it. Because it's small, let's make allocations from it free of
  charge (i.e. not accounted). Then we don't need to allocate
  obj_cgroups for it.

Signed-off-by: Roman Gushchin <[email protected]>
Signed-off-by: Dennis Zhou <[email protected]>
  • Loading branch information
rgushchin authored and dennisszhou committed Jun 5, 2021
1 parent 4d5c8ae commit faf65dd
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 172 deletions.
52 changes: 1 addition & 51 deletions mm/percpu-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,6 @@
#include <linux/types.h>
#include <linux/percpu.h>

/*
* There are two chunk types: root and memcg-aware.
* Chunks of each type have their own list of slots.
*
* Memcg-aware chunks have an attached vector of obj_cgroup pointers, which is
* used to store the memcg membership data of each percpu object. Obj_cgroups
* are ref-counted pointers to a memory cgroup with the ability to switch
* dynamically to the parent memory cgroup. This allows a deleted memory cgroup
* to be reclaimed without reclaiming all outstanding objects that hold a
* reference to it.
*/
enum pcpu_chunk_type {
/* Chunk without an obj_cgroups vector. */
PCPU_CHUNK_ROOT,
#ifdef CONFIG_MEMCG_KMEM
/* Chunk with an obj_cgroups vector; exists only with CONFIG_MEMCG_KMEM. */
PCPU_CHUNK_MEMCG,
#endif
/* Number of chunk types above; used as the bound when iterating types. */
PCPU_NR_CHUNK_TYPES,
/*
* Shares the value of PCPU_NR_CHUNK_TYPES, so it never names a real
* chunk type. NOTE(review): presumably a "no usable type" sentinel for
* failed allocations - confirm against the callers.
*/
PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES
};

/*
* pcpu_block_md is the metadata block struct.
* Each chunk's bitmap is split into a number of full blocks.
Expand Down Expand Up @@ -91,7 +72,7 @@ extern struct list_head *pcpu_chunk_lists;
extern int pcpu_nr_slots;
extern int pcpu_sidelined_slot;
extern int pcpu_to_depopulate_slot;
extern int pcpu_nr_empty_pop_pages[];
extern int pcpu_nr_empty_pop_pages;

extern struct pcpu_chunk *pcpu_first_chunk;
extern struct pcpu_chunk *pcpu_reserved_chunk;
Expand Down Expand Up @@ -132,37 +113,6 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk)
return pcpu_nr_pages_to_map_bits(chunk->nr_pages);
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * Classify @chunk: a chunk with an obj_cgroups vector attached is
 * memcg-aware, any other chunk is a root chunk.
 */
static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
{
	return chunk->obj_cgroups ? PCPU_CHUNK_MEMCG : PCPU_CHUNK_ROOT;
}

/* Report whether @chunk_type is the memcg-aware chunk type. */
static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
{
	const bool memcg_aware = (chunk_type == PCPU_CHUNK_MEMCG);

	return memcg_aware;
}

#else
/*
 * Variant built when CONFIG_MEMCG_KMEM is not set: @chunk is not inspected
 * and every chunk is reported as a root chunk.
 */
static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
{
return PCPU_CHUNK_ROOT;
}

/*
 * Variant built when CONFIG_MEMCG_KMEM is not set: no chunk type is
 * memcg-aware, so the answer is always false regardless of @chunk_type.
 */
static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
{
return false;
}
#endif

/*
 * Return the start of the slot lists for @chunk_type within
 * pcpu_chunk_lists: offset 0 for non-memcg chunk types, offset
 * pcpu_nr_slots for the memcg-aware type.
 */
static inline struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type)
{
	int base = 0;

	if (pcpu_is_memcg_chunk(chunk_type))
		base = pcpu_nr_slots;

	return pcpu_chunk_lists + base;
}

#ifdef CONFIG_PERCPU_STATS

#include <linux/spinlock.h>
Expand Down
5 changes: 2 additions & 3 deletions mm/percpu-km.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,15 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
/* nada */
}

static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
gfp_t gfp)
static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
struct pcpu_chunk *chunk;
struct page *pages;
unsigned long flags;
int i;

chunk = pcpu_alloc_chunk(type, gfp);
chunk = pcpu_alloc_chunk(gfp);
if (!chunk)
return NULL;

Expand Down
46 changes: 15 additions & 31 deletions mm/percpu-stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,11 @@ static int find_max_nr_alloc(void)
{
struct pcpu_chunk *chunk;
int slot, max_nr_alloc;
enum pcpu_chunk_type type;

max_nr_alloc = 0;
for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
for (slot = 0; slot < pcpu_nr_slots; slot++)
list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
list)
max_nr_alloc = max(max_nr_alloc,
chunk->nr_alloc);
for (slot = 0; slot < pcpu_nr_slots; slot++)
list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list)
max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc);

return max_nr_alloc;
}
Expand Down Expand Up @@ -133,9 +129,6 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
P("cur_min_alloc", cur_min_alloc);
P("cur_med_alloc", cur_med_alloc);
P("cur_max_alloc", cur_max_alloc);
#ifdef CONFIG_MEMCG_KMEM
P("memcg_aware", pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)));
#endif
seq_putc(m, '\n');
}

Expand All @@ -144,8 +137,6 @@ static int percpu_stats_show(struct seq_file *m, void *v)
struct pcpu_chunk *chunk;
int slot, max_nr_alloc;
int *buffer;
enum pcpu_chunk_type type;
int nr_empty_pop_pages;

alloc_buffer:
spin_lock_irq(&pcpu_lock);
Expand All @@ -166,10 +157,6 @@ static int percpu_stats_show(struct seq_file *m, void *v)
goto alloc_buffer;
}

nr_empty_pop_pages = 0;
for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];

#define PL(X) \
seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)

Expand Down Expand Up @@ -201,7 +188,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
PU(nr_max_chunks);
PU(min_alloc_size);
PU(max_alloc_size);
P("empty_pop_pages", nr_empty_pop_pages);
P("empty_pop_pages", pcpu_nr_empty_pop_pages);
seq_putc(m, '\n');

#undef PU
Expand All @@ -215,20 +202,17 @@ static int percpu_stats_show(struct seq_file *m, void *v)
chunk_map_stats(m, pcpu_reserved_chunk, buffer);
}

for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
for (slot = 0; slot < pcpu_nr_slots; slot++) {
list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
list) {
if (chunk == pcpu_first_chunk)
seq_puts(m, "Chunk: <- First Chunk\n");
else if (slot == pcpu_to_depopulate_slot)
seq_puts(m, "Chunk (to_depopulate)\n");
else if (slot == pcpu_sidelined_slot)
seq_puts(m, "Chunk (sidelined):\n");
else
seq_puts(m, "Chunk:\n");
chunk_map_stats(m, chunk, buffer);
}
for (slot = 0; slot < pcpu_nr_slots; slot++) {
list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
if (chunk == pcpu_first_chunk)
seq_puts(m, "Chunk: <- First Chunk\n");
else if (slot == pcpu_to_depopulate_slot)
seq_puts(m, "Chunk (to_depopulate)\n");
else if (slot == pcpu_sidelined_slot)
seq_puts(m, "Chunk (sidelined):\n");
else
seq_puts(m, "Chunk:\n");
chunk_map_stats(m, chunk, buffer);
}
}

Expand Down
11 changes: 5 additions & 6 deletions mm/percpu-vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
pcpu_free_pages(chunk, pages, page_start, page_end);
}

static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
gfp_t gfp)
static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
struct pcpu_chunk *chunk;
struct vm_struct **vms;

chunk = pcpu_alloc_chunk(type, gfp);
chunk = pcpu_alloc_chunk(gfp);
if (!chunk)
return NULL;

Expand Down Expand Up @@ -403,7 +402,7 @@ static bool pcpu_should_reclaim_chunk(struct pcpu_chunk *chunk)
* chunk, move it to the to_depopulate list.
*/
return ((chunk->isolated && chunk->nr_empty_pop_pages) ||
(pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] >
PCPU_EMPTY_POP_PAGES_HIGH + chunk->nr_empty_pop_pages &&
chunk->nr_empty_pop_pages >= chunk->nr_pages / 4));
(pcpu_nr_empty_pop_pages >
(PCPU_EMPTY_POP_PAGES_HIGH + chunk->nr_empty_pop_pages) &&
chunk->nr_empty_pop_pages >= chunk->nr_pages / 4));
}
Loading

0 comments on commit faf65dd

Please sign in to comment.