Skip to content

Commit

Permalink
mm: memcg: make sure memory.events is up to date when waking pollers
Browse files Browse the repository at this point in the history
Commit a983b5e ("mm: memcontrol: fix excessive complexity in
memory.stat reporting") added per-cpu drift to all memory cgroup stats
and events shown in memory.stat and memory.events.

For memory.stat this is acceptable.  But memory.events issues file
notifications, and somebody polling the file for changes will be
confused when the counters in it are unchanged after a wakeup.

Luckily, the events in memory.events - MEMCG_LOW, MEMCG_HIGH, MEMCG_MAX,
MEMCG_OOM - are sufficiently rare and high-level that we don't need
per-cpu buffering for them: MEMCG_HIGH and MEMCG_MAX would be the most
frequent, but they're counting invocations of reclaim, which is a
complex operation that touches many shared cachelines, so the cost of
one more shared atomic increment is negligible by comparison.

This splits memory.events from the generic VM events and tracks them in
their own, unbuffered atomic counters.  That's also cleaner, as it
eliminates the ugly enum nesting of VM and cgroup events.

[[email protected]: "array subscript is above array bounds"]
  Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Fixes: a983b5e ("mm: memcontrol: fix excessive complexity in memory.stat reporting")
Signed-off-by: Johannes Weiner <[email protected]>
Reported-by: Tejun Heo <[email protected]>
Acked-by: Tejun Heo <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Stephen Rothwell <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
hnaz authored and torvalds committed Apr 11, 2018
1 parent a38c015 commit e27be24
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 30 deletions.
35 changes: 18 additions & 17 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,12 @@ enum memcg_stat_item {
MEMCG_NR_STAT,
};

/* Cgroup-specific events, on top of universal VM events */
enum memcg_event_item {
MEMCG_LOW = NR_VM_EVENT_ITEMS,
enum memcg_memory_event {
MEMCG_LOW,
MEMCG_HIGH,
MEMCG_MAX,
MEMCG_OOM,
MEMCG_NR_EVENTS,
MEMCG_NR_MEMORY_EVENTS,
};

struct mem_cgroup_reclaim_cookie {
Expand Down Expand Up @@ -88,7 +87,7 @@ enum mem_cgroup_events_target {

struct mem_cgroup_stat_cpu {
long count[MEMCG_NR_STAT];
unsigned long events[MEMCG_NR_EVENTS];
unsigned long events[NR_VM_EVENT_ITEMS];
unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS];
};
Expand Down Expand Up @@ -205,7 +204,8 @@ struct mem_cgroup {
/* OOM-Killer disable */
int oom_kill_disable;

/* handle for "memory.events" */
/* memory.events */
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
struct cgroup_file events_file;

/* protect arrays of thresholds */
Expand Down Expand Up @@ -234,9 +234,10 @@ struct mem_cgroup {
struct task_struct *move_lock_task;
unsigned long move_lock_flags;

/* memory.stat */
struct mem_cgroup_stat_cpu __percpu *stat_cpu;
atomic_long_t stat[MEMCG_NR_STAT];
atomic_long_t events[MEMCG_NR_EVENTS];
atomic_long_t events[NR_VM_EVENT_ITEMS];

unsigned long socket_pressure;

Expand Down Expand Up @@ -648,9 +649,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);

/* idx can be of type enum memcg_event_item or vm_event_item */
static inline void __count_memcg_events(struct mem_cgroup *memcg,
int idx, unsigned long count)
enum vm_event_item idx,
unsigned long count)
{
unsigned long x;

Expand All @@ -666,7 +667,8 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
}

static inline void count_memcg_events(struct mem_cgroup *memcg,
int idx, unsigned long count)
enum vm_event_item idx,
unsigned long count)
{
unsigned long flags;

Expand All @@ -675,9 +677,8 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
local_irq_restore(flags);
}

/* idx can be of type enum memcg_event_item or vm_event_item */
static inline void count_memcg_page_event(struct page *page,
int idx)
enum vm_event_item idx)
{
if (page->mem_cgroup)
count_memcg_events(page->mem_cgroup, idx, 1);
Expand All @@ -701,10 +702,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
rcu_read_unlock();
}

static inline void mem_cgroup_event(struct mem_cgroup *memcg,
enum memcg_event_item event)
static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event)
{
count_memcg_events(memcg, event, 1);
atomic_long_inc(&memcg->memory_events[event]);
cgroup_file_notify(&memcg->events_file);
}

Expand All @@ -724,8 +725,8 @@ static inline bool mem_cgroup_disabled(void)
return true;
}

static inline void mem_cgroup_event(struct mem_cgroup *memcg,
enum memcg_event_item event)
static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event)
{
}

Expand Down
28 changes: 16 additions & 12 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,7 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
}
}

for (i = 0; i < MEMCG_NR_EVENTS; i++) {
for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
long x;

x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
Expand All @@ -1858,7 +1858,7 @@ static void reclaim_high(struct mem_cgroup *memcg,
do {
if (page_counter_read(&memcg->memory) <= memcg->high)
continue;
mem_cgroup_event(memcg, MEMCG_HIGH);
memcg_memory_event(memcg, MEMCG_HIGH);
try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
} while ((memcg = parent_mem_cgroup(memcg)));
}
Expand Down Expand Up @@ -1949,7 +1949,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (!gfpflags_allow_blocking(gfp_mask))
goto nomem;

mem_cgroup_event(mem_over_limit, MEMCG_MAX);
memcg_memory_event(mem_over_limit, MEMCG_MAX);

nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
gfp_mask, may_swap);
Expand Down Expand Up @@ -1992,7 +1992,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
if (fatal_signal_pending(current))
goto force;

mem_cgroup_event(mem_over_limit, MEMCG_OOM);
memcg_memory_event(mem_over_limit, MEMCG_OOM);

mem_cgroup_oom(mem_over_limit, gfp_mask,
get_order(nr_pages * PAGE_SIZE));
Expand Down Expand Up @@ -2688,10 +2688,10 @@ static void tree_events(struct mem_cgroup *memcg, unsigned long *events)
struct mem_cgroup *iter;
int i;

memset(events, 0, sizeof(*events) * MEMCG_NR_EVENTS);
memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS);

for_each_mem_cgroup_tree(iter, memcg) {
for (i = 0; i < MEMCG_NR_EVENTS; i++)
for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
events[i] += memcg_sum_events(iter, i);
}
}
Expand Down Expand Up @@ -5178,7 +5178,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
continue;
}

mem_cgroup_event(memcg, MEMCG_OOM);
memcg_memory_event(memcg, MEMCG_OOM);
if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
break;
}
Expand All @@ -5191,10 +5191,14 @@ static int memory_events_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

seq_printf(m, "low %lu\n", memcg_sum_events(memcg, MEMCG_LOW));
seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
seq_printf(m, "low %lu\n",
atomic_long_read(&memcg->memory_events[MEMCG_LOW]));
seq_printf(m, "high %lu\n",
atomic_long_read(&memcg->memory_events[MEMCG_HIGH]));
seq_printf(m, "max %lu\n",
atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
seq_printf(m, "oom %lu\n",
atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));

return 0;
Expand All @@ -5204,7 +5208,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
unsigned long stat[MEMCG_NR_STAT];
unsigned long events[MEMCG_NR_EVENTS];
unsigned long events[NR_VM_EVENT_ITEMS];
int i;

/*
Expand Down
2 changes: 1 addition & 1 deletion mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -2530,7 +2530,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
sc->memcg_low_skipped = 1;
continue;
}
mem_cgroup_event(memcg, MEMCG_LOW);
memcg_memory_event(memcg, MEMCG_LOW);
}

reclaimed = sc->nr_reclaimed;
Expand Down

0 comments on commit e27be24

Please sign in to comment.