Skip to content

Commit

Permalink
vmscan: per memory cgroup slab shrinkers
Browse files Browse the repository at this point in the history
This patch adds SHRINKER_MEMCG_AWARE flag.  If a shrinker has this flag
set, it will be called per memory cgroup.  The memory cgroup to scan
objects from is passed in shrink_control->memcg.  If the memory cgroup
is NULL, a memcg aware shrinker is supposed to scan objects from the
global list.  Unaware shrinkers are only called on global pressure with
memcg=NULL.

Signed-off-by: Vladimir Davydov <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Greg Thelen <[email protected]>
Cc: Glauber Costa <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Tejun Heo <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Vladimir Davydov authored and torvalds committed Feb 13, 2015
1 parent 4101b62 commit cb731d6
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 50 deletions.
14 changes: 0 additions & 14 deletions fs/drop_caches.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
iput(toput_inode);
}

/*
 * Repeatedly shrink slab caches on every online node: each pass sums the
 * values returned by shrink_node_slabs() across nodes, and passes repeat
 * until one of them totals 10 or fewer.
 *
 * NOTE(review): shrink_node_slabs() is declared as returning unsigned long,
 * but the per-pass total is kept in an int.
 */
static void drop_slab(void)
{
int nr_objects;

do {
int nid;

/* restart the per-pass total */
nr_objects = 0;
for_each_online_node(nid)
nr_objects += shrink_node_slabs(GFP_KERNEL, nid,
1000, 1000);
} while (nr_objects > 10);
}

int drop_caches_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
Expand Down
7 changes: 7 additions & 0 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,8 @@ static inline bool memcg_kmem_enabled(void)
return static_key_false(&memcg_kmem_enabled_key);
}

bool memcg_kmem_is_active(struct mem_cgroup *memcg);

/*
* In general, we'll do everything in our power to not incur in any overhead
* for non-memcg users for the kmem functions. Not even a function call, if we
Expand Down Expand Up @@ -542,6 +544,11 @@ static inline bool memcg_kmem_enabled(void)
return false;
}

/*
 * Stub variant of memcg_kmem_is_active(): ignores @memcg and always
 * returns false.
 * NOTE(review): presumably this sits in the branch compiled when
 * CONFIG_MEMCG_KMEM is disabled -- confirm against the full header.
 */
static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
{
return false;
}

static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
Expand Down
5 changes: 2 additions & 3 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -2168,9 +2168,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
#endif

unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
unsigned long nr_scanned,
unsigned long nr_eligible);
void drop_slab(void);
void drop_slab_node(int nid);

#ifndef CONFIG_MMU
#define randomize_va_space 0
Expand Down
6 changes: 5 additions & 1 deletion include/linux/shrinker.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ struct shrink_control {

/* current node being shrunk (for NUMA aware shrinkers) */
int nid;

/* current memcg being shrunk (for memcg aware shrinkers) */
struct mem_cgroup *memcg;
};

#define SHRINK_STOP (~0UL)
Expand Down Expand Up @@ -61,7 +64,8 @@ struct shrinker {
#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */

/* Flags */
#define SHRINKER_NUMA_AWARE (1 << 0)
#define SHRINKER_NUMA_AWARE (1 << 0)
#define SHRINKER_MEMCG_AWARE (1 << 1)

extern int register_shrinker(struct shrinker *);
extern void unregister_shrinker(struct shrinker *);
Expand Down
2 changes: 1 addition & 1 deletion mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ struct mem_cgroup {
};

#ifdef CONFIG_MEMCG_KMEM
static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
bool memcg_kmem_is_active(struct mem_cgroup *memcg)
{
return memcg->kmemcg_id >= 0;
}
Expand Down
11 changes: 2 additions & 9 deletions mm/memory-failure.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,15 +242,8 @@ void shake_page(struct page *p, int access)
* Only call drop_slab_node here (which would also shrink
* other caches) if access is not potentially fatal.
*/
if (access) {
int nr;
int nid = page_to_nid(p);
do {
nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000);
if (page_count(p) == 1)
break;
} while (nr > 10);
}
if (access)
drop_slab_node(page_to_nid(p));
}
EXPORT_SYMBOL_GPL(shake_page);

Expand Down
85 changes: 63 additions & 22 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker);

#define SHRINK_BATCH 128

static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
struct shrinker *shrinker,
unsigned long nr_scanned,
unsigned long nr_eligible)
static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
struct shrinker *shrinker,
unsigned long nr_scanned,
unsigned long nr_eligible)
{
unsigned long freed = 0;
unsigned long long delta;
Expand Down Expand Up @@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
}

/**
* shrink_node_slabs - shrink slab caches of a given node
* shrink_slab - shrink slab caches
* @gfp_mask: allocation context
* @nid: node whose slab caches to target
* @memcg: memory cgroup whose slab caches to target
* @nr_scanned: pressure numerator
* @nr_eligible: pressure denominator
*
Expand All @@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
* @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
* unaware shrinkers will receive a node id of 0 instead.
*
* @memcg specifies the memory cgroup to target. If it is not NULL,
* only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
* objects from the memory cgroup specified. Otherwise all shrinkers
* are called, and memcg aware shrinkers are supposed to scan the
* global list then.
*
* @nr_scanned and @nr_eligible form a ratio that indicates how much of
* the available objects should be scanned. Page reclaim for example
* passes the number of pages scanned and the number of pages on the
Expand All @@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
*
* Returns the number of reclaimed slab objects.
*/
unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
unsigned long nr_scanned,
unsigned long nr_eligible)
static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg,
unsigned long nr_scanned,
unsigned long nr_eligible)
{
struct shrinker *shrinker;
unsigned long freed = 0;

if (memcg && !memcg_kmem_is_active(memcg))
return 0;

if (nr_scanned == 0)
nr_scanned = SWAP_CLUSTER_MAX;

Expand All @@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
.memcg = memcg,
};

if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
continue;

if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
sc.nid = 0;

freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
}

up_read(&shrinker_rwsem);
Expand All @@ -404,6 +419,29 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
return freed;
}

/*
 * drop_slab_node - repeatedly shrink the slab caches of node @nid
 *
 * Each pass calls shrink_slab() first with memcg == NULL and then once for
 * every memory cgroup handed back by mem_cgroup_iter(), adding up the
 * returned counts. Passes repeat until one of them totals 10 or fewer.
 *
 * NOTE(review): the number of passes is not bounded; the outer loop ends
 * only when a whole pass reports <= 10.
 */
void drop_slab_node(int nid)
{
unsigned long freed;

do {
/* NULL makes the first shrink_slab() call of each pass the global one */
struct mem_cgroup *memcg = NULL;

freed = 0;
do {
/* nr_scanned == nr_eligible (1000/1000) */
freed += shrink_slab(GFP_KERNEL, nid, memcg,
1000, 1000);
/* advance to the next memcg; a NULL result ends this pass */
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
} while (freed > 10);
}

/*
 * drop_slab - call drop_slab_node() for every online node, one node at a
 * time.
 */
void drop_slab(void)
{
int nid;

for_each_online_node(nid)
drop_slab_node(nid);
}

static inline int is_page_cache_freeable(struct page *page)
{
/*
Expand Down Expand Up @@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
static bool shrink_zone(struct zone *zone, struct scan_control *sc,
bool is_classzone)
{
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long nr_reclaimed, nr_scanned;
bool reclaimable = false;

Expand All @@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
unsigned long lru_pages;
unsigned long scanned;
struct lruvec *lruvec;
int swappiness;

Expand All @@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,

lruvec = mem_cgroup_zone_lruvec(zone, memcg);
swappiness = mem_cgroup_swappiness(memcg);
scanned = sc->nr_scanned;

shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
zone_lru_pages += lru_pages;

if (memcg && is_classzone)
shrink_slab(sc->gfp_mask, zone_to_nid(zone),
memcg, sc->nr_scanned - scanned,
lru_pages);

/*
* Direct reclaim and kswapd have to scan all memory
* cgroups to fulfill the overall scan target for the
Expand All @@ -2330,19 +2376,14 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
* Shrink the slab caches in the same proportion that
* the eligible LRU pages were scanned.
*/
if (global_reclaim(sc) && is_classzone) {
struct reclaim_state *reclaim_state;

shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
sc->nr_scanned - nr_scanned,
zone_lru_pages);

reclaim_state = current->reclaim_state;
if (reclaim_state) {
sc->nr_reclaimed +=
reclaim_state->reclaimed_slab;
reclaim_state->reclaimed_slab = 0;
}
if (global_reclaim(sc) && is_classzone)
shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
sc->nr_scanned - nr_scanned,
zone_lru_pages);

if (reclaim_state) {
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
reclaim_state->reclaimed_slab = 0;
}

vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
Expand Down

0 comments on commit cb731d6

Please sign in to comment.