Skip to content

Commit

Permalink
mm, slab: faster active and free stats
Browse files Browse the repository at this point in the history
Reading /proc/slabinfo or monitoring slabtop(1) can become very
expensive if there are many slab caches and if there are very lengthy
per-node partial and/or free lists.

Commit 07a63c4 ("mm/slab: improve performance of gathering slabinfo
stats") addressed the per-node full lists which showed a significant
improvement when no objects were freed.  This patch has the same
motivation and optimizes the remainder of the use cases where there are
very lengthy partial and free lists.

This patch maintains per-node active_slabs (full and partial) and
free_slabs rather than iterating the lists at runtime when reading
/proc/slabinfo.

When allocating 100GB of slab from a test cache where every slab page is
on the partial list, reading /proc/slabinfo (includes all other slab
caches on the system) takes ~247ms on average over 48 samples.

As a result of this patch, the same read takes ~0.856ms on average.

[[email protected]: changelog]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Greg Thelen <[email protected]>
Signed-off-by: David Rientjes <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
gthelen authored and torvalds committed Dec 13, 2016
1 parent e70954f commit f728b0a
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 71 deletions.
117 changes: 47 additions & 70 deletions mm/slab.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,14 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
INIT_LIST_HEAD(&parent->slabs_full);
INIT_LIST_HEAD(&parent->slabs_partial);
INIT_LIST_HEAD(&parent->slabs_free);
parent->active_slabs = 0;
parent->free_slabs = 0;
parent->shared = NULL;
parent->alien = NULL;
parent->colour_next = 0;
spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
parent->num_slabs = 0;
}

#define MAKE_LIST(cachep, listp, slab, nodeid) \
Expand Down Expand Up @@ -1366,7 +1367,6 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
{
#if DEBUG
struct kmem_cache_node *n;
struct page *page;
unsigned long flags;
int node;
static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
Expand All @@ -1381,32 +1381,20 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
cachep->name, cachep->size, cachep->gfporder);

for_each_kmem_cache_node(cachep, node, n) {
unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0;
unsigned long num_slabs_partial = 0, num_slabs_free = 0;
unsigned long num_slabs_full;
unsigned long active_objs = 0, free_objs = 0;
unsigned long active_slabs, num_slabs;

spin_lock_irqsave(&n->list_lock, flags);
num_slabs = n->num_slabs;
list_for_each_entry(page, &n->slabs_partial, lru) {
active_objs += page->active;
num_slabs_partial++;
}
list_for_each_entry(page, &n->slabs_free, lru)
num_slabs_free++;
active_slabs = n->active_slabs;
num_slabs = active_slabs + n->free_slabs;

free_objects += n->free_objects;
active_objs += (num_slabs * cachep->num) - n->free_objects;
free_objs += n->free_objects;
spin_unlock_irqrestore(&n->list_lock, flags);

num_objs = num_slabs * cachep->num;
active_slabs = num_slabs - num_slabs_free;
num_slabs_full = num_slabs -
(num_slabs_partial + num_slabs_free);
active_objs += (num_slabs_full * cachep->num);

pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
node, active_slabs, num_slabs, active_objs, num_objs,
free_objects);
node, active_slabs, num_slabs, active_objs,
num_slabs * cachep->num, free_objs);
}
#endif
}
Expand Down Expand Up @@ -2318,7 +2306,7 @@ static int drain_freelist(struct kmem_cache *cache,

page = list_entry(p, struct page, lru);
list_del(&page->lru);
n->num_slabs--;
n->free_slabs--;
/*
* Safe to drop the lock. The slab is no longer linked
* to the cache.
Expand Down Expand Up @@ -2753,12 +2741,14 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
n = get_node(cachep, page_to_nid(page));

spin_lock(&n->list_lock);
if (!page->active)
if (!page->active) {
list_add_tail(&page->lru, &(n->slabs_free));
else
n->free_slabs++;
} else {
fixup_slab_list(cachep, n, page, &list);
n->active_slabs++;
}

n->num_slabs++;
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - page->active;
spin_unlock(&n->list_lock);
Expand Down Expand Up @@ -2884,7 +2874,7 @@ static inline void fixup_slab_list(struct kmem_cache *cachep,

/* Try to find non-pfmemalloc slab if needed */
static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
struct page *page, bool pfmemalloc)
struct page *page, bool *page_is_free, bool pfmemalloc)
{
if (!page)
return NULL;
Expand All @@ -2903,19 +2893,24 @@ static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,

/* Move pfmemalloc slab to the end of list to speed up next search */
list_del(&page->lru);
if (!page->active)
if (*page_is_free) {
WARN_ON(page->active);
list_add_tail(&page->lru, &n->slabs_free);
else
*page_is_free = false;
} else
list_add_tail(&page->lru, &n->slabs_partial);

list_for_each_entry(page, &n->slabs_partial, lru) {
if (!PageSlabPfmemalloc(page))
return page;
}

n->free_touched = 1;
list_for_each_entry(page, &n->slabs_free, lru) {
if (!PageSlabPfmemalloc(page))
if (!PageSlabPfmemalloc(page)) {
*page_is_free = true;
return page;
}
}

return NULL;
Expand All @@ -2924,17 +2919,26 @@ static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
struct page *page;
bool page_is_free = false;

assert_spin_locked(&n->list_lock);
page = list_first_entry_or_null(&n->slabs_partial,
struct page, lru);
if (!page) {
n->free_touched = 1;
page = list_first_entry_or_null(&n->slabs_free,
struct page, lru);
if (page)
page_is_free = true;
}

if (sk_memalloc_socks())
return get_valid_first_slab(n, page, pfmemalloc);
page = get_valid_first_slab(n, page, &page_is_free, pfmemalloc);

if (page && page_is_free) {
n->active_slabs++;
n->free_slabs--;
}

return page;
}
Expand Down Expand Up @@ -3434,9 +3438,11 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
STATS_DEC_ACTIVE(cachep);

/* fixup slab chains */
if (page->active == 0)
if (page->active == 0) {
list_add(&page->lru, &n->slabs_free);
else {
n->free_slabs++;
n->active_slabs--;
} else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
* other objects to be freed, too.
Expand All @@ -3450,7 +3456,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,

page = list_last_entry(&n->slabs_free, struct page, lru);
list_move(&page->lru, list);
n->num_slabs--;
n->free_slabs--;
}
}

Expand Down Expand Up @@ -4102,59 +4108,30 @@ static void cache_reap(struct work_struct *w)
#ifdef CONFIG_SLABINFO
void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
{
struct page *page;
unsigned long active_objs;
unsigned long num_objs;
unsigned long active_slabs = 0;
unsigned long num_slabs, free_objects = 0, shared_avail = 0;
unsigned long num_slabs_partial = 0, num_slabs_free = 0;
unsigned long num_slabs_full = 0;
const char *name;
char *error = NULL;
unsigned long active_objs, num_objs, active_slabs;
unsigned long num_slabs = 0, free_objs = 0, shared_avail = 0;
unsigned long num_slabs_free = 0;
int node;
struct kmem_cache_node *n;

active_objs = 0;
num_slabs = 0;
for_each_kmem_cache_node(cachep, node, n) {

check_irq_on();
spin_lock_irq(&n->list_lock);

num_slabs += n->num_slabs;
num_slabs += n->active_slabs + n->free_slabs;
num_slabs_free += n->free_slabs;

list_for_each_entry(page, &n->slabs_partial, lru) {
if (page->active == cachep->num && !error)
error = "slabs_partial accounting error";
if (!page->active && !error)
error = "slabs_partial accounting error";
active_objs += page->active;
num_slabs_partial++;
}
free_objs += n->free_objects;

list_for_each_entry(page, &n->slabs_free, lru) {
if (page->active && !error)
error = "slabs_free accounting error";
num_slabs_free++;
}

free_objects += n->free_objects;
if (n->shared)
shared_avail += n->shared->avail;

spin_unlock_irq(&n->list_lock);
}
num_objs = num_slabs * cachep->num;
active_slabs = num_slabs - num_slabs_free;
num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
active_objs += (num_slabs_full * cachep->num);

if (num_objs - active_objs != free_objects && !error)
error = "free_objects accounting error";

name = cachep->name;
if (error)
pr_err("slab: cache %s error: %s\n", name, error);
active_objs = num_objs - free_objs;

sinfo->active_objs = active_objs;
sinfo->num_objs = num_objs;
Expand Down
3 changes: 2 additions & 1 deletion mm/slab.h
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,8 @@ struct kmem_cache_node {
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
unsigned long num_slabs;
unsigned long active_slabs; /* length of slabs_partial+slabs_full */
unsigned long free_slabs; /* length of slabs_free */
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
Expand Down

0 comments on commit f728b0a

Please sign in to comment.