Skip to content

Commit

Permalink
mm, workingset: make working set detection node-aware
Browse files: browse the repository at this point in the history
Working set and refault detection is still zone-based; convert it to be node-based.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Mel Gorman <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Rik van Riel <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
gormanm authored and torvalds committed Jul 28, 2016
1 parent ef8f232 commit 1e6b108
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 44 deletions.
6 changes: 3 additions & 3 deletions include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,6 @@ enum zone_stat_item {
NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */
#endif
WORKINGSET_REFAULT,
WORKINGSET_ACTIVATE,
WORKINGSET_NODERECLAIM,
NR_ANON_THPS,
NR_SHMEM_THPS,
NR_SHMEM_PMDMAPPED,
Expand All @@ -164,6 +161,9 @@ enum node_stat_item {
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
NR_PAGES_SCANNED, /* pages scanned since last reclaim */
WORKINGSET_REFAULT,
WORKINGSET_ACTIVATE,
WORKINGSET_NODERECLAIM,
NR_VM_NODE_STAT_ITEMS
};

Expand Down
1 change: 0 additions & 1 deletion include/linux/vmstat.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ void mod_node_page_state(struct pglist_data *, enum node_stat_item, long);
void inc_node_page_state(struct page *, enum node_stat_item);
void dec_node_page_state(struct page *, enum node_stat_item);

extern void inc_zone_state(struct zone *, enum zone_stat_item);
extern void inc_node_state(struct pglist_data *, enum node_stat_item);
extern void __inc_zone_state(struct zone *, enum zone_stat_item);
extern void __inc_node_state(struct pglist_data *, enum node_stat_item);
Expand Down
20 changes: 3 additions & 17 deletions mm/vmstat.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,11 +446,6 @@ void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
mod_zone_state(zone, item, 1, 1);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
mod_zone_state(page_zone(page), item, 1, 1);
Expand Down Expand Up @@ -539,15 +534,6 @@ void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
unsigned long flags;

local_irq_save(flags);
__inc_zone_state(zone, item);
local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
unsigned long flags;
Expand Down Expand Up @@ -967,9 +953,6 @@ const char * const vmstat_text[] = {
"numa_local",
"numa_other",
#endif
"workingset_refault",
"workingset_activate",
"workingset_nodereclaim",
"nr_anon_transparent_hugepages",
"nr_shmem_hugepages",
"nr_shmem_pmdmapped",
Expand All @@ -984,6 +967,9 @@ const char * const vmstat_text[] = {
"nr_isolated_anon",
"nr_isolated_file",
"nr_pages_scanned",
"workingset_refault",
"workingset_activate",
"workingset_nodereclaim",

/* enum writeback_stat_item counters */
"nr_dirty_threshold",
Expand Down
43 changes: 20 additions & 23 deletions mm/workingset.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
/*
* Double CLOCK lists
*
* Per zone, two clock lists are maintained for file pages: the
* Per node, two clock lists are maintained for file pages: the
* inactive and the active list. Freshly faulted pages start out at
* the head of the inactive list and page reclaim scans pages from the
* tail. Pages that are accessed multiple times on the inactive list
Expand Down Expand Up @@ -141,19 +141,19 @@
*
* Implementation
*
* For each zone's file LRU lists, a counter for inactive evictions
* and activations is maintained (zone->inactive_age).
* For each node's file LRU lists, a counter for inactive evictions
* and activations is maintained (node->inactive_age).
*
* On eviction, a snapshot of this counter (along with some bits to
* identify the zone) is stored in the now empty page cache radix tree
* identify the node) is stored in the now empty page cache radix tree
* slot of the evicted page. This is called a shadow entry.
*
* On cache misses for which there are shadow entries, an eligible
* refault distance will immediately activate the refaulting page.
*/

#define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \
ZONES_SHIFT + NODES_SHIFT + \
NODES_SHIFT + \
MEM_CGROUP_ID_SHIFT)
#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)

Expand All @@ -167,33 +167,30 @@
*/
static unsigned int bucket_order __read_mostly;

static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction)
static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
{
eviction >>= bucket_order;
eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone);
eviction = (eviction << ZONES_SHIFT) | zone_idx(zone);
eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);

return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
}

static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep,
static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
unsigned long *evictionp)
{
unsigned long entry = (unsigned long)shadow;
int memcgid, nid, zid;
int memcgid, nid;

entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
zid = entry & ((1UL << ZONES_SHIFT) - 1);
entry >>= ZONES_SHIFT;
nid = entry & ((1UL << NODES_SHIFT) - 1);
entry >>= NODES_SHIFT;
memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
entry >>= MEM_CGROUP_ID_SHIFT;

*memcgidp = memcgid;
*zonep = NODE_DATA(nid)->node_zones + zid;
*pgdat = NODE_DATA(nid);
*evictionp = entry << bucket_order;
}

Expand All @@ -208,7 +205,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep,
void *workingset_eviction(struct address_space *mapping, struct page *page)
{
struct mem_cgroup *memcg = page_memcg(page);
struct zone *zone = page_zone(page);
struct pglist_data *pgdat = page_pgdat(page);
int memcgid = mem_cgroup_id(memcg);
unsigned long eviction;
struct lruvec *lruvec;
Expand All @@ -218,17 +215,17 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);

lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg);
lruvec = mem_cgroup_lruvec(pgdat, memcg);
eviction = atomic_long_inc_return(&lruvec->inactive_age);
return pack_shadow(memcgid, zone, eviction);
return pack_shadow(memcgid, pgdat, eviction);
}

/**
* workingset_refault - evaluate the refault of a previously evicted page
* @shadow: shadow entry of the evicted page
*
* Calculates and evaluates the refault distance of the previously
* evicted page in the context of the zone it was allocated in.
* evicted page in the context of the node it was allocated in.
*
* Returns %true if the page should be activated, %false otherwise.
*/
Expand All @@ -240,10 +237,10 @@ bool workingset_refault(void *shadow)
unsigned long eviction;
struct lruvec *lruvec;
unsigned long refault;
struct zone *zone;
struct pglist_data *pgdat;
int memcgid;

unpack_shadow(shadow, &memcgid, &zone, &eviction);
unpack_shadow(shadow, &memcgid, &pgdat, &eviction);

rcu_read_lock();
/*
Expand All @@ -267,7 +264,7 @@ bool workingset_refault(void *shadow)
rcu_read_unlock();
return false;
}
lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg);
lruvec = mem_cgroup_lruvec(pgdat, memcg);
refault = atomic_long_read(&lruvec->inactive_age);
active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
rcu_read_unlock();
Expand All @@ -290,10 +287,10 @@ bool workingset_refault(void *shadow)
*/
refault_distance = (refault - eviction) & EVICTION_MASK;

inc_zone_state(zone, WORKINGSET_REFAULT);
inc_node_state(pgdat, WORKINGSET_REFAULT);

if (refault_distance <= active_file) {
inc_zone_state(zone, WORKINGSET_ACTIVATE);
inc_node_state(pgdat, WORKINGSET_ACTIVATE);
return true;
}
return false;
Expand Down Expand Up @@ -436,7 +433,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
}
}
BUG_ON(node->count);
inc_zone_state(page_zone(virt_to_page(node)), WORKINGSET_NODERECLAIM);
inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
if (!__radix_tree_delete_node(&mapping->page_tree, node))
BUG();

Expand Down

0 comments on commit 1e6b108

Please sign in to comment.