Skip to content

Commit

Permalink
mm: convert zone_reclaim to node_reclaim
Browse files Browse the repository at this point in the history
As reclaim is now per-node based, convert zone_reclaim to be
node_reclaim.  It is possible that a node will be reclaimed multiple
times if it has multiple zones but this is unavoidable without caching
all nodes traversed so far.  The documentation and interface to
userspace is the same from a configuration perspective and will be
similar in behaviour unless the node-local allocation requests were also
limited to lower zones.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Mel Gorman <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Hillf Danton <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Rik van Riel <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
gormanm authored and torvalds committed Jul 28, 2016
1 parent 52e9f87 commit a5f5f91
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 69 deletions.
18 changes: 9 additions & 9 deletions include/linux/mmzone.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,14 +372,6 @@ struct zone {
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_NUMA
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
unsigned long min_unmapped_pages;
unsigned long min_slab_pages;
#endif /* CONFIG_NUMA */

/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;

Expand Down Expand Up @@ -525,7 +517,6 @@ struct zone {
} ____cacheline_internodealigned_in_smp;

/*
 * Bit numbers for zone->flags (the mm/vmscan.c hunk passes
 * ZONE_RECLAIM_LOCKED to test_and_set_bit()/clear_bit() on &zone->flags).
 *
 * NOTE(review): this listing is a diff rendered without +/- markers; the
 * header reports one deletion in this hunk, presumably the
 * ZONE_RECLAIM_LOCKED entry, since PGDAT_RECLAIM_LOCKED is added to
 * enum pgdat_flags -- confirm against the commit.
 */
enum zone_flags {
ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
};

Expand All @@ -540,6 +531,7 @@ enum pgdat_flags {
PGDAT_WRITEBACK, /* reclaim scanning has recently found
* many pages under writeback
*/
PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
};

static inline unsigned long zone_end_pfn(const struct zone *zone)
Expand Down Expand Up @@ -688,6 +680,14 @@ typedef struct pglist_data {
*/
unsigned long totalreserve_pages;

#ifdef CONFIG_NUMA
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
unsigned long min_unmapped_pages;
unsigned long min_slab_pages;
#endif /* CONFIG_NUMA */

/* Write-intensive fields used by page reclaim */
ZONE_PADDING(_pad1_)
spinlock_t lru_lock;
Expand Down
9 changes: 5 additions & 4 deletions include/linux/swap.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,13 +326,14 @@ extern int remove_mapping(struct address_space *mapping, struct page *page);
extern unsigned long vm_total_pages;

#ifdef CONFIG_NUMA
extern int zone_reclaim_mode;
extern int node_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
extern int sysctl_min_slab_ratio;
extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
#else
#define zone_reclaim_mode 0
static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
#define node_reclaim_mode 0
/*
 * Stub for builds without CONFIG_NUMA (this is the #else branch of the
 * #ifdef CONFIG_NUMA above): ignores its arguments and always returns 0.
 */
static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
unsigned int order)
{
return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion include/linux/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ int arch_update_cpu_topology(void);
/*
* If the distance between nodes in a system is larger than RECLAIM_DISTANCE
* (in whatever arch specific measurement units returned by node_distance())
* and zone_reclaim_mode is enabled then the VM will only call zone_reclaim()
* and node_reclaim_mode is enabled then the VM will only call node_reclaim()
* on nodes within this distance.
*/
#define RECLAIM_DISTANCE 30
Expand Down
4 changes: 2 additions & 2 deletions kernel/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1508,8 +1508,8 @@ static struct ctl_table vm_table[] = {
#ifdef CONFIG_NUMA
{
.procname = "zone_reclaim_mode",
.data = &zone_reclaim_mode,
.maxlen = sizeof(zone_reclaim_mode),
.data = &node_reclaim_mode,
.maxlen = sizeof(node_reclaim_mode),
.mode = 0644,
.proc_handler = proc_dointvec,
.extra1 = &zero,
Expand Down
8 changes: 4 additions & 4 deletions mm/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,10 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
}
#endif /* CONFIG_SPARSEMEM */

#define ZONE_RECLAIM_NOSCAN -2
#define ZONE_RECLAIM_FULL -1
#define ZONE_RECLAIM_SOME 0
#define ZONE_RECLAIM_SUCCESS 1
#define NODE_RECLAIM_NOSCAN -2
#define NODE_RECLAIM_FULL -1
#define NODE_RECLAIM_SOME 0
#define NODE_RECLAIM_SUCCESS 1

extern int hwpoison_filter(struct page *p);

Expand Down
4 changes: 2 additions & 2 deletions mm/khugepaged.c
Original file line number Diff line number Diff line change
Expand Up @@ -672,10 +672,10 @@ static bool khugepaged_scan_abort(int nid)
int i;

/*
* If zone_reclaim_mode is disabled, then no extra effort is made to
* If node_reclaim_mode is disabled, then no extra effort is made to
* allocate memory locally.
*/
if (!zone_reclaim_mode)
if (!node_reclaim_mode)
return false;

/* If there is a count for this node already, it must be acceptable */
Expand Down
24 changes: 16 additions & 8 deletions mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2942,16 +2942,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
if (alloc_flags & ALLOC_NO_WATERMARKS)
goto try_this_zone;

if (zone_reclaim_mode == 0 ||
if (node_reclaim_mode == 0 ||
!zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
continue;

ret = zone_reclaim(zone, gfp_mask, order);
ret = node_reclaim(zone->zone_pgdat, gfp_mask, order);
switch (ret) {
case ZONE_RECLAIM_NOSCAN:
case NODE_RECLAIM_NOSCAN:
/* did not scan */
continue;
case ZONE_RECLAIM_FULL:
case NODE_RECLAIM_FULL:
/* scanned but unreclaimable */
continue;
default:
Expand Down Expand Up @@ -5948,9 +5948,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
zone->node = nid;
zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
/ 100;
zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
zone->zone_pgdat = pgdat;
Expand Down Expand Up @@ -6922,31 +6922,39 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
/*
 * sysctl handler for min_unmapped_ratio.
 *
 * Parses the value with proc_dointvec_minmax(); if that returns non-zero the
 * error is returned unchanged. Otherwise every zone's share
 * (managed_pages * sysctl_min_unmapped_ratio / 100) is added into its
 * zone_pgdat->min_unmapped_pages and 0 is returned.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so the
 * "zone->min_unmapped_pages = ..." line appears to be the pre-change form of
 * the statement that follows it -- confirm against the commit.
 */
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
struct pglist_data *pgdat;
struct zone *zone;
int rc;

rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;

/*
 * NOTE(review): the loop further down accumulates into min_unmapped_pages,
 * but this reset clears min_slab_pages. It looks like min_unmapped_pages
 * was intended: as written, the unmapped threshold is never zeroed before
 * being summed again, and the slab threshold is zeroed without being
 * recomputed here -- confirm and fix.
 */
for_each_online_pgdat(pgdat)
pgdat->min_slab_pages = 0;

/* Sum each zone's contribution into the node it belongs to. */
for_each_zone(zone)
zone->min_unmapped_pages = (zone->managed_pages *
zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
sysctl_min_unmapped_ratio) / 100;
return 0;
}

/*
 * sysctl handler for min_slab_ratio.
 *
 * Parses the value with proc_dointvec_minmax(); if that returns non-zero the
 * error is returned unchanged. Otherwise the per-node min_slab_pages is
 * zeroed for every online pgdat and rebuilt by adding each zone's share
 * (managed_pages * sysctl_min_slab_ratio / 100) into its zone_pgdat.
 * Returns 0 on success.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so the
 * "zone->min_slab_pages = ..." line appears to be the pre-change form of the
 * statement that follows it -- confirm against the commit.
 */
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
struct pglist_data *pgdat;
struct zone *zone;
int rc;

rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;

/* Start from zero so the per-zone sums below do not stack on old values. */
for_each_online_pgdat(pgdat)
pgdat->min_slab_pages = 0;

/* Sum each zone's contribution into the node it belongs to. */
for_each_zone(zone)
zone->min_slab_pages = (zone->managed_pages *
zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
sysctl_min_slab_ratio) / 100;
return 0;
}
Expand Down
77 changes: 38 additions & 39 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -3565,27 +3565,27 @@ module_init(kswapd_init)

#ifdef CONFIG_NUMA
/*
* Zone reclaim mode
* Node reclaim mode
*
* If non-zero call zone_reclaim when the number of free pages falls below
* If non-zero call node_reclaim when the number of free pages falls below
* the watermarks.
*/
int zone_reclaim_mode __read_mostly;
int node_reclaim_mode __read_mostly;

#define RECLAIM_OFF 0
#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */

/*
* Priority for ZONE_RECLAIM. This determines the fraction of pages
* Priority for NODE_RECLAIM. This determines the fraction of pages
* of a node considered for each zone_reclaim. 4 scans 1/16th of
* a zone.
*/
#define ZONE_RECLAIM_PRIORITY 4
#define NODE_RECLAIM_PRIORITY 4

/*
* Percentage of pages in a zone that must be unmapped for zone_reclaim to
* Percentage of pages in a zone that must be unmapped for node_reclaim to
* occur.
*/
int sysctl_min_unmapped_ratio = 1;
Expand All @@ -3611,7 +3611,7 @@ static inline unsigned long node_unmapped_file_pages(struct pglist_data *pgdat)
}

/* Work out how many page cache pages we can reclaim in this reclaim_mode */
static unsigned long zone_pagecache_reclaimable(struct zone *zone)
static unsigned long node_pagecache_reclaimable(struct pglist_data *pgdat)
{
unsigned long nr_pagecache_reclaimable;
unsigned long delta = 0;
Expand All @@ -3622,14 +3622,14 @@ static unsigned long zone_pagecache_reclaimable(struct zone *zone)
* pages like swapcache and node_unmapped_file_pages() provides
* a better estimate
*/
if (zone_reclaim_mode & RECLAIM_UNMAP)
nr_pagecache_reclaimable = node_page_state(zone->zone_pgdat, NR_FILE_PAGES);
if (node_reclaim_mode & RECLAIM_UNMAP)
nr_pagecache_reclaimable = node_page_state(pgdat, NR_FILE_PAGES);
else
nr_pagecache_reclaimable = node_unmapped_file_pages(zone->zone_pgdat);
nr_pagecache_reclaimable = node_unmapped_file_pages(pgdat);

/* If we can't clean pages, remove dirty pages from consideration */
if (!(zone_reclaim_mode & RECLAIM_WRITE))
delta += node_page_state(zone->zone_pgdat, NR_FILE_DIRTY);
if (!(node_reclaim_mode & RECLAIM_WRITE))
delta += node_page_state(pgdat, NR_FILE_DIRTY);

/* Watch for any possible underflows due to delta */
if (unlikely(delta > nr_pagecache_reclaimable))
Expand All @@ -3639,23 +3639,24 @@ static unsigned long zone_pagecache_reclaimable(struct zone *zone)
}

/*
* Try to free up some pages from this zone through reclaim.
* Try to free up some pages from this node through reclaim.
*/
static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
{
/* Minimum pages needed in order to stay on node */
const unsigned long nr_pages = 1 << order;
struct task_struct *p = current;
struct reclaim_state reclaim_state;
int classzone_idx = gfp_zone(gfp_mask);
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
.order = order,
.priority = ZONE_RECLAIM_PRIORITY,
.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
.may_unmap = !!(zone_reclaim_mode & RECLAIM_UNMAP),
.priority = NODE_RECLAIM_PRIORITY,
.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
.may_swap = 1,
.reclaim_idx = zone_idx(zone),
.reclaim_idx = classzone_idx,
};

cond_resched();
Expand All @@ -3669,13 +3670,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;

if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
/*
* Free memory by calling shrink zone with increasing
* priorities until we have enough memory freed.
*/
do {
shrink_node(zone->zone_pgdat, &sc, zone_idx(zone));
shrink_node(pgdat, &sc, classzone_idx);
} while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
}

Expand All @@ -3685,49 +3686,47 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
return sc.nr_reclaimed >= nr_pages;
}

int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
{
int node_id;
int ret;

/*
* Zone reclaim reclaims unmapped file backed pages and
* Node reclaim reclaims unmapped file backed pages and
* slab pages if we are over the defined limits.
*
* A small portion of unmapped file backed pages is needed for
* file I/O otherwise pages read by file I/O will be immediately
* thrown out if the zone is overallocated. So we do not reclaim
* if less than a specified percentage of the zone is used by
* thrown out if the node is overallocated. So we do not reclaim
* if less than a specified percentage of the node is used by
* unmapped file backed pages.
*/
if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
return ZONE_RECLAIM_FULL;
if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
return NODE_RECLAIM_FULL;

if (!pgdat_reclaimable(zone->zone_pgdat))
return ZONE_RECLAIM_FULL;
if (!pgdat_reclaimable(pgdat))
return NODE_RECLAIM_FULL;

/*
* Do not scan if the allocation should not be delayed.
*/
if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC))
return ZONE_RECLAIM_NOSCAN;
return NODE_RECLAIM_NOSCAN;

/*
* Only run zone reclaim on the local zone or on zones that do not
* Only run node reclaim on the local node or on nodes that do not
* have associated processors. This will favor the local processor
* over remote processors and spread off node memory allocations
* as wide as possible.
*/
node_id = zone_to_nid(zone);
if (node_state(node_id, N_CPU) && node_id != numa_node_id())
return ZONE_RECLAIM_NOSCAN;
if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id())
return NODE_RECLAIM_NOSCAN;

if (test_and_set_bit(ZONE_RECLAIM_LOCKED, &zone->flags))
return ZONE_RECLAIM_NOSCAN;
if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags))
return NODE_RECLAIM_NOSCAN;

ret = __zone_reclaim(zone, gfp_mask, order);
clear_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
ret = __node_reclaim(pgdat, gfp_mask, order);
clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags);

if (!ret)
count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
Expand Down

0 comments on commit a5f5f91

Please sign in to comment.