Skip to content

Commit

Permalink
vmscan: stop kswapd waiting on congestion when the min watermark is not being met
Browse files Browse the repository at this point in the history

If reclaim fails to make sufficient progress, the priority is raised.
Once the priority is higher, kswapd starts waiting on congestion.
However, if the zone is below the min watermark then kswapd needs to
continue working without delay as there is a danger of an increased rate
of GFP_ATOMIC allocation failure.

This patch changes the conditions under which kswapd waits on congestion
by only going to sleep if the min watermarks are being met.

[[email protected]: add stats to track how relevant the logic is]
[[email protected]: make kswapd only check its own zones and rename the relevant counters]
Signed-off-by: KOSAKI Motohiro <[email protected]>
Signed-off-by: Mel Gorman <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
kosaki authored and torvalds committed Dec 15, 2009
1 parent f50de2d commit bb3ab59
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 12 deletions.
3 changes: 2 additions & 1 deletion include/linux/vmstat.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGSCAN_ZONE_RECLAIM_FAILED,
#endif
PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
KSWAPD_PREMATURE_FAST, KSWAPD_PREMATURE_SLOW,
KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
KSWAPD_SKIP_CONGESTION_WAIT,
PAGEOUTRUN, ALLOCSTALL, PGROTATED,
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
Expand Down
38 changes: 29 additions & 9 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1905,19 +1905,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
#endif

/* is kswapd sleeping prematurely? */
static int sleeping_prematurely(int order, long remaining)
static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
{
struct zone *zone;
int i;

/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
if (remaining)
return 1;

/* If after HZ/10, a zone is below the high mark, it's premature */
for_each_populated_zone(zone)
for (i = 0; i < pgdat->nr_zones; i++) {
struct zone *zone = pgdat->node_zones + i;

if (!populated_zone(zone))
continue;

if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
0, 0))
return 1;
}

return 0;
}
Expand Down Expand Up @@ -1979,6 +1985,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
unsigned long lru_pages = 0;
int has_under_min_watermark_zone = 0;

/* The swap token gets in the way of swapout... */
if (!priority)
Expand Down Expand Up @@ -2085,15 +2092,28 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
sc.may_writepage = 1;

/*
* We are still under the min watermark, which means there is a
* risk of GFP_ATOMIC allocation failures. Hurry up!
*/
if (!zone_watermark_ok(zone, order, min_wmark_pages(zone),
end_zone, 0))
has_under_min_watermark_zone = 1;

}
if (all_zones_ok)
break; /* kswapd: all done */
/*
* OK, kswapd is getting into trouble. Take a nap, then take
* another pass across the zones.
*/
if (total_scanned && priority < DEF_PRIORITY - 2)
congestion_wait(BLK_RW_ASYNC, HZ/10);
if (total_scanned && (priority < DEF_PRIORITY - 2)) {
if (has_under_min_watermark_zone)
count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
else
congestion_wait(BLK_RW_ASYNC, HZ/10);
}

/*
* We do this so kswapd doesn't build up large priorities for
Expand Down Expand Up @@ -2207,7 +2227,7 @@ static int kswapd(void *p)
long remaining = 0;

/* Try to sleep for a short interval */
if (!sleeping_prematurely(order, remaining)) {
if (!sleeping_prematurely(pgdat, order, remaining)) {
remaining = schedule_timeout(HZ/10);
finish_wait(&pgdat->kswapd_wait, &wait);
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
Expand All @@ -2218,13 +2238,13 @@ static int kswapd(void *p)
* premature sleep. If not, then go fully
* to sleep until explicitly woken up
*/
if (!sleeping_prematurely(order, remaining))
if (!sleeping_prematurely(pgdat, order, remaining))
schedule();
else {
if (remaining)
count_vm_event(KSWAPD_PREMATURE_FAST);
count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
else
count_vm_event(KSWAPD_PREMATURE_SLOW);
count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
}
}

Expand Down
5 changes: 3 additions & 2 deletions mm/vmstat.c
Original file line number Diff line number Diff line change
Expand Up @@ -683,8 +683,9 @@ static const char * const vmstat_text[] = {
"slabs_scanned",
"kswapd_steal",
"kswapd_inodesteal",
"kswapd_slept_prematurely_fast",
"kswapd_slept_prematurely_slow",
"kswapd_low_wmark_hit_quickly",
"kswapd_high_wmark_hit_quickly",
"kswapd_skip_congestion_wait",
"pageoutrun",
"allocstall",

Expand Down

0 comments on commit bb3ab59

Please sign in to comment.