Unevictable LRU Infrastructure
When the system contains many mlocked or otherwise unevictable pages,
the pageout code (kswapd) can spend a great deal of time scanning over
them.  Worse still, the presence of many unevictable pages can confuse
kswapd into thinking that more aggressive pageout modes are required,
resulting in all kinds of bad behaviour.

This patch provides the infrastructure to manage pages excluded from
reclaim--i.e., hidden from vmscan.  It is based on a patch by Larry
Woodman of Red Hat, reworked to maintain "unevictable" pages on a
separate per-zone LRU list, to "hide" them from vmscan.

KOSAKI Motohiro added support for the memory controller's unevictable
LRU list.

Pages on the unevictable list have both PG_unevictable and PG_lru set.
Thus, PG_unevictable is analogous to and mutually exclusive with
PG_active--it specifies which LRU list the page is on.
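
For illustration, the flag-to-list mapping reads as in this sketch of
the reworked page_lru() helper (a restatement of the mm_inline.h hunk
below, not new code):

    /* PG_unevictable overrides PG_active: a page is on exactly one list */
    static inline enum lru_list page_lru(struct page *page)
    {
        enum lru_list lru = LRU_BASE;

        if (PageUnevictable(page))
            lru = LRU_UNEVICTABLE;
        else {
            if (PageActive(page))
                lru += LRU_ACTIVE;
            lru += page_is_file_cache(page);
        }

        return lru;
    }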

The unevictable infrastructure is enabled by a new mm Kconfig option
[CONFIG_]UNEVICTABLE_LRU.

A new function 'page_evictable(page, vma)' in vmscan.c tests whether or
not a page may be evictable.  Subsequent patches will add the various
!evictable tests.  We'll want to keep these tests light-weight for use in
shrink_active_list() and, possibly, the fault path.
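
At this point in the series the helper is only a stub; here is a minimal
sketch of its initial shape (the CONFIG_UNEVICTABLE_LRU=n inline in
swap.h below behaves the same way):

    /*
     * Sketch: no !evictable tests exist yet; the mlock, SHM_LOCK and
     * ramfs checks arrive in the subsequent patches mentioned above.
     */
    int page_evictable(struct page *page, struct vm_area_struct *vma)
    {
        /* TODO: per-condition tests, added later in the series */
        return 1;
    }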

To avoid races between tasks putting pages [back] onto an LRU list and
tasks that might be moving the page from non-evictable to evictable state,
the new function 'putback_lru_page()' -- inverse to 'isolate_lru_page()'
-- tests the "evictability" of a page after placing it on the LRU, before
dropping the reference.  If the page has become unevictable,
putback_lru_page() will redo the 'putback', thus moving the page to the
unevictable list.  This way, we avoid "stranding" evictable pages on the
unevictable list.
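
A simplified sketch of that recheck follows (statistics, VM_BUG_ON
sanity checks and the memcg update in the real mm/vmscan.c function are
omitted; lru_cache_add_lru() and add_page_to_unevictable_list() are the
helpers this series introduces):

    void putback_lru_page(struct page *page)
    {
        int lru;
        int active = !!TestClearPageActive(page);

    redo:
        ClearPageUnevictable(page);
        if (page_evictable(page, NULL)) {
            /*
             * Evictable: use the pagevec cache.  Worst case under a
             * race is an unevictable page on an [in]active list, and
             * vmscan knows how to handle that.
             */
            lru = active + page_is_file_cache(page);
            lru_cache_add_lru(page, lru);
        } else {
            /* Unevictable: straight onto the zone's unevictable list */
            lru = LRU_UNEVICTABLE;
            add_page_to_unevictable_list(page);
        }

        /*
         * The page's status can change while it is being moved: if an
         * evictable page landed on the unevictable list, pull it back
         * off and redo the putback so it cannot be stranded there.
         */
        if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
            if (!isolate_lru_page(page)) {
                put_page(page);
                goto redo;
            }
        }

        put_page(page);    /* drop the ref taken by isolate_lru_page() */
    }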

[[email protected]: fix fallout from out-of-order merge]
[[email protected]: fix UNEVICTABLE_LRU and !PROC_PAGE_MONITOR build]
[[email protected]: remove redundant mapping check]
[[email protected]: unevictable-lru-infrastructure: putback_lru_page()/unevictable page handling rework]
[[email protected]: kill unnecessary lock_page() in vmscan.c]
[[email protected]: revert migration change of unevictable lru infrastructure]
[[email protected]: revert to unevictable-lru-infrastructure-kconfig-fix.patch]
[[email protected]: restore patch failure of vmstat-unevictable-and-mlocked-pages-vm-events.patch]
Signed-off-by: Lee Schermerhorn <[email protected]>
Signed-off-by: Rik van Riel <[email protected]>
Signed-off-by: KOSAKI Motohiro <[email protected]>
Debugged-by: Benjamin Kidwell <[email protected]>
Signed-off-by: Daisuke Nishimura <[email protected]>
Signed-off-by: KAMEZAWA Hiroyuki <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Lee Schermerhorn authored and torvalds committed Oct 20, 2008
1 parent 8a7a854 commit 894bc31
Showing 13 changed files with 345 additions and 73 deletions.
2 changes: 1 addition & 1 deletion include/linux/memcontrol.h
@@ -34,9 +34,9 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
extern void mem_cgroup_uncharge_page(struct page *page);
extern void mem_cgroup_uncharge_cache_page(struct page *page);
extern void mem_cgroup_move_lists(struct page *page, bool active);
extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);

extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
23 changes: 16 additions & 7 deletions include/linux/mm_inline.h
@@ -91,11 +91,16 @@ del_page_from_lru(struct zone *zone, struct page *page)
enum lru_list l = LRU_BASE;

list_del(&page->lru);
if (PageActive(page)) {
__ClearPageActive(page);
l += LRU_ACTIVE;
if (PageUnevictable(page)) {
__ClearPageUnevictable(page);
l = LRU_UNEVICTABLE;
} else {
if (PageActive(page)) {
__ClearPageActive(page);
l += LRU_ACTIVE;
}
l += page_is_file_cache(page);
}
l += page_is_file_cache(page);
__dec_zone_state(zone, NR_LRU_BASE + l);
}

@@ -110,9 +115,13 @@ static inline enum lru_list page_lru(struct page *page)
{
enum lru_list lru = LRU_BASE;

if (PageActive(page))
lru += LRU_ACTIVE;
lru += page_is_file_cache(page);
if (PageUnevictable(page))
lru = LRU_UNEVICTABLE;
else {
if (PageActive(page))
lru += LRU_ACTIVE;
lru += page_is_file_cache(page);
}

return lru;
}
24 changes: 23 additions & 1 deletion include/linux/mmzone.h
@@ -86,6 +86,11 @@ enum zone_stat_item {
NR_ACTIVE_ANON, /* " " " " " */
NR_INACTIVE_FILE, /* " " " " " */
NR_ACTIVE_FILE, /* " " " " " */
#ifdef CONFIG_UNEVICTABLE_LRU
NR_UNEVICTABLE, /* " " " " " */
#else
NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
#endif
NR_ANON_PAGES, /* Mapped anonymous pages */
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
only modified from process context */
@@ -128,10 +133,18 @@ enum lru_list {
LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
NR_LRU_LISTS };
#ifdef CONFIG_UNEVICTABLE_LRU
LRU_UNEVICTABLE,
#else
LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */
#endif
NR_LRU_LISTS
};

#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)

#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)

static inline int is_file_lru(enum lru_list l)
{
return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
@@ -142,6 +155,15 @@ static inline int is_active_lru(enum lru_list l)
return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
}

static inline int is_unevictable_lru(enum lru_list l)
{
#ifdef CONFIG_UNEVICTABLE_LRU
return (l == LRU_UNEVICTABLE);
#else
return 0;
#endif
}

struct per_cpu_pages {
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
22 changes: 21 additions & 1 deletion include/linux/page-flags.h
@@ -94,6 +94,9 @@ enum pageflags {
PG_reclaim, /* To be reclaimed asap */
PG_buddy, /* Page is free, on buddy lists */
PG_swapbacked, /* Page is backed by RAM/swap */
#ifdef CONFIG_UNEVICTABLE_LRU
PG_unevictable, /* Page is "unevictable" */
#endif
#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
PG_uncached, /* Page has been mapped as uncached */
#endif
@@ -182,6 +185,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
TESTCLEARFLAG(Active, active)
__PAGEFLAG(Slab, slab)
PAGEFLAG(Checked, checked) /* Used by some filesystems */
PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */
@@ -225,6 +229,15 @@ PAGEFLAG(SwapCache, swapcache)
PAGEFLAG_FALSE(SwapCache)
#endif

#ifdef CONFIG_UNEVICTABLE_LRU
PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
TESTCLEARFLAG(Unevictable, unevictable)
#else
PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
__CLEARPAGEFLAG_NOOP(Unevictable)
#endif

#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
PAGEFLAG(Uncached, uncached)
#else
@@ -340,9 +353,16 @@ static inline void __ClearPageTail(struct page *page)

#endif /* !PAGEFLAGS_EXTENDED */

#ifdef CONFIG_UNEVICTABLE_LRU
#define __PG_UNEVICTABLE (1 << PG_unevictable)
#else
#define __PG_UNEVICTABLE 0
#endif

#define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
1 << PG_buddy | 1 << PG_writeback | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active)
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
__PG_UNEVICTABLE)

/*
* Flags checked in bad_page(). Pages on the free list should not have
1 change: 0 additions & 1 deletion include/linux/pagevec.h
@@ -101,7 +101,6 @@ static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
}


static inline void pagevec_lru_add_file(struct pagevec *pvec)
{
if (pagevec_count(pvec))
12 changes: 12 additions & 0 deletions include/linux/swap.h
@@ -180,6 +180,8 @@ extern int lru_add_drain_all(void);
extern void rotate_reclaimable_page(struct page *page);
extern void swap_setup(void);

extern void add_page_to_unevictable_list(struct page *page);

/**
* lru_cache_add: add a page to the page lists
* @page: the page to add
@@ -228,6 +230,16 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
}
#endif

#ifdef CONFIG_UNEVICTABLE_LRU
extern int page_evictable(struct page *page, struct vm_area_struct *vma);
#else
static inline int page_evictable(struct page *page,
struct vm_area_struct *vma)
{
return 1;
}
#endif

extern int kswapd_run(int nid);

#ifdef CONFIG_MMU
11 changes: 11 additions & 0 deletions mm/Kconfig
@@ -209,5 +209,16 @@ config VIRT_TO_BUS
def_bool y
depends on !ARCH_NO_VIRT_TO_BUS

config UNEVICTABLE_LRU
bool "Add LRU list to track non-evictable pages"
default y
depends on MMU
help
Keeps unevictable pages off of the active and inactive pageout
lists, so kswapd will not waste CPU time or have its balancing
algorithms thrown off by scanning these pages. Selecting this
will use one page flag and increase the code size a little,
say Y unless you know what you are doing.

config MMU_NOTIFIER
bool
26 changes: 26 additions & 0 deletions mm/internal.h
@@ -39,8 +39,15 @@ static inline void __put_page(struct page *page)
atomic_dec(&page->_count);
}

/*
* in mm/vmscan.c:
*/
extern int isolate_lru_page(struct page *page);
extern void putback_lru_page(struct page *page);

/*
* in mm/page_alloc.c
*/
extern void __free_pages_bootmem(struct page *page, unsigned int order);

/*
@@ -54,6 +61,25 @@ static inline unsigned long page_order(struct page *page)
return page_private(page);
}

#ifdef CONFIG_UNEVICTABLE_LRU
/*
* unevictable_migrate_page() called only from migrate_page_copy() to
* migrate unevictable flag to new page.
* Note that the old page has been isolated from the LRU lists at this
* point so we don't need to worry about LRU statistics.
*/
static inline void unevictable_migrate_page(struct page *new, struct page *old)
{
if (TestClearPageUnevictable(old))
SetPageUnevictable(new);
}
#else
static inline void unevictable_migrate_page(struct page *new, struct page *old)
{
}
#endif


/*
* FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
* so all functions starting at paging_init should be marked __init
73 changes: 45 additions & 28 deletions mm/memcontrol.c
@@ -160,9 +160,10 @@ struct page_cgroup {
struct mem_cgroup *mem_cgroup;
int flags;
};
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
#define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
#define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */
#define PAGE_CGROUP_FLAG_UNEVICTABLE (0x8) /* page is unevictableable */

static int page_cgroup_nid(struct page_cgroup *pc)
{
@@ -292,10 +293,14 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
{
int lru = LRU_BASE;

if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
lru += LRU_ACTIVE;
if (pc->flags & PAGE_CGROUP_FLAG_FILE)
lru += LRU_FILE;
if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE)
lru = LRU_UNEVICTABLE;
else {
if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
lru += LRU_ACTIVE;
if (pc->flags & PAGE_CGROUP_FLAG_FILE)
lru += LRU_FILE;
}

MEM_CGROUP_ZSTAT(mz, lru) -= 1;

@@ -308,32 +313,46 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
{
int lru = LRU_BASE;

if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
lru += LRU_ACTIVE;
if (pc->flags & PAGE_CGROUP_FLAG_FILE)
lru += LRU_FILE;
if (pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE)
lru = LRU_UNEVICTABLE;
else {
if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
lru += LRU_ACTIVE;
if (pc->flags & PAGE_CGROUP_FLAG_FILE)
lru += LRU_FILE;
}

MEM_CGROUP_ZSTAT(mz, lru) += 1;
list_add(&pc->lru, &mz->lists[lru]);

mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
}

static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
{
struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
int file = pc->flags & PAGE_CGROUP_FLAG_FILE;
int lru = LRU_FILE * !!file + !!from;
int active = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
int file = pc->flags & PAGE_CGROUP_FLAG_FILE;
int unevictable = pc->flags & PAGE_CGROUP_FLAG_UNEVICTABLE;
enum lru_list from = unevictable ? LRU_UNEVICTABLE :
(LRU_FILE * !!file + !!active);

MEM_CGROUP_ZSTAT(mz, lru) -= 1;
if (lru == from)
return;

if (active)
pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
else
MEM_CGROUP_ZSTAT(mz, from) -= 1;

if (is_unevictable_lru(lru)) {
pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
pc->flags |= PAGE_CGROUP_FLAG_UNEVICTABLE;
} else {
if (is_active_lru(lru))
pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
else
pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
pc->flags &= ~PAGE_CGROUP_FLAG_UNEVICTABLE;
}

lru = LRU_FILE * !!file + !!active;
MEM_CGROUP_ZSTAT(mz, lru) += 1;
list_move(&pc->lru, &mz->lists[lru]);
}
@@ -351,7 +370,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
/*
* This routine assumes that the appropriate zone's lru lock is already held
*/
void mem_cgroup_move_lists(struct page *page, bool active)
void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
{
struct page_cgroup *pc;
struct mem_cgroup_per_zone *mz;
@@ -374,7 +393,7 @@ void mem_cgroup_move_lists(struct page *page, bool active)
if (pc) {
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_move_lists(pc, active);
__mem_cgroup_move_lists(pc, lru);
spin_unlock_irqrestore(&mz->lru_lock, flags);
}
unlock_page_cgroup(page);
@@ -472,12 +491,10 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
/*
* TODO: play better with lumpy reclaim, grabbing anything.
*/
if (PageActive(page) && !active) {
__mem_cgroup_move_lists(pc, true);
continue;
}
if (!PageActive(page) && active) {
__mem_cgroup_move_lists(pc, false);
if (PageUnevictable(page) ||
(PageActive(page) && !active) ||
(!PageActive(page) && active)) {
__mem_cgroup_move_lists(pc, page_lru(page));
continue;
}

2 changes: 1 addition & 1 deletion mm/mempolicy.c
@@ -2202,7 +2202,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
if (PageSwapCache(page))
md->swapcache++;

if (PageActive(page))
if (PageActive(page) || PageUnevictable(page))
md->active++;

if (PageWriteback(page))
