Skip to content

Commit

Permalink
mm/lru: introduce TestClearPageLRU()
Browse files Browse the repository at this point in the history
Currently lru_lock still guards both the lru list and the page's lru bit,
which is fine.  But if we want to use a specific lruvec lock on the page, we
need to pin down the page's lruvec/memcg during locking.  Just taking the
lruvec lock first may be undermined by the page's memcg charge/migration.
To fix this problem, we will clear the lru bit outside of the lock and use
it as a pin-down action to block page isolation while the memcg is changing.

So the standard steps of page isolation are now as follows:
	1, get_page(); 	       #pin the page to avoid it being freed
	2, TestClearPageLRU(); #block other isolation like memcg change
	3, spin_lock on lru_lock; #serialize lru list access
	4, delete page from lru list;

This patch starts with the first part: TestClearPageLRU, which combines the
PageLRU check and ClearPageLRU into a macro function TestClearPageLRU.  This
function will be used as a page isolation precondition to prevent other
isolations elsewhere.  As a result, there may be !PageLRU pages on the lru
list, so the BUG() checks need to be removed accordingly.

There are 2 rules for the lru bit now:
1, the lru bit still indicates whether a page is on the lru list; only at
   some temporary moments (while isolating) may the page have no lru bit
   while it's on the lru list.  But the page must still be on the lru list
   when the lru bit is set.
2, the lru bit has to be cleared before the page is deleted from the lru list.

As Andrew Morton mentioned, this change would dirty the cacheline for a page
which isn't on the LRU.  But the loss would be acceptable according to Rong
Chen's <[email protected]> report:
https://lore.kernel.org/lkml/20200304090301.GB5972@shao2-debian/

Link: https://lkml.kernel.org/r/[email protected]
Suggested-by: Johannes Weiner <[email protected]>
Signed-off-by: Alex Shi <[email protected]>
Acked-by: Hugh Dickins <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: Alexander Duyck <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Cc: Daniel Jordan <[email protected]>
Cc: "Huang, Ying" <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Konstantin Khlebnikov <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Mika Penttilä <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Wei Yang <[email protected]>
Cc: Yang Shi <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
alexshi authored and torvalds committed Dec 15, 2020
1 parent 13805a8 commit d25b5bd
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 22 deletions.
1 change: 1 addition & 0 deletions include/linux/page-flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD)
PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
__CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
TESTCLEARFLAG(LRU, lru, PF_HEAD)
PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
TESTCLEARFLAG(Active, active, PF_HEAD)
PAGEFLAG(Workingset, workingset, PF_HEAD)
Expand Down
3 changes: 1 addition & 2 deletions mm/mlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,9 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
* We already have pin from follow_page_mask()
* so we can spare the get_page() here.
*/
if (PageLRU(page)) {
if (TestClearPageLRU(page)) {
struct lruvec *lruvec;

ClearPageLRU(page);
lruvec = mem_cgroup_page_lruvec(page,
page_pgdat(page));
del_page_from_lru_list(page, lruvec,
Expand Down
39 changes: 19 additions & 20 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1541,7 +1541,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
*/
int __isolate_lru_page(struct page *page, isolate_mode_t mode)
{
int ret = -EINVAL;
int ret = -EBUSY;

/* Only take pages on the LRU. */
if (!PageLRU(page))
Expand All @@ -1551,8 +1551,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
return ret;

ret = -EBUSY;

/*
* To minimise LRU disruption, the caller can indicate that it only
* wants to isolate pages it will be able to operate on without
Expand Down Expand Up @@ -1599,8 +1597,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
* sure the page is not being freed elsewhere -- the
* page release code relies on it.
*/
ClearPageLRU(page);
ret = 0;
if (TestClearPageLRU(page))
ret = 0;
else
put_page(page);
}

return ret;
Expand Down Expand Up @@ -1666,8 +1666,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);

VM_BUG_ON_PAGE(!PageLRU(page), page);

nr_pages = compound_nr(page);
total_scan += nr_pages;

Expand Down Expand Up @@ -1764,21 +1762,18 @@ int isolate_lru_page(struct page *page)
VM_BUG_ON_PAGE(!page_count(page), page);
WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");

if (PageLRU(page)) {
if (TestClearPageLRU(page)) {
pg_data_t *pgdat = page_pgdat(page);
struct lruvec *lruvec;

spin_lock_irq(&pgdat->lru_lock);
get_page(page);
lruvec = mem_cgroup_page_lruvec(page, pgdat);
if (PageLRU(page)) {
int lru = page_lru(page);
get_page(page);
ClearPageLRU(page);
del_page_from_lru_list(page, lruvec, lru);
ret = 0;
}
spin_lock_irq(&pgdat->lru_lock);
del_page_from_lru_list(page, lruvec, page_lru(page));
spin_unlock_irq(&pgdat->lru_lock);
ret = 0;
}

return ret;
}

Expand Down Expand Up @@ -4289,6 +4284,10 @@ void check_move_unevictable_pages(struct pagevec *pvec)
nr_pages = thp_nr_pages(page);
pgscanned += nr_pages;

/* block memcg migration during page moving between lru */
if (!TestClearPageLRU(page))
continue;

if (pagepgdat != pgdat) {
if (pgdat)
spin_unlock_irq(&pgdat->lru_lock);
Expand All @@ -4297,10 +4296,7 @@ void check_move_unevictable_pages(struct pagevec *pvec)
}
lruvec = mem_cgroup_page_lruvec(page, pgdat);

if (!PageLRU(page) || !PageUnevictable(page))
continue;

if (page_evictable(page)) {
if (page_evictable(page) && PageUnevictable(page)) {
enum lru_list lru = page_lru_base_type(page);

VM_BUG_ON_PAGE(PageActive(page), page);
Expand All @@ -4309,12 +4305,15 @@ void check_move_unevictable_pages(struct pagevec *pvec)
add_page_to_lru_list(page, lruvec, lru);
pgrescued += nr_pages;
}
SetPageLRU(page);
}

if (pgdat) {
__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
spin_unlock_irq(&pgdat->lru_lock);
} else if (pgscanned) {
count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
}
}
EXPORT_SYMBOL_GPL(check_move_unevictable_pages);

0 comments on commit d25b5bd

Please sign in to comment.