Skip to content

Commit

Permalink
mm: thp: refactor NUMA fault handling
Browse files Browse the repository at this point in the history
When the THP NUMA fault support was added THP migration was not supported
yet.  So the ad hoc THP migration was implemented in NUMA fault handling.
Since v4.14 THP migration has been supported so it doesn't make too much
sense to still keep another THP migration implementation rather than using
the generic migration code.

This patch reworks the NUMA fault handling to use generic migration
implementation to migrate misplaced page.  There is no functional change.

After the refactor the flow of NUMA fault handling looks just like its
PTE counterpart:
  Acquire ptl
  Prepare for migration (elevate page refcount)
  Release ptl
  Isolate page from lru and elevate page refcount
  Migrate the misplaced THP

If migration fails just restore the old normal PMD.

In the old code anon_vma lock was needed to serialize THP migration
against THP split, but since then the THP code has been reworked a lot, it
seems anon_vma lock is not required anymore to avoid the race.

The page refcount elevation when holding ptl should prevent from THP
split.

Use migrate_misplaced_page() for both base page and THP NUMA hinting fault
and remove all the dead and duplicate code.

[[email protected]: fix a double unlock bug]
  Link: https://lkml.kernel.org/r/YLX8uYN01JmfLnlK@mwanda

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Yang Shi <[email protected]>
Signed-off-by: Dan Carpenter <[email protected]>
Acked-by: Mel Gorman <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Zi Yan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
yang-shi authored and torvalds committed Jul 1, 2021
1 parent f4c0d83 commit c5b5a3d
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 286 deletions.
23 changes: 0 additions & 23 deletions include/linux/migrate.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,39 +99,16 @@ static inline void __ClearPageMovable(struct page *page)
#endif

#ifdef CONFIG_NUMA_BALANCING
extern bool pmd_trans_migrating(pmd_t pmd);
extern int migrate_misplaced_page(struct page *page,
struct vm_area_struct *vma, int node);
#else
static inline bool pmd_trans_migrating(pmd_t pmd)
{
return false;
}
static inline int migrate_misplaced_page(struct page *page,
struct vm_area_struct *vma, int node)
{
return -EAGAIN; /* can't migrate now */
}
#endif /* CONFIG_NUMA_BALANCING */

#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
struct vm_area_struct *vma,
pmd_t *pmd, pmd_t entry,
unsigned long address,
struct page *page, int node);
#else
static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
struct vm_area_struct *vma,
pmd_t *pmd, pmd_t entry,
unsigned long address,
struct page *page, int node)
{
return -EAGAIN;
}
#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/


#ifdef CONFIG_MIGRATION

/*
Expand Down
145 changes: 44 additions & 101 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1423,92 +1423,20 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
pmd_t pmd = vmf->orig_pmd;
struct anon_vma *anon_vma = NULL;
pmd_t oldpmd = vmf->orig_pmd;
pmd_t pmd;
struct page *page;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
int page_nid = NUMA_NO_NODE, this_nid = numa_node_id();
int page_nid = NUMA_NO_NODE;
int target_nid, last_cpupid = -1;
bool page_locked;
bool migrated = false;
bool was_writable;
bool was_writable = pmd_savedwrite(oldpmd);
int flags = 0;

vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(pmd, *vmf->pmd)))
goto out_unlock;

/*
* If there are potential migrations, wait for completion and retry
* without disrupting NUMA hinting information. Do not relock and
* check_same as the page may no longer be mapped.
*/
if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
page = pmd_page(*vmf->pmd);
if (!get_page_unless_zero(page))
goto out_unlock;
spin_unlock(vmf->ptl);
put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
goto out;
}

page = pmd_page(pmd);
BUG_ON(is_huge_zero_page(page));
page_nid = page_to_nid(page);
last_cpupid = page_cpupid_last(page);
count_vm_numa_event(NUMA_HINT_FAULTS);
if (page_nid == this_nid) {
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
flags |= TNF_FAULT_LOCAL;
}

/* See similar comment in do_numa_page for explanation */
if (!pmd_savedwrite(pmd))
flags |= TNF_NO_GROUP;

/*
* Acquire the page lock to serialise THP migrations but avoid dropping
* page_table_lock if at all possible
*/
page_locked = trylock_page(page);
target_nid = mpol_misplaced(page, vma, haddr);
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
page_nid = NUMA_NO_NODE;
if (!get_page_unless_zero(page))
goto out_unlock;
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
goto out;
} else if (target_nid == NUMA_NO_NODE) {
/* There are no parallel migrations and page is in the right
* node. Clear the numa hinting info in this pmd.
*/
goto clear_pmdnuma;
}

/*
* Page is misplaced. Page lock serialises migrations. Acquire anon_vma
* to serialises splits
*/
get_page(page);
spin_unlock(vmf->ptl);
anon_vma = page_lock_anon_vma_read(page);

/* Confirm the PMD did not change while page_table_lock was released */
spin_lock(vmf->ptl);
if (unlikely(!pmd_same(pmd, *vmf->pmd))) {
unlock_page(page);
put_page(page);
page_nid = NUMA_NO_NODE;
goto out_unlock;
}

/* Bail if we fail to protect against THP splits for any reason */
if (unlikely(!anon_vma)) {
put_page(page);
page_nid = NUMA_NO_NODE;
goto clear_pmdnuma;
}

/*
Expand Down Expand Up @@ -1537,43 +1465,58 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
haddr + HPAGE_PMD_SIZE);
}

/*
* Migrate the THP to the requested node, returns with page unlocked
* and access rights restored.
*/
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
page = vm_normal_page_pmd(vma, haddr, pmd);
if (!page)
goto out_map;

/* See similar comment in do_numa_page for explanation */
if (!was_writable)
flags |= TNF_NO_GROUP;

page_nid = page_to_nid(page);
last_cpupid = page_cpupid_last(page);
target_nid = numa_migrate_prep(page, vma, haddr, page_nid,
&flags);

if (target_nid == NUMA_NO_NODE) {
put_page(page);
goto out_map;
}

spin_unlock(vmf->ptl);

migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
vmf->pmd, pmd, vmf->address, page, target_nid);
migrated = migrate_misplaced_page(page, vma, target_nid);
if (migrated) {
flags |= TNF_MIGRATED;
page_nid = target_nid;
} else
} else {
flags |= TNF_MIGRATE_FAIL;

goto out;
clear_pmdnuma:
BUG_ON(!PageLocked(page));
was_writable = pmd_savedwrite(pmd);
pmd = pmd_modify(pmd, vma->vm_page_prot);
pmd = pmd_mkyoung(pmd);
if (was_writable)
pmd = pmd_mkwrite(pmd);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
unlock_page(page);
out_unlock:
spin_unlock(vmf->ptl);
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {
spin_unlock(vmf->ptl);
goto out;
}
goto out_map;
}

out:
if (anon_vma)
page_unlock_anon_vma_read(anon_vma);

if (page_nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
flags);

return 0;

out_map:
/* Restore the PMD */
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
pmd = pmd_mkyoung(pmd);
if (was_writable)
pmd = pmd_mkwrite(pmd);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl);
goto out;
}

/*
Expand Down
18 changes: 0 additions & 18 deletions mm/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -369,23 +369,6 @@ extern unsigned int munlock_vma_page(struct page *page);
*/
extern void clear_page_mlock(struct page *page);

/*
* mlock_migrate_page - called only from migrate_misplaced_transhuge_page()
* (because that does not go through the full procedure of migration ptes):
* to migrate the Mlocked page flag; update statistics.
*/
static inline void mlock_migrate_page(struct page *newpage, struct page *page)
{
if (TestClearPageMlocked(page)) {
int nr_pages = thp_nr_pages(page);

/* Holding pmd lock, no change in irq context: __mod is safe */
__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
SetPageMlocked(newpage);
__mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages);
}
}

extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);

/*
Expand Down Expand Up @@ -461,7 +444,6 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
#else /* !CONFIG_MMU */
static inline void clear_page_mlock(struct page *page) { }
static inline void mlock_vma_page(struct page *page) { }
static inline void mlock_migrate_page(struct page *new, struct page *old) { }
static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
{
}
Expand Down
Loading

0 comments on commit c5b5a3d

Please sign in to comment.