From 83a8441f8d8e2e47e9bf2aead3aca625ab95d5ad Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 6 Apr 2022 08:41:39 -0400 Subject: [PATCH 1/6] mm/huge_memory: Avoid calling pmd_page() on a non-leaf PMD Calling try_to_unmap() with TTU_SPLIT_HUGE_PMD and a folio that's not mapped by a PMD causes oopses on arm64 because we now call page_folio() on an invalid page. pmd_page() returns a valid page for non-leaf PMDs on some architectures, so this bug escaped testing before now. Fix this bug by delaying the call to pmd_page() until after we know the PMD is a leaf. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215804 Fixes: af28a988b313 ("mm/huge_memory: Convert __split_huge_pmd() to take a folio") Reported-by: Zorro Lang Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Zorro Lang --- mm/huge_memory.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2fe38212e07c66..c468fee595ffa4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2145,15 +2145,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, * pmd against. Otherwise we can end up replacing wrong folio. */ VM_BUG_ON(freeze && !folio); - if (folio) { - VM_WARN_ON_ONCE(!folio_test_locked(folio)); - if (folio != page_folio(pmd_page(*pmd))) - goto out; - } + VM_WARN_ON_ONCE(folio && !folio_test_locked(folio)); if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || - is_pmd_migration_entry(*pmd)) + is_pmd_migration_entry(*pmd)) { + if (folio && folio != page_folio(pmd_page(*pmd))) + goto out; __split_huge_pmd_locked(vma, pmd, range.start, freeze); + } out: spin_unlock(ptl); From ffe06786b54039edcecb51a54061ee8d81036a19 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 14:35:04 -0400 Subject: [PATCH 2/6] mm/migrate: Use a folio in alloc_migration_target() This removes an assumption that a large folio is HPAGE_PMD_ORDER as well as letting us remove the call to prep_transhuge_page() and a few hidden calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/migrate.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index de175e2fdba5d8..9894e90db0069b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1520,10 +1520,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, struct page *alloc_migration_target(struct page *page, unsigned long private) { + struct folio *folio = page_folio(page); struct migration_target_control *mtc; gfp_t gfp_mask; unsigned int order = 0; - struct page *new_page = NULL; + struct folio *new_folio = NULL; int nid; int zidx; @@ -1531,34 +1532,31 @@ struct page *alloc_migration_target(struct page *page, unsigned long private) gfp_mask = mtc->gfp_mask; nid = mtc->nid; if (nid == NUMA_NO_NODE) - nid = page_to_nid(page); + nid = folio_nid(folio); - if (PageHuge(page)) { - struct hstate *h = page_hstate(compound_head(page)); + if (folio_test_hugetlb(folio)) { + struct hstate *h = page_hstate(&folio->page); gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask); } - if (PageTransHuge(page)) { + if (folio_test_large(folio)) { /* * clear __GFP_RECLAIM to make the migration callback * consistent with regular THP allocations. 
*/ gfp_mask &= ~__GFP_RECLAIM; gfp_mask |= GFP_TRANSHUGE; - order = HPAGE_PMD_ORDER; + order = folio_order(folio); } - zidx = zone_idx(page_zone(page)); + zidx = zone_idx(folio_zone(folio)); if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE) gfp_mask |= __GFP_HIGHMEM; - new_page = __alloc_pages(gfp_mask, order, nid, mtc->nmask); - - if (new_page && PageTransHuge(new_page)) - prep_transhuge_page(new_page); + new_folio = __folio_alloc(gfp_mask, order, nid, mtc->nmask); - return new_page; + return &new_folio->page; } #ifdef CONFIG_NUMA From c185e494ae0ceb126d89b8e3413ed0a1132e05d3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 6 Jul 2021 10:50:39 -0400 Subject: [PATCH 3/6] mm/migrate: Use a folio in migrate_misplaced_transhuge_page() Unify alloc_misplaced_dst_page() and alloc_misplaced_dst_page_thp(). Removes an assumption that compound pages are HPAGE_PMD_ORDER. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/migrate.c | 58 +++++++++++++++------------------------------------- 1 file changed, 16 insertions(+), 42 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9894e90db0069b..6c31ee1e1c9b06 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1997,32 +1997,20 @@ static struct page *alloc_misplaced_dst_page(struct page *page, unsigned long data) { int nid = (int) data; - struct page *newpage; - - newpage = __alloc_pages_node(nid, - (GFP_HIGHUSER_MOVABLE | - __GFP_THISNODE | __GFP_NOMEMALLOC | - __GFP_NORETRY | __GFP_NOWARN) & - ~__GFP_RECLAIM, 0); - - return newpage; -} - -static struct page *alloc_misplaced_dst_page_thp(struct page *page, - unsigned long data) -{ - int nid = (int) data; - struct page *newpage; - - newpage = alloc_pages_node(nid, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE), - HPAGE_PMD_ORDER); - if (!newpage) - goto out; - - prep_transhuge_page(newpage); + int order = compound_order(page); + gfp_t gfp = __GFP_THISNODE; + struct folio *new; + + if (order > 0) + gfp |= GFP_TRANSHUGE_LIGHT; + else { + gfp |= GFP_HIGHUSER_MOVABLE | __GFP_NOMEMALLOC | __GFP_NORETRY | + __GFP_NOWARN; + gfp &= ~__GFP_RECLAIM; + } + new = __folio_alloc_node(gfp, order, nid); -out: - return newpage; + return &new->page; } static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) @@ -2080,22 +2068,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int nr_remaining; unsigned int nr_succeeded; LIST_HEAD(migratepages); - new_page_t *new; - bool compound; int nr_pages = thp_nr_pages(page); - /* - * PTE mapped THP or HugeTLB page can't reach here so the page could - * be either base page or THP. And it must be head page if it is - * THP. - */ - compound = PageTransHuge(page); - - if (compound) - new = alloc_misplaced_dst_page_thp; - else - new = alloc_misplaced_dst_page; - /* * Don't migrate file pages that are mapped in multiple processes * with execute permissions as they are probably shared libraries. 
@@ -2116,9 +2090,9 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, goto out; list_add(&page->lru, &migratepages); - nr_remaining = migrate_pages(&migratepages, *new, NULL, node, - MIGRATE_ASYNC, MR_NUMA_MISPLACED, - &nr_succeeded); + nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page, + NULL, node, MIGRATE_ASYNC, + MR_NUMA_MISPLACED, &nr_succeeded); if (nr_remaining) { if (!list_empty(&migratepages)) { list_del(&page->lru); From f584b68005ac782097d63a691740cb0dfed072ed Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 15:11:04 -0400 Subject: [PATCH 4/6] mm: Add vma_alloc_folio() This wrapper around alloc_pages_vma() calls prep_transhuge_page(), removing the obligation from the caller. This is in the same spirit as __folio_alloc(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- include/linux/gfp.h | 8 ++++++-- mm/mempolicy.c | 13 +++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 761f8f1885c79e..3e3d36fc210982 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -613,9 +613,11 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); struct folio *folio_alloc(gfp_t gfp, unsigned order); -extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, +struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); +struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, + unsigned long addr, bool hugepage); #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages_vma(gfp_mask, order, vma, addr, true) #else @@ -627,8 +629,10 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); } -#define alloc_pages_vma(gfp_mask, order, vma, addr, false)\ +#define alloc_pages_vma(gfp_mask, order, vma, addr, hugepage) \ alloc_pages(gfp_mask, order) +#define vma_alloc_folio(gfp, order, vma, addr, hugepage) \ + folio_alloc(gfp, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a2516d31db6ca8..ec15f4f4b7144a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2227,6 +2227,19 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, } EXPORT_SYMBOL(alloc_pages_vma); +struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, + unsigned long addr, bool hugepage) +{ + struct folio *folio; + + folio = (struct folio *)alloc_pages_vma(gfp, order, vma, addr, + hugepage); + if (folio && order > 1) + prep_transhuge_page(&folio->page); + + return folio; +} + /** * alloc_pages - Allocate pages. * @gfp: GFP flags. From ec4858e07ed62eceb60bac2ded3c0d6e2471c66b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Apr 2022 15:23:39 -0400 Subject: [PATCH 5/6] mm/mempolicy: Use vma_alloc_folio() in new_page() Simplify new_page() by unifying the THP and base page cases, and handle orders other than 0 and HPAGE_PMD_ORDER correctly. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: William Kucharski --- mm/mempolicy.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ec15f4f4b7144a..649bd3be8682ac 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1191,8 +1191,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, */ static struct page *new_page(struct page *page, unsigned long start) { + struct folio *dst, *src = page_folio(page); struct vm_area_struct *vma; unsigned long address; + gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL; vma = find_vma(current->mm, start); while (vma) { @@ -1202,24 +1204,19 @@ static struct page *new_page(struct page *page, unsigned long start) vma = vma->vm_next; } - if (PageHuge(page)) { - return alloc_huge_page_vma(page_hstate(compound_head(page)), + if (folio_test_hugetlb(src)) + return alloc_huge_page_vma(page_hstate(&src->page), vma, address); - } else if (PageTransHuge(page)) { - struct page *thp; - thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, - HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } + if (folio_test_large(src)) + gfp = GFP_TRANSHUGE; + /* - * if !vma, alloc_page_vma() will use task or system default policy + * if !vma, vma_alloc_folio() will use task or system default policy */ - return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL, - vma, address); + dst = vma_alloc_folio(gfp, folio_order(src), vma, address, + folio_test_large(src)); + return &dst->page; } #else From 98ea02597b9967c0817d29fee2f96d21b9e59ca5 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Thu, 7 Apr 2022 14:40:08 +0800 Subject: [PATCH 6/6] mm/rmap: Fix handling of hugetlbfs pages in page_vma_mapped_walk page_mapped_in_vma() sets nr_pages to 1, which is usually correct as we only want to know about the precise page and not about other pages in the folio. However, hugetlbfs does want to know about the entire hpage, and using nr_pages to get the size of the hpage is wrong. We could change page_mapped_in_vma() to special-case hugetlbfs pages, but it's better to ignore nr_pages in page_vma_mapped_walk() and get the size from the VMA instead. Fixes: 2aff7a4755bed ("mm: Convert page_vma_mapped_walk to work on PFNs") Signed-off-by: zhenwei pi Reviewed-by: Muchun Song Signed-off-by: Matthew Wilcox (Oracle) [edit commit message, use hstate directly] --- mm/page_vma_mapped.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 1187f9c1ec5b10..14a5cda73dee62 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -163,7 +163,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return not_found(pvmw); if (unlikely(is_vm_hugetlb_page(vma))) { - unsigned long size = pvmw->nr_pages * PAGE_SIZE; + struct hstate *hstate = hstate_vma(vma); + unsigned long size = huge_page_size(hstate); /* The only possible mapping was handled on last iteration */ if (pvmw->pte) return not_found(pvmw); @@ -173,8 +174,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (!pvmw->pte) return false; - pvmw->ptl = huge_pte_lockptr(size_to_hstate(size), mm, - pvmw->pte); + pvmw->ptl = huge_pte_lockptr(hstate, mm, pvmw->pte); spin_lock(pvmw->ptl); if (!check_pte(pvmw)) return not_found(pvmw);
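For reference, a minimal caller-side sketch (an illustration, not part of the series) of how the vma_alloc_folio() helper added in patch 4 is meant to be used, mirroring the new_page() conversion in patch 5. The function name example_alloc_dst_folio() and its calling context are hypothetical, and only the non-hugetlb path is shown; the point is that a single call now replaces the old alloc_hugepage_vma() + prep_transhuge_page() pair and handles any source folio order, not just 0 and HPAGE_PMD_ORDER.

#include <linux/gfp.h>
#include <linux/mm.h>

/* Hypothetical helper: allocate a destination folio matching @src. */
static struct folio *example_alloc_dst_folio(struct folio *src,
		struct vm_area_struct *vma, unsigned long addr)
{
	gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL;

	if (folio_test_large(src))
		gfp = GFP_TRANSHUGE;

	/*
	 * vma_alloc_folio() calls prep_transhuge_page() for order > 1
	 * allocations, so the caller no longer has to.
	 */
	return vma_alloc_folio(gfp, folio_order(src), vma, addr,
			       folio_test_large(src));
}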