Skip to content

Commit

Permalink
mm: update get_user_pages_longterm to migrate pages allocated from CM…
Browse files Browse the repository at this point in the history
…A region

This patch updates get_user_pages_longterm to migrate pages that were
allocated from the CMA region.  This makes sure that we don't keep pages
that have become non-movable (due to the elevated page reference count)
in the CMA area.

This will be used by ppc64 in a later patch to avoid pinning pages in
the CMA region.  ppc64 uses the CMA region for allocation of the hardware
page table (hash page table), and not being able to migrate pages out of
the CMA region results in page table allocation failures.

One case where we hit this easily is when a guest is using a VFIO
passthrough device.  VFIO locks all of the guest's memory, and if the
guest memory is backed by the CMA region, it becomes unmovable, fragmenting
the CMA and possibly preventing other guests from allocating a large enough
hash page table.

NOTE: We allocate the new page without using __GFP_THISNODE

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Aneesh Kumar K.V <[email protected]>
Cc: Alexey Kardashevskiy <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: David Gibson <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Mel Gorman <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
kvaneesh authored and torvalds committed Mar 6, 2019
1 parent d7fefcc commit 9a4e9f3
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 27 deletions.
2 changes: 2 additions & 0 deletions include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nmask);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);

Expand Down
3 changes: 2 additions & 1 deletion include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1536,7 +1536,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
#ifdef CONFIG_FS_DAX

#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas);
Expand Down
200 changes: 176 additions & 24 deletions mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#include <linux/sched/signal.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>
#include <linux/migrate.h>
#include <linux/mm_inline.h>
#include <linux/sched/mm.h>

#include <asm/mmu_context.h>
#include <asm/pgtable.h>
Expand Down Expand Up @@ -1126,7 +1129,167 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
}
EXPORT_SYMBOL(get_user_pages);

#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)

#ifdef CONFIG_FS_DAX
/*
 * Report whether any of the first @nr_pages entries of @vmas is a
 * filesystem-DAX vma (as decided by vma_is_fsdax()).
 *
 * Runs of identical consecutive entries are tested only once.
 *
 * Returns true as soon as an fsdax vma is found, false otherwise.
 */
static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
{
	struct vm_area_struct *last_checked = NULL;
	long idx = 0;

	while (idx < nr_pages) {
		struct vm_area_struct *cur = vmas[idx++];

		/* Only look at a vma when it differs from the previous entry. */
		if (cur != last_checked) {
			last_checked = cur;
			if (vma_is_fsdax(cur))
				return true;
		}
	}

	return false;
}
#else
/*
 * Variant used when CONFIG_FS_DAX is not defined: ignores its arguments
 * and always reports that no fsdax vma was found.
 */
static inline bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
{
return false;
}
#endif

#ifdef CONFIG_CMA
/*
 * Allocation callback passed to migrate_pages() by
 * check_and_migrate_cma_pages(): returns a destination page for @page.
 *
 * @page:    source page that is being migrated
 * @private: not used by this callback
 *
 * hugetlb pages, transparent huge pages and order-0 pages each get a
 * matching destination. Returns the new page, or NULL if the allocation
 * failed.
 */
static struct page *new_non_cma_page(struct page *page, unsigned long private)
{
	/* Ask for memory on the node the source page currently sits on. */
	int src_node = page_to_nid(page);
	/*
	 * This allocation only serves migration, so failure warnings are
	 * suppressed. __GFP_THISNODE is deliberately not set: the source
	 * node is the one holding the CMA reservation and may have very
	 * little non-movable memory left.
	 */
	gfp_t base_gfp = GFP_USER | __GFP_NOWARN;

	if (PageHighMem(page))
		base_gfp |= __GFP_HIGHMEM;

#ifdef CONFIG_HUGETLB_PAGE
	/*
	 * Allocate a fresh huge page instead of dequeuing one from the
	 * pool, because pool pages will mostly be from the CMA region.
	 */
	if (PageHuge(page))
		return alloc_migrate_huge_page(page_hstate(page), base_gfp,
					       src_node, NULL);
#endif
	if (PageTransHuge(page)) {
		/*
		 * Suppress failure warnings and drop __GFP_MOVABLE so that
		 * the replacement is not allocated from the CMA area again.
		 */
		gfp_t huge_gfp = (GFP_TRANSHUGE | __GFP_NOWARN) & ~__GFP_MOVABLE;
		struct page *new_thp = __alloc_pages_node(src_node, huge_gfp,
							  HPAGE_PMD_ORDER);

		if (new_thp)
			prep_transhuge_page(new_thp);
		return new_thp;
	}

	return __alloc_pages_node(src_node, base_gfp, 0);
}

/*
 * check_and_migrate_cma_pages - move freshly pinned pages out of the CMA area
 * @start:     user address the pages were pinned from
 * @nr_pages:  number of valid entries in @pages
 * @gup_flags: flags reused when the range has to be pinned again
 * @pages:     pages pinned by the caller; refilled if the range is re-pinned
 * @vmas:      passed through to get_user_pages() when re-pinning
 *
 * Scans @pages and isolates every page for which is_migrate_cma_page() is
 * true. If at least one page was isolated, all references held through
 * @pages are dropped, the isolated pages are migrated to pages supplied by
 * new_non_cma_page(), and the range is pinned again with get_user_pages().
 * The scan is then repeated on the new pages until nothing is isolated any
 * more. If a migration fails, the range is pinned one last time and
 * returned without further migration attempts.
 *
 * Returns @nr_pages unchanged when nothing had to be isolated, otherwise
 * the return value of the last get_user_pages() call.
 */
static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
					unsigned int gup_flags,
					struct page **pages,
					struct vm_area_struct **vmas)
{
	long i;
	bool drain_allow = true;
	bool migrate_allow = true;
	struct page *prev_head;
	LIST_HEAD(cma_page_list);

check_again:
	/* @pages was (re)filled, so forget the head seen in an earlier pass. */
	prev_head = NULL;
	for (i = 0; i < nr_pages; i++) {
		struct page *head;

		/*
		 * If we get a page from the CMA zone, since we are going to
		 * be pinning these entries, we might as well move them out
		 * of the CMA zone if possible.
		 */
		if (!is_migrate_cma_page(pages[i]))
			continue;

		/*
		 * Consecutive entries that belong to the same compound page
		 * share one head. Handle that head only once: a second
		 * attempt cannot isolate it again, and because an isolated
		 * head is no longer PageLRU it would only trigger a needless
		 * lru_add_drain_all().
		 */
		head = compound_head(pages[i]);
		if (head == prev_head)
			continue;
		prev_head = head;

		if (PageHuge(head)) {
			isolate_huge_page(head, &cma_page_list);
			continue;
		}

		/* Drain the per-cpu LRU caches at most once per pass. */
		if (!PageLRU(head) && drain_allow) {
			lru_add_drain_all();
			drain_allow = false;
		}

		if (!isolate_lru_page(head)) {
			list_add_tail(&head->lru, &cma_page_list);
			mod_node_page_state(page_pgdat(head),
					    NR_ISOLATED_ANON +
					    page_is_file_cache(head),
					    hpage_nr_pages(head));
		}
	}

	if (!list_empty(&cma_page_list)) {
		/*
		 * drop the above get_user_pages reference.
		 */
		for (i = 0; i < nr_pages; i++)
			put_page(pages[i]);

		if (migrate_pages(&cma_page_list, new_non_cma_page,
				  NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
			/*
			 * some of the pages failed migration. Do get_user_pages
			 * without migration.
			 */
			migrate_allow = false;

			if (!list_empty(&cma_page_list))
				putback_movable_pages(&cma_page_list);
		}
		/*
		 * The references were dropped above, so take them again. If
		 * the migration succeeded, rescan the result to migrate any
		 * CMA pages we failed to isolate earlier; after a failed
		 * migration the pages are returned as they are.
		 */
		nr_pages = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
		if ((nr_pages > 0) && migrate_allow) {
			drain_allow = true;
			goto check_again;
		}
	}

	return nr_pages;
}
#else
/*
 * Variant used when CONFIG_CMA is not defined: performs no migration and
 * returns @nr_pages unchanged.
 */
static inline long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
unsigned int gup_flags,
struct page **pages,
struct vm_area_struct **vmas)
{
return nr_pages;
}
#endif

/*
* This is the same as get_user_pages() in that it assumes we are
* operating on the current task's mm, but it goes further to validate
Expand All @@ -1140,11 +1303,11 @@ EXPORT_SYMBOL(get_user_pages);
* Contrast this to iov_iter_get_pages() usages which are transient.
*/
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas_arg)
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas_arg)
{
struct vm_area_struct **vmas = vmas_arg;
struct vm_area_struct *vma_prev = NULL;
unsigned long flags;
long rc, i;

if (!pages)
Expand All @@ -1157,31 +1320,20 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
return -ENOMEM;
}

flags = memalloc_nocma_save();
rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
memalloc_nocma_restore(flags);
if (rc < 0)
goto out;

for (i = 0; i < rc; i++) {
struct vm_area_struct *vma = vmas[i];

if (vma == vma_prev)
continue;

vma_prev = vma;

if (vma_is_fsdax(vma))
break;
}

/*
* Either get_user_pages() failed, or the vma validation
* succeeded, in either case we don't need to put_page() before
* returning.
*/
if (i >= rc)
if (check_dax_vmas(vmas, rc)) {
for (i = 0; i < rc; i++)
put_page(pages[i]);
rc = -EOPNOTSUPP;
goto out;
}

for (i = 0; i < rc; i++)
put_page(pages[i]);
rc = -EOPNOTSUPP;
rc = check_and_migrate_cma_pages(start, rc, gup_flags, pages, vmas);
out:
if (vmas != vmas_arg)
kfree(vmas);
Expand Down
4 changes: 2 additions & 2 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1587,8 +1587,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
return page;
}

static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nmask)
struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nmask)
{
struct page *page;

Expand Down

0 comments on commit 9a4e9f3

Please sign in to comment.