Skip to content

Commit

Permalink
mm: reinstate ZERO_PAGE
Browse files Browse the repository at this point in the history
KAMEZAWA Hiroyuki has observed customers of earlier kernels taking
advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from
using in 2.6.24.  And there were a couple of regression reports on LKML.

Following suggestions from Linus, reinstate do_anonymous_page() use of
the ZERO_PAGE; but this time avoid dirtying its struct page cacheline
with (map)count updates - let vm_normal_page() regard it as abnormal.

Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32,
most powerpc): that's not essential, but minimizes additional branches
(keeping them in the unlikely pte_special case); and incidentally
excludes mips (some models of which needed eight colours of ZERO_PAGE
to avoid costly exceptions).

Don't be fanatical about avoiding ZERO_PAGE updates: get_user_pages()
callers won't want to make exceptions for it, so increment its count
there.  Changes to mlock and migration?  Happily, none seem to be needed.

In most places it's quicker to check pfn than struct page address:
prepare a __read_mostly zero_pfn for that.  Does get_dump_page()
still need its ZERO_PAGE check?  Probably not, but keep it anyway.

Signed-off-by: Hugh Dickins <[email protected]>
Acked-by: Rik van Riel <[email protected]>
Cc: KAMEZAWA Hiroyuki <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Nick Piggin <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Minchan Kim <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Hugh Dickins authored and torvalds committed Sep 22, 2009
1 parent 1ac0cb5 commit a13ea5b
Showing 1 changed file with 44 additions and 9 deletions.
53 changes: 44 additions & 9 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,17 @@ static int __init disable_randmaps(char *s)
}
__setup("norandmaps", disable_randmaps);

/*
* Page frame number of ZERO_PAGE(0), filled in by init_zero_pfn() below.
* The rest of this file compares a pte's pfn against this value to
* recognise the zero page.
*/
static unsigned long zero_pfn __read_mostly;

/*
* Record the pfn of ZERO_PAGE(0) in zero_pfn.  Always returns 0.
* Registered as a core_initcall.
*
* CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
*/
static int __init init_zero_pfn(void)
{
zero_pfn = page_to_pfn(ZERO_PAGE(0));
return 0;
}
core_initcall(init_zero_pfn);

/*
* If a p?d_bad entry is found while walking page tables, report
Expand Down Expand Up @@ -499,7 +510,9 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
if (HAVE_PTE_SPECIAL) {
if (likely(!pte_special(pte)))
goto check_pfn;
if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
if (pfn != zero_pfn)
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
Expand Down Expand Up @@ -1144,9 +1157,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
goto no_page;
if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;

page = vm_normal_page(vma, address, pte);
if (unlikely(!page))
goto bad_page;
if (unlikely(!page)) {
if ((flags & FOLL_DUMP) ||
pte_pfn(pte) != zero_pfn)
goto bad_page;
page = pte_page(pte);
}

if (flags & FOLL_GET)
get_page(page);
Expand Down Expand Up @@ -2084,10 +2102,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,

if (unlikely(anon_vma_prepare(vma)))
goto oom;
VM_BUG_ON(old_page == ZERO_PAGE(0));
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
if (!new_page)
goto oom;

if (pte_pfn(orig_pte) == zero_pfn) {
new_page = alloc_zeroed_user_highpage_movable(vma, address);
if (!new_page)
goto oom;
} else {
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
if (!new_page)
goto oom;
cow_user_page(new_page, old_page, address, vma);
}
__SetPageUptodate(new_page);

/*
* Don't let another task, with possibly unlocked vma,
* keep the mlocked page.
Expand All @@ -2097,8 +2124,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
clear_page_mlock(old_page);
unlock_page(old_page);
}
cow_user_page(new_page, old_page, address, vma);
__SetPageUptodate(new_page);

if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
goto oom_free_new;
Expand Down Expand Up @@ -2639,6 +2664,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
spinlock_t *ptl;
pte_t entry;

if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
if (!pte_none(*page_table))
goto unlock;
goto setpte;
}

/* Allocate our own private page. */
pte_unmap(page_table);

Expand All @@ -2662,6 +2696,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,

inc_mm_counter(mm, anon_rss);
page_add_new_anon_rmap(page, vma, address);
setpte:
set_pte_at(mm, address, page_table, entry);

/* No need to invalidate - it was non-present before */
Expand Down

0 comments on commit a13ea5b

Please sign in to comment.