Skip to content

Commit

Permalink
sparc64: Support transparent huge pages.
Browse files Browse the repository at this point in the history
This is relatively easy since PMD's now cover exactly 4MB of memory.

Our PMD entries are 32-bits each, so we use a special encoding.  The
lowest bit, PMD_ISHUGE, determines the interpretation.  This is possible
because sparc64's page tables are purely software entities so we can use
whatever encoding scheme we want.  We just have to make the TLB miss
assembler page table walkers aware of the layout.

set_pmd_at() works much like set_pte_at() but it has to operate in two
page from a table of non-huge PTEs, so we have to queue up TLB flushes
based upon what mappings are valid in the PTE table.  In the second regime
we are going from huge-page to non-huge-page, and in that case we need
only queue up a single TLB flush to push out the huge page mapping.

We still have 5 bits remaining in the huge PMD encoding so we can very
likely support any new pieces of THP state tracking that might get added
in the future.

With lots of help from Johannes Weiner.

Signed-off-by: David S. Miller <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
davem330 authored and torvalds committed Oct 9, 2012
1 parent f5c8ad4 commit 9e695d2
Show file tree
Hide file tree
Showing 14 changed files with 582 additions and 110 deletions.
5 changes: 4 additions & 1 deletion arch/sparc/include/asm/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);

void hugetlb_prefault_arch_hook(struct mm_struct *mm);
static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
{
hugetlb_setup(mm);
}

static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
Expand Down
2 changes: 1 addition & 1 deletion arch/sparc/include/asm/mmu_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ struct tsb_config {

#define MM_TSB_BASE 0

#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define MM_TSB_HUGE 1
#define MM_NUM_TSBS 2
#else
Expand Down
2 changes: 1 addition & 1 deletion arch/sparc/include/asm/mmu_context_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static inline void tsb_context_switch(struct mm_struct *mm)
{
__tsb_context_switch(__pa(mm->pgd),
&mm->context.tsb_block[0],
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
(mm->context.tsb_block[1].tsb ?
&mm->context.tsb_block[1] :
NULL)
Expand Down
7 changes: 6 additions & 1 deletion arch/sparc/include/asm/page_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#define HPAGE_SHIFT 22

#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
Expand All @@ -26,6 +26,11 @@

#ifndef __ASSEMBLY__

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
struct mm_struct;
extern void hugetlb_setup(struct mm_struct *mm);
#endif

#define WANT_PAGE_VIRTUAL

extern void _clear_page(void *page);
Expand Down
4 changes: 2 additions & 2 deletions arch/sparc/include/asm/pgalloc_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ extern pgtable_t pte_alloc_one(struct mm_struct *mm,
extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
extern void pte_free(struct mm_struct *mm, pgtable_t ptepage);

#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
#define pmd_populate(MM, PMD, PTE) pmd_set(PMD, PTE)
#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
#define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
#define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD))

#define check_pgt_cache() do { } while (0)
Expand Down
177 changes: 171 additions & 6 deletions arch/sparc/include/asm/pgtable_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,31 @@
#error Page table parameters do not cover virtual address space properly.
#endif

#if (PMD_SHIFT != HPAGE_SHIFT)
#error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages.
#endif

/* PMDs point to PTE tables which are 4K aligned. */
#define PMD_PADDR _AC(0xfffffffe,UL)
#define PMD_PADDR_SHIFT _AC(11,UL)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PMD_ISHUGE _AC(0x00000001,UL)

/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge
* pages, this frees up a bunch of bits in the layout that we can
* use for the protection settings and software metadata.
*/
#define PMD_HUGE_PADDR _AC(0xfffff800,UL)
#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL)
#define PMD_HUGE_PRESENT _AC(0x00000400,UL)
#define PMD_HUGE_WRITE _AC(0x00000200,UL)
#define PMD_HUGE_DIRTY _AC(0x00000100,UL)
#define PMD_HUGE_ACCESSED _AC(0x00000080,UL)
#define PMD_HUGE_EXEC _AC(0x00000040,UL)
#define PMD_HUGE_SPLITTING _AC(0x00000020,UL)
#endif

/* PGDs point to PMD tables which are 8K aligned. */
#define PGD_PADDR _AC(0xfffffffc,UL)
#define PGD_PADDR_SHIFT _AC(11,UL)
Expand Down Expand Up @@ -219,6 +240,19 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
}
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot);
#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))

extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);

static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
/* Do nothing, mk_pmd() does this part. */
return pmd;
}
#endif

/* This one can be done with two shifts. */
static inline unsigned long pte_pfn(pte_t pte)
{
Expand Down Expand Up @@ -588,19 +622,130 @@ static inline unsigned long pte_special(pte_t pte)
return pte_val(pte) & _PAGE_SPECIAL;
}

#define pmd_set(pmdp, ptep) \
(pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_young(pmd_t pmd)
{
return pmd_val(pmd) & PMD_HUGE_ACCESSED;
}

static inline int pmd_write(pmd_t pmd)
{
return pmd_val(pmd) & PMD_HUGE_WRITE;
}

static inline unsigned long pmd_pfn(pmd_t pmd)
{
unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR;

return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
}

static inline int pmd_large(pmd_t pmd)
{
return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
(PMD_ISHUGE | PMD_HUGE_PRESENT);
}

static inline int pmd_trans_splitting(pmd_t pmd)
{
return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
(PMD_ISHUGE|PMD_HUGE_SPLITTING);
}

static inline int pmd_trans_huge(pmd_t pmd)
{
return pmd_val(pmd) & PMD_ISHUGE;
}

#define has_transparent_hugepage() 1

static inline pmd_t pmd_mkold(pmd_t pmd)
{
pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
return pmd;
}

static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
pmd_val(pmd) &= ~PMD_HUGE_WRITE;
return pmd;
}

static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
pmd_val(pmd) |= PMD_HUGE_DIRTY;
return pmd;
}

static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
pmd_val(pmd) |= PMD_HUGE_ACCESSED;
return pmd;
}

static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
pmd_val(pmd) |= PMD_HUGE_WRITE;
return pmd;
}

static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
return pmd;
}

static inline pmd_t pmd_mksplitting(pmd_t pmd)
{
pmd_val(pmd) |= PMD_HUGE_SPLITTING;
return pmd;
}

extern pgprot_t pmd_pgprot(pmd_t entry);
#endif

static inline int pmd_present(pmd_t pmd)
{
return pmd_val(pmd) != 0U;
}

#define pmd_none(pmd) (!pmd_val(pmd))

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
#else
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
}
#endif

static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT;

pmd_val(*pmdp) = val;
}

#define pud_set(pudp, pmdp) \
(pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT))
#define __pmd_page(pmd) \
((unsigned long) __va((((unsigned long)pmd_val(pmd))<<PMD_PADDR_SHIFT)))
static inline unsigned long __pmd_page(pmd_t pmd)
{
unsigned long paddr = (unsigned long) pmd_val(pmd);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_val(pmd) & PMD_ISHUGE)
paddr &= PMD_HUGE_PADDR;
#endif
paddr <<= PMD_PADDR_SHIFT;
return ((unsigned long) __va(paddr));
}
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
#define pud_page_vaddr(pud) \
((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT)))
#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (0)
#define pmd_present(pmd) (pmd_val(pmd) != 0U)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (0)
Expand Down Expand Up @@ -634,6 +779,16 @@ static inline unsigned long pte_special(pte_t pte)
extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
pte_t *ptep, pte_t orig, int fullmm);

#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
unsigned long addr,
pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
set_pmd_at(mm, addr, pmdp, __pmd(0U));
return pmd;
}

static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int fullmm)
{
Expand Down Expand Up @@ -689,6 +844,16 @@ extern void mmu_info(struct seq_file *);

struct vm_area_struct;
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);

#define __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);

#define __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
#endif

/* Encode and de-code a swap entry */
#define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL)
Expand Down
94 changes: 87 additions & 7 deletions arch/sparc/include/asm/tsb.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,86 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
andn REG2, 0x7, REG2; \
add REG1, REG2, REG1;

/* Do a user page table walk in MMU globals. Leaves physical PTE
* pointer in REG1. Jumps to FAIL_LABEL on early page table walk
* termination. Physical base of page tables is in PHYS_PGD which
* will not be modified.
/* This macro exists only to make the PMD translator below easier
* to read. It hides the ELF section switch for the sun4v code
* patching.
*/
#define OR_PTE_BIT(REG, NAME) \
661: or REG, _PAGE_##NAME##_4U, REG; \
.section .sun4v_1insn_patch, "ax"; \
.word 661b; \
or REG, _PAGE_##NAME##_4V, REG; \
.previous;

/* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */
#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \
661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \
.section .sun4v_1insn_patch, "ax"; \
.word 661b; \
sethi %uhi(_PAGE_VALID), REG; \
.previous; \
sllx REG, 32, REG; \
661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \
.section .sun4v_1insn_patch, "ax"; \
.word 661b; \
or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \
.previous;

/* PMD has been loaded into REG1, interpret the value, seeing
* if it is a HUGE PMD or a normal one. If it is not valid
* then jump to FAIL_LABEL. If it is a HUGE PMD, and it
* translates to a valid PTE, branch to PTE_LABEL.
*
* We translate the PMD by hand, one bit at a time,
* constructing the huge PTE.
*
* So we construct the PTE in REG2 as follows:
*
* 1) Extract the PMD PFN from REG1 and place it into REG2.
*
* 2) Translate PMD protection bits in REG1 into REG2, one bit
* at a time using andcc tests on REG1 and OR's into REG2.
*
* Only two bits to be concerned with here, EXEC and WRITE.
* Now REG1 is freed up and we can use it as a temporary.
*
* 3) Construct the VALID, CACHE, and page size PTE bits in
* REG1, OR with REG2 to form final PTE.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
brz,pn REG1, FAIL_LABEL; \
andcc REG1, PMD_ISHUGE, %g0; \
be,pt %xcc, 700f; \
and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \
cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \
bne,pn %xcc, FAIL_LABEL; \
andn REG1, PMD_HUGE_PROTBITS, REG2; \
sllx REG2, PMD_PADDR_SHIFT, REG2; \
/* REG2 now holds PFN << PAGE_SHIFT */ \
andcc REG1, PMD_HUGE_EXEC, %g0; \
bne,a,pt %xcc, 1f; \
OR_PTE_BIT(REG2, EXEC); \
1: andcc REG1, PMD_HUGE_WRITE, %g0; \
bne,a,pt %xcc, 1f; \
OR_PTE_BIT(REG2, W); \
/* REG1 can now be clobbered, build final PTE */ \
1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \
ba,pt %xcc, PTE_LABEL; \
or REG1, REG2, REG1; \
700:
#else
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
brz,pn REG1, FAIL_LABEL; \
nop;
#endif

/* Do a user page table walk in MMU globals. Leaves final,
* valid, PTE value in REG1. Jumps to FAIL_LABEL on early
* page table walk termination or if the PTE is not valid.
*
* Physical base of page tables is in PHYS_PGD which will not
* be modified.
*
* VADDR will not be clobbered, but REG1 and REG2 will.
*/
Expand All @@ -175,12 +251,16 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
sllx REG1, PGD_PADDR_SHIFT, REG1; \
andn REG2, 0x3, REG2; \
lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
sllx REG1, PMD_PADDR_SHIFT, REG1; \
andn REG2, 0x7, REG2; \
add REG1, REG2, REG1;
add REG1, REG2, REG1; \
ldxa [REG1] ASI_PHYS_USE_EC, REG1; \
brgez,pn REG1, FAIL_LABEL; \
nop; \
800:

/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
* If no entry is found, FAIL_LABEL will be branched to. On success
Expand Down
Loading

0 comments on commit 9e695d2

Please sign in to comment.