Skip to content

Commit

Permalink
powerpc/mm: Cleanup management of kmem_caches for pagetables
Browse files Browse the repository at this point in the history
Currently we have a fair bit of rather fiddly code to manage the
various kmem_caches used to store page tables of various levels.  We
generally have two caches holding some combination of PGD, PUD and PMD
tables, plus several more for the special hugepage pagetables.

This patch cleans this all up by taking a different approach.  Rather
than the caches being designated as for PUDs or for hugeptes for 16M
pages, the caches are simply allocated to be a specific size.  Thus
sharing of caches between different types/levels of pagetables happens
naturally.  The pagetable size, where needed, is passed around encoded
in the same way as {PGD,PUD,PMD}_INDEX_SIZE; that is n where the
pagetable contains 2^n pointers.

Signed-off-by: David Gibson <[email protected]>
Signed-off-by: Benjamin Herrenschmidt <[email protected]>
  • Loading branch information
dgibson authored and ozbenh committed Oct 30, 2009
1 parent f71dc17 commit a0668cd
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 122 deletions.
10 changes: 5 additions & 5 deletions arch/powerpc/include/asm/pgalloc-32.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

#include <linux/threads.h>

#define PTE_NONCACHE_NUM 0 /* dummy for now to share code w/ppc64 */
/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
#define MAX_PGTABLE_INDEX_SIZE 0

extern void __bad_pte(pmd_t *pmd);

Expand Down Expand Up @@ -36,11 +37,10 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);

static inline void pgtable_free(pgtable_free_t pgf)
static inline void pgtable_free(void *table, unsigned index_size)
{
void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);

free_page((unsigned long)p);
BUG_ON(index_size); /* 32-bit doesn't use this */
free_page((unsigned long)table);
}

#define check_pgt_cache() do { } while (0)
Expand Down
60 changes: 35 additions & 25 deletions arch/powerpc/include/asm/pgalloc-64.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,39 @@
#include <linux/cpumask.h>
#include <linux/percpu.h>

/*
* Functions that deal with pagetables that could be at any level of
* the table need to be passed an "index_size" so they know how to
* handle allocation. For PTE pages (which are linked to a struct
* page for now, and drawn from the main get_free_pages() pool), the
* allocation size will be (2^index_size * sizeof(pointer)) and
* allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
*
* The maximum index size needs to be big enough to allow any
* pagetable sizes we need, but small enough to fit in the low bits of
* any page table pointer. In other words all pagetables, even tiny
* ones, must be aligned to allow at least enough low 0 bits to
* contain this value. This value is also used as a mask, so it must
* be one less than a power of two.
*/
#define MAX_PGTABLE_INDEX_SIZE 0xf

#ifndef CONFIG_PPC_SUBPAGE_PROT
static inline void subpage_prot_free(pgd_t *pgd) {}
#endif

extern struct kmem_cache *pgtable_cache[];

#define PGD_CACHE_NUM 0
#define PUD_CACHE_NUM 1
#define PMD_CACHE_NUM 1
#define HUGEPTE_CACHE_NUM 2
#define PTE_NONCACHE_NUM 7 /* from GFP rather than kmem_cache */
#define PGT_CACHE(shift) (pgtable_cache[(shift)-1])

static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
}

static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
subpage_prot_free(pgd);
kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
}

#ifndef CONFIG_PPC_64K_PAGES
Expand All @@ -40,13 +52,13 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)

static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
GFP_KERNEL|__GFP_REPEAT);
}

static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
}

static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
Expand Down Expand Up @@ -78,13 +90,13 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,

static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
GFP_KERNEL|__GFP_REPEAT);
}

static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
}

static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
Expand All @@ -107,24 +119,22 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
return page;
}

static inline void pgtable_free(pgtable_free_t pgf)
static inline void pgtable_free(void *table, unsigned index_size)
{
void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
int cachenum = pgf.val & PGF_CACHENUM_MASK;

if (cachenum == PTE_NONCACHE_NUM)
free_page((unsigned long)p);
else
kmem_cache_free(pgtable_cache[cachenum], p);
if (!index_size)
free_page((unsigned long)table);
else {
BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
kmem_cache_free(PGT_CACHE(index_size), table);
}
}

#define __pmd_free_tlb(tlb, pmd,addr) \
pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
#ifndef CONFIG_PPC_64K_PAGES
#define __pud_free_tlb(tlb, pud, addr) \
pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)

#endif /* CONFIG_PPC_64K_PAGES */

#define check_pgt_cache() do { } while (0)
Expand Down
30 changes: 4 additions & 26 deletions arch/powerpc/include/asm/pgalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,51 +24,29 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
__free_page(ptepage);
}

typedef struct pgtable_free {
unsigned long val;
} pgtable_free_t;

/* This needs to be big enough to allow for MMU_PAGE_COUNT + 2 to be stored
* and small enough to fit in the low bits of any naturally aligned page
* table cache entry. Arbitrarily set to 0x1f, that should give us some
* room to grow
*/
#define PGF_CACHENUM_MASK 0x1f

static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
unsigned long mask)
{
BUG_ON(cachenum > PGF_CACHENUM_MASK);

return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
}

#ifdef CONFIG_PPC64
#include <asm/pgalloc-64.h>
#else
#include <asm/pgalloc-32.h>
#endif

#ifdef CONFIG_SMP
extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
extern void pte_free_finish(void);
#else /* CONFIG_SMP */
static inline void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
{
pgtable_free(pgf);
pgtable_free(table, shift);
}
static inline void pte_free_finish(void) { }
#endif /* !CONFIG_SMP */

static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
unsigned long address)
{
pgtable_free_t pgf = pgtable_free_cache(page_address(ptepage),
PTE_NONCACHE_NUM,
PTE_TABLE_SIZE-1);
tlb_flush_pgtable(tlb, address);
pgtable_page_dtor(ptepage);
pgtable_free_tlb(tlb, pgf);
pgtable_free_tlb(tlb, page_address(ptepage), 0);
}

#endif /* __KERNEL__ */
Expand Down
1 change: 1 addition & 0 deletions arch/powerpc/include/asm/pgtable-ppc64.h
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)

void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);

/*
Expand Down
51 changes: 15 additions & 36 deletions arch/powerpc/mm/hugetlbpage.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,14 @@ static unsigned nr_gpages;
unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */

#define hugepte_shift mmu_huge_psizes
#define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize])
#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize])
#define HUGEPTE_INDEX_SIZE(psize) (mmu_huge_psizes[(psize)])
#define PTRS_PER_HUGEPTE(psize) (1 << mmu_huge_psizes[psize])

#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \
+ hugepte_shift[psize])
+ HUGEPTE_INDEX_SIZE(psize))
#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize))
#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1))

/* Subtract one from array size because we don't need a cache for 4K since
* is not a huge page size */
#define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1)
#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize])

static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
[MMU_PAGE_64K] = "hugepte_cache_64K",
[MMU_PAGE_1M] = "hugepte_cache_1M",
[MMU_PAGE_16M] = "hugepte_cache_16M",
[MMU_PAGE_16G] = "hugepte_cache_16G",
};

/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
* will choke on pointers to hugepte tables, which is handy for
* catching screwups early. */
Expand Down Expand Up @@ -114,15 +102,15 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
unsigned long address, unsigned int psize)
{
pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)],
GFP_KERNEL|__GFP_REPEAT);
pte_t *new = kmem_cache_zalloc(PGT_CACHE(hugepte_shift[psize]),
GFP_KERNEL|__GFP_REPEAT);

if (! new)
return -ENOMEM;

spin_lock(&mm->page_table_lock);
if (!hugepd_none(*hpdp))
kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new);
kmem_cache_free(PGT_CACHE(hugepte_shift[psize]), new);
else
hpdp->pd = (unsigned long)new | HUGEPD_OK;
spin_unlock(&mm->page_table_lock);
Expand Down Expand Up @@ -271,9 +259,7 @@ static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,

hpdp->pd = 0;
tlb->need_flush = 1;
pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
HUGEPTE_CACHE_NUM+psize-1,
PGF_CACHENUM_MASK));
pgtable_free_tlb(tlb, hugepte, hugepte_shift[psize]);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
Expand Down Expand Up @@ -698,8 +684,6 @@ static void __init set_huge_psize(int psize)
if (mmu_huge_psizes[psize] ||
mmu_psize_defs[psize].shift == PAGE_SHIFT)
return;
if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL))
return;
hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);

switch (mmu_psize_defs[psize].shift) {
Expand Down Expand Up @@ -753,9 +737,9 @@ static int __init hugetlbpage_init(void)
if (!cpu_has_feature(CPU_FTR_16M_PAGE))
return -ENODEV;

/* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE
* and adjust PTE_NONCACHE_NUM if the number of supported huge page
* sizes changes.
/* Add supported huge page sizes. Need to change
* HUGE_MAX_HSTATE if the number of supported huge page sizes
* changes.
*/
set_huge_psize(MMU_PAGE_16M);
set_huge_psize(MMU_PAGE_16G);
Expand All @@ -769,16 +753,11 @@ static int __init hugetlbpage_init(void)

for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
if (mmu_huge_psizes[psize]) {
pgtable_cache[HUGE_PGTABLE_INDEX(psize)] =
kmem_cache_create(
HUGEPTE_CACHE_NAME(psize),
HUGEPTE_TABLE_SIZE(psize),
HUGEPTE_TABLE_SIZE(psize),
0,
NULL);
if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)])
panic("hugetlbpage_init(): could not create %s"\
"\n", HUGEPTE_CACHE_NAME(psize));
pgtable_cache_add(hugepte_shift[psize], NULL);
if (!PGT_CACHE(hugepte_shift[psize]))
panic("hugetlbpage_init(): could not create "
"pgtable cache for %d bit pagesize\n",
mmu_psize_to_shift(psize));
}
}

Expand Down
70 changes: 49 additions & 21 deletions arch/powerpc/mm/init_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,30 +119,58 @@ static void pmd_ctor(void *addr)
memset(addr, 0, PMD_TABLE_SIZE);
}

static const unsigned int pgtable_cache_size[2] = {
PGD_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
#ifdef CONFIG_PPC_64K_PAGES
"pgd_cache", "pmd_cache",
#else
"pgd_cache", "pud_pmd_cache",
#endif /* CONFIG_PPC_64K_PAGES */
};

#ifdef CONFIG_HUGETLB_PAGE
/* Hugepages need an extra cache per hugepagesize, initialized in
* hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT
* is not compile time constant. */
struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];
#else
struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
#endif
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];

/*
* Create a kmem_cache() for pagetables. This is not used for PTE
* pages - they're linked to struct page, come from the normal free
* pages pool and have a different entry size (see real_pte_t) to
* everything else. Caches created by this function are used for all
* the higher level pagetables, and for hugepage pagetables.
*/
void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
{
char *name;
unsigned long table_size = sizeof(void *) << shift;
unsigned long align = table_size;

/* When batching pgtable pointers for RCU freeing, we store
* the index size in the low bits. Table alignment must be
* big enough to fit it */
unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
struct kmem_cache *new;

/* It would be nice if this was a BUILD_BUG_ON(), but at the
* moment, gcc doesn't seem to recognize is_power_of_2 as a
* constant expression, so so much for that. */
BUG_ON(!is_power_of_2(minalign));
BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));

if (PGT_CACHE(shift))
return; /* Already have a cache of this size */

align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
new = kmem_cache_create(name, table_size, align, 0, ctor);
PGT_CACHE(shift) = new;

pr_debug("Allocated pgtable cache for order %d\n", shift);
}


void pgtable_cache_init(void)
{
pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor);
pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor);
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
panic("Couldn't allocate pgtable caches");

/* In all current configs, when the PUD index exists it's the
* same size as either the pgd or pmd index. Verify that the
* initialization above has also created a PUD cache. This
* will need re-examiniation if we add new possibilities for
* the pagetable layout. */
BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
Expand Down
Loading

0 comments on commit a0668cd

Please sign in to comment.