Skip to content

Commit

Permalink
thp: implement split_huge_pmd()
Browse files Browse the repository at this point in the history
Original split_huge_page() combined two operations: splitting PMDs into
tables of PTEs and splitting underlying compound page.  This patch
implements split_huge_pmd() which split given PMD without splitting
other PMDs this page mapped with or underlying compound page.

Without tail page refcounting, implementation of split_huge_pmd() is
pretty straight-forward.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Tested-by: Sasha Levin <[email protected]>
Tested-by: Aneesh Kumar K.V <[email protected]>
Acked-by: Jerome Marchand <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: Steve Capper <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
kiryl authored and torvalds committed Jan 16, 2016
1 parent e81c480 commit eef1b3b
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 1 deletion.
11 changes: 10 additions & 1 deletion include/linux/huge_mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,16 @@ extern unsigned long transparent_hugepage_flags;

#define split_huge_page_to_list(page, list) BUILD_BUG()
#define split_huge_page(page) BUILD_BUG()
#define split_huge_pmd(__vma, __pmd, __address) BUILD_BUG()

void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address);

#define split_huge_pmd(__vma, __pmd, __address) \
do { \
pmd_t *____pmd = (__pmd); \
if (pmd_trans_huge(*____pmd)) \
__split_huge_pmd(__vma, __pmd, __address); \
} while (0)

#if HPAGE_PMD_ORDER >= MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
Expand Down
124 changes: 124 additions & 0 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -2666,6 +2666,130 @@ static int khugepaged(void *none)
return 0;
}

static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
unsigned long haddr, pmd_t *pmd)
{
struct mm_struct *mm = vma->vm_mm;
pgtable_t pgtable;
pmd_t _pmd;
int i;

/* leave pmd empty until pte is filled */
pmdp_huge_clear_flush_notify(vma, haddr, pmd);

pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);

for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
pte_t *pte, entry;
entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
entry = pte_mkspecial(entry);
pte = pte_offset_map(&_pmd, haddr);
VM_BUG_ON(!pte_none(*pte));
set_pte_at(mm, haddr, pte, entry);
pte_unmap(pte);
}
smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
put_huge_zero_page();
}

static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long haddr)
{
struct mm_struct *mm = vma->vm_mm;
struct page *page;
pgtable_t pgtable;
pmd_t _pmd;
bool young, write;
int i;

VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
VM_BUG_ON(!pmd_trans_huge(*pmd));

count_vm_event(THP_SPLIT_PMD);

if (vma_is_dax(vma)) {
pmd_t _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
if (is_huge_zero_pmd(_pmd))
put_huge_zero_page();
return;
} else if (is_huge_zero_pmd(*pmd)) {
return __split_huge_zero_page_pmd(vma, haddr, pmd);
}

page = pmd_page(*pmd);
VM_BUG_ON_PAGE(!page_count(page), page);
atomic_add(HPAGE_PMD_NR - 1, &page->_count);
write = pmd_write(*pmd);
young = pmd_young(*pmd);

/* leave pmd empty until pte is filled */
pmdp_huge_clear_flush_notify(vma, haddr, pmd);

pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);

for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
pte_t entry, *pte;
/*
* Note that NUMA hinting access restrictions are not
* transferred to avoid any possibility of altering
* permissions across VMAs.
*/
entry = mk_pte(page + i, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (!write)
entry = pte_wrprotect(entry);
if (!young)
entry = pte_mkold(entry);
pte = pte_offset_map(&_pmd, haddr);
BUG_ON(!pte_none(*pte));
set_pte_at(mm, haddr, pte, entry);
atomic_inc(&page[i]._mapcount);
pte_unmap(pte);
}

/*
* Set PG_double_map before dropping compound_mapcount to avoid
* false-negative page_mapped().
*/
if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
for (i = 0; i < HPAGE_PMD_NR; i++)
atomic_inc(&page[i]._mapcount);
}

if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
/* Last compound_mapcount is gone. */
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
if (TestClearPageDoubleMap(page)) {
/* No need in mapcount reference anymore */
for (i = 0; i < HPAGE_PMD_NR; i++)
atomic_dec(&page[i]._mapcount);
}
}

smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable);
}

void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address)
{
spinlock_t *ptl;
struct mm_struct *mm = vma->vm_mm;
unsigned long haddr = address & HPAGE_PMD_MASK;

mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
ptl = pmd_lock(mm, pmd);
if (likely(pmd_trans_huge(*pmd)))
__split_huge_pmd_locked(vma, pmd, haddr);
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
}

static void split_huge_pmd_address(struct vm_area_struct *vma,
unsigned long address)
{
Expand Down

0 comments on commit eef1b3b

Please sign in to comment.