mm, thp: remove infrastructure for handling splitting PMDs
With the new refcounting we no longer need to mark PMDs as splitting. Let's
drop the code that handles this.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Tested-by: Sasha Levin <[email protected]>
Tested-by: Aneesh Kumar K.V <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Acked-by: Jerome Marchand <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: Steve Capper <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
kiryl authored and torvalds committed Jan 16, 2016
1 parent 1f19617 commit 4b471e8
Showing 13 changed files with 41 additions and 183 deletions.
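
For orientation, a minimal caller-side sketch of the interface change (illustrative only; the shape of the callback is modeled on the fs/proc/task_mmu.c walkers changed below, not copied from the patch):

    /* Before: pmd_trans_huge_lock() returned an int: 1 for a stable huge pmd
     * (page table lock held), -1 if the pmd was under splitting (lock dropped
     * after waiting via wait_split_huge_page()), 0 otherwise. */
    if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
            /* operate on the huge pmd */
            spin_unlock(ptl);
            return 0;
    }

    /* After: with the splitting state gone, the helper returns bool and keeps
     * the page table lock held only when it returns true. */
    if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
            /* operate on the huge pmd */
            spin_unlock(ptl);
            return 0;
    }
    /* otherwise fall through to the pte-level walk */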
40 changes: 0 additions & 40 deletions Documentation/features/vm/pmdp_splitting_flush/arch-support.txt

This file was deleted.

8 changes: 4 additions & 4 deletions fs/proc/task_mmu.c
@@ -602,7 +602,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte;
spinlock_t *ptl;

if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
@@ -913,7 +913,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;

if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd);
goto out;
@@ -1187,7 +1187,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
int err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_trans_huge_lock(pmdp, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmdp, vma, &ptl)) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;

@@ -1519,7 +1519,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *orig_pte;
pte_t *pte;

if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;

9 changes: 0 additions & 9 deletions include/asm-generic/pgtable.h
@@ -207,11 +207,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
extern void pmdp_splitting_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
#endif

#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
@@ -627,10 +622,6 @@ static inline int pmd_trans_huge(pmd_t pmd)
{
return 0;
}
static inline int pmd_trans_splitting(pmd_t pmd)
{
return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
21 changes: 6 additions & 15 deletions include/linux/huge_mm.h
@@ -25,7 +25,7 @@ extern int zap_huge_pmd(struct mmu_gather *tlb,
extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned char *vec);
extern int move_huge_pmd(struct vm_area_struct *vma,
extern bool move_huge_pmd(struct vm_area_struct *vma,
struct vm_area_struct *new_vma,
unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
@@ -48,15 +48,9 @@ enum transparent_hugepage_flag {
#endif
};

enum page_check_address_pmd_flag {
PAGE_CHECK_ADDRESS_PMD_FLAG,
PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG,
PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG,
};
extern pmd_t *page_check_address_pmd(struct page *page,
struct mm_struct *mm,
unsigned long address,
enum page_check_address_pmd_flag flag,
spinlock_t **ptl);

#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
@@ -100,7 +94,6 @@ extern unsigned long transparent_hugepage_flags;
#define split_huge_page(page) BUILD_BUG()
#define split_huge_pmd(__vma, __pmd, __address) BUILD_BUG()

#define wait_split_huge_page(__anon_vma, __pmd) BUILD_BUG()
#if HPAGE_PMD_ORDER >= MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
@@ -110,17 +103,17 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
long adjust_next);
extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
extern bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl);
/* mmap_sem must be held on entry */
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl)
{
VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (pmd_trans_huge(*pmd))
return __pmd_trans_huge_lock(pmd, vma, ptl);
else
return 0;
return false;
}
static inline int hpage_nr_pages(struct page *page)
{
@@ -165,8 +158,6 @@ static inline int split_huge_page(struct page *page)
{
return 0;
}
#define wait_split_huge_page(__anon_vma, __pmd) \
do { } while (0)
#define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0)
static inline int hugepage_madvise(struct vm_area_struct *vma,
@@ -181,10 +172,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
long adjust_next)
{
}
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl)
{
return 0;
return false;
}

static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
12 changes: 1 addition & 11 deletions mm/gup.c
@@ -241,13 +241,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags);
}

if (unlikely(pmd_trans_splitting(*pmd))) {
spin_unlock(ptl);
wait_split_huge_page(vma->anon_vma, pmd);
return follow_page_pte(vma, address, pmd, flags);
}

if (flags & FOLL_SPLIT) {
int ret;
page = pmd_page(*pmd);
@@ -1068,9 +1061,6 @@ struct page *get_dump_page(unsigned long addr)
* *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
* pages containing page tables.
*
* *) THP splits will broadcast an IPI, this can be achieved by overriding
* pmdp_splitting_flush.
*
* *) ptes can be read atomically by the architecture.
*
* *) access_ok is sufficient to validate userspace address ranges.
@@ -1267,7 +1257,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
pmd_t pmd = READ_ONCE(*pmdp);

next = pmd_addr_end(addr, end);
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
if (pmd_none(pmd))
return 0;

if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
67 changes: 16 additions & 51 deletions mm/huge_memory.c
@@ -986,15 +986,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
goto out_unlock;
}

if (unlikely(pmd_trans_splitting(pmd))) {
/* split huge page running from under us */
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
pte_free(dst_mm, pgtable);

wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
goto out;
}
src_page = pmd_page(pmd);
VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
get_page(src_page);
@@ -1470,7 +1461,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t orig_pmd;
spinlock_t *ptl;

if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
return 0;
/*
* For architectures like ppc64 we look at deposited pgtable
@@ -1504,13 +1495,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
return 1;
}

int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
bool move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd)
{
spinlock_t *old_ptl, *new_ptl;
int ret = 0;
pmd_t pmd;

struct mm_struct *mm = vma->vm_mm;
@@ -1519,23 +1509,22 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
(new_addr & ~HPAGE_PMD_MASK) ||
old_end - old_addr < HPAGE_PMD_SIZE ||
(new_vma->vm_flags & VM_NOHUGEPAGE))
goto out;
return false;

/*
* The destination pmd shouldn't be established, free_pgtables()
* should have release it.
*/
if (WARN_ON(!pmd_none(*new_pmd))) {
VM_BUG_ON(pmd_trans_huge(*new_pmd));
goto out;
return false;
}

/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_sem prevents deadlock.
*/
ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
if (ret == 1) {
if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
new_ptl = pmd_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
@@ -1551,9 +1540,9 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
spin_unlock(old_ptl);
return true;
}
out:
return ret;
return false;
}

/*
Expand All @@ -1569,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
spinlock_t *ptl;
int ret = 0;

if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
pmd_t entry;
bool preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
@@ -1600,29 +1589,19 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
}

/*
* Returns 1 if a given pmd maps a stable (not under splitting) thp.
* Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
* Returns true if a given pmd maps a thp, false otherwise.
*
* Note that if it returns 1, this routine returns without unlocking page
* table locks. So callers must unlock them.
* Note that if it returns true, this routine returns without unlocking page
* table lock. So callers must unlock it.
*/
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
spinlock_t **ptl)
{
*ptl = pmd_lock(vma->vm_mm, pmd);
if (likely(pmd_trans_huge(*pmd))) {
if (unlikely(pmd_trans_splitting(*pmd))) {
spin_unlock(*ptl);
wait_split_huge_page(vma->anon_vma, pmd);
return -1;
} else {
/* Thp mapped by 'pmd' is stable, so we can
* handle it as it is. */
return 1;
}
}
if (likely(pmd_trans_huge(*pmd)))
return true;
spin_unlock(*ptl);
return 0;
return false;
}

/*
Expand All @@ -1636,7 +1615,6 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
pmd_t *page_check_address_pmd(struct page *page,
struct mm_struct *mm,
unsigned long address,
enum page_check_address_pmd_flag flag,
spinlock_t **ptl)
{
pgd_t *pgd;
@@ -1659,21 +1637,8 @@ pmd_t *page_check_address_pmd(struct page *page,
goto unlock;
if (pmd_page(*pmd) != page)
goto unlock;
/*
* split_vma() may create temporary aliased mappings. There is
* no risk as long as all huge pmd are found and have their
* splitting bit set before __split_huge_page_refcount
* runs. Finding the same huge pmd more than once during the
* same rmap walk is not a problem.
*/
if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
pmd_trans_splitting(*pmd))
goto unlock;
if (pmd_trans_huge(*pmd)) {
VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
!pmd_trans_splitting(*pmd));
if (pmd_trans_huge(*pmd))
return pmd;
}
unlock:
spin_unlock(*ptl);
return NULL;
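
To make the revised __pmd_trans_huge_lock() contract concrete, a sketch of a caller, patterned on zap_huge_pmd() in the hunks above (illustrative, not part of the patch):

    spinlock_t *ptl;

    if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
            return 0;               /* not a huge pmd: the lock is already dropped */

    /* here *pmd is a stable huge pmd and ptl is held */

    spin_unlock(ptl);               /* the caller is responsible for unlocking */
    return 1;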
13 changes: 2 additions & 11 deletions mm/memcontrol.c
@@ -4675,7 +4675,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
pte_t *pte;
spinlock_t *ptl;

if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
spin_unlock(ptl);
@@ -4863,16 +4863,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
union mc_target target;
struct page *page;

/*
* No race with splitting thp happens because:
* - if pmd_trans_huge_lock() returns 1, the relevant thp is not
* under splitting, which means there's no concurrent thp split,
* - if another thread runs into split_huge_page() just after we
* entered this if-block, the thread must wait for page table lock
* to be unlocked in __split_huge_page_splitting(), where the main
* part of thp split is not executed yet.
*/
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
if (mc.precharge < HPAGE_PMD_NR) {
spin_unlock(ptl);
return 0;