Skip to content

Commit

Permalink
mm/mmu_notifier: avoid call to invalidate_range() in range_end()
Browse files Browse the repository at this point in the history
This is an optimization patch that only affect mmu_notifier users which
rely on the invalidate_range() callback.  This patch avoids calling that
callback twice in a row from inside __mmu_notifier_invalidate_range_end

Existing pattern (before this patch):
    mmu_notifier_invalidate_range_start()
        pte/pmd/pud_clear_flush_notify()
            mmu_notifier_invalidate_range()
    mmu_notifier_invalidate_range_end()
        mmu_notifier_invalidate_range()

New pattern (after this patch):
    mmu_notifier_invalidate_range_start()
        pte/pmd/pud_clear_flush_notify()
            mmu_notifier_invalidate_range()
    mmu_notifier_invalidate_range_only_end()

We call the invalidate_range callback after clearing the page table
under the page table lock and we skip the call to invalidate_range
inside the __mmu_notifier_invalidate_range_end() function.

Idea from Andrea Arcangeli

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Jérôme Glisse <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Suravee Suthikulpanit <[email protected]>
Cc: David Woodhouse <[email protected]>
Cc: Alistair Popple <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Stephen Rothwell <[email protected]>
Cc: Andrew Donnellan <[email protected]>
Cc: Nadav Amit <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Jérôme Glisse authored and torvalds committed Nov 16, 2017
1 parent 0f10851 commit 4645b9f
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 12 deletions.
17 changes: 15 additions & 2 deletions include/linux/mmu_notifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ extern void __mmu_notifier_change_pte(struct mm_struct *mm,
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end);
unsigned long start, unsigned long end,
bool only_end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
unsigned long start, unsigned long end);

Expand Down Expand Up @@ -268,7 +269,14 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
if (mm_has_notifiers(mm))
__mmu_notifier_invalidate_range_end(mm, start, end);
__mmu_notifier_invalidate_range_end(mm, start, end, false);
}

static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
if (mm_has_notifiers(mm))
__mmu_notifier_invalidate_range_end(mm, start, end, true);
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
Expand Down Expand Up @@ -439,6 +447,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
{
}

static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
Expand Down
46 changes: 42 additions & 4 deletions mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1223,7 +1223,12 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
page_remove_rmap(page, true);
spin_unlock(vmf->ptl);

mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
/*
* No need to double call mmu_notifier->invalidate_range() callback as
* the above pmdp_huge_clear_flush_notify() did already call it.
*/
mmu_notifier_invalidate_range_only_end(vma->vm_mm, mmun_start,
mmun_end);

ret |= VM_FAULT_WRITE;
put_page(page);
Expand Down Expand Up @@ -1372,7 +1377,12 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
}
spin_unlock(vmf->ptl);
out_mn:
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
/*
* No need to double call mmu_notifier->invalidate_range() callback as
* the above pmdp_huge_clear_flush_notify() did already call it.
*/
mmu_notifier_invalidate_range_only_end(vma->vm_mm, mmun_start,
mmun_end);
out:
return ret;
out_unlock:
Expand Down Expand Up @@ -2024,7 +2034,12 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,

out:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE);
/*
* No need to double call mmu_notifier->invalidate_range() callback as
* the above pudp_huge_clear_flush_notify() did already call it.
*/
mmu_notifier_invalidate_range_only_end(mm, haddr, haddr +
HPAGE_PUD_SIZE);
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */

Expand Down Expand Up @@ -2099,6 +2114,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR);
return;
} else if (is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
* __split_huge_pmd() ?
*
* We are going from a zero huge page write protected to zero
* small page also write protected so it does not seems useful
* to invalidate secondary mmu at this time.
*/
return __split_huge_zero_page_pmd(vma, haddr, pmd);
}

Expand Down Expand Up @@ -2234,7 +2258,21 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
__split_huge_pmd_locked(vma, pmd, haddr, freeze);
out:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
/*
* No need to double call mmu_notifier->invalidate_range() callback.
* They are 3 cases to consider inside __split_huge_pmd_locked():
* 1) pmdp_huge_clear_flush_notify() call invalidate_range() obvious
* 2) __split_huge_zero_page_pmd() read only zero page and any write
* fault will trigger a flush_notify before pointing to a new page
* (it is fine if the secondary mmu keeps pointing to the old zero
* page in the meantime)
* 3) Split a huge pmd into pte pointing to the same page. No need
* to invalidate secondary tlb entry they are all still valid.
* any further changes to individual pte will notify. So no need
* to call mmu_notifier->invalidate_range()
*/
mmu_notifier_invalidate_range_only_end(mm, haddr, haddr +
HPAGE_PMD_SIZE);
}

void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
Expand Down
6 changes: 5 additions & 1 deletion mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -2554,7 +2554,11 @@ static int wp_page_copy(struct vm_fault *vmf)
put_page(new_page);

pte_unmap_unlock(vmf->pte, vmf->ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
/*
* No need to double call mmu_notifier->invalidate_range() callback as
* the above ptep_clear_flush_notify() did already call it.
*/
mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end);
if (old_page) {
/*
* Don't let another task, with possibly unlocked vma,
Expand Down
15 changes: 12 additions & 3 deletions mm/migrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -2089,7 +2089,11 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);

spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
/*
* No need to double call mmu_notifier->invalidate_range() callback as
* the above pmdp_huge_clear_flush_notify() did already call it.
*/
mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end);

/* Take an "isolate" reference and put new page on the LRU. */
get_page(new_page);
Expand Down Expand Up @@ -2805,9 +2809,14 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
}

/*
* No need to double call mmu_notifier->invalidate_range() callback as
* the above ptep_clear_flush_notify() inside migrate_vma_insert_page()
* did already call it.
*/
if (notified)
mmu_notifier_invalidate_range_end(mm, mmu_start,
migrate->end);
mmu_notifier_invalidate_range_only_end(mm, mmu_start,
migrate->end);
}

/*
Expand Down
11 changes: 9 additions & 2 deletions mm/mmu_notifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,9 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);

void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
unsigned long start,
unsigned long end,
bool only_end)
{
struct mmu_notifier *mn;
int id;
Expand All @@ -204,8 +206,13 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
* subsystem registers either invalidate_range_start()/end() or
* invalidate_range(), so this will be no additional overhead
* (besides the pointer check).
*
* We skip call to invalidate_range() if we know it is safe ie
* call site use mmu_notifier_invalidate_range_only_end() which
* is safe to do when we know that a call to invalidate_range()
* already happen under page table lock.
*/
if (mn->ops->invalidate_range)
if (!only_end && mn->ops->invalidate_range)
mn->ops->invalidate_range(mn, mm, start, end);
if (mn->ops->invalidate_range_end)
mn->ops->invalidate_range_end(mn, mm, start, end);
Expand Down

0 comments on commit 4645b9f

Please sign in to comment.