mm/mmu_notifier: contextual information for event triggering invalidation

CPU page table updates can happen for many reasons, not only as a result
of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also as
a result of kernel activities (memory compression, reclaim, migration,
...).

Users of the mmu notifier API track changes to the CPU page table and take
specific actions in response.  The current API, however, only provides the
range of virtual addresses affected by a change, not why the change is
happening.

This patchset does the initial mechanical conversion of all the places
that call mmu_notifier_range_init() to also provide the default
MMU_NOTIFY_UNMAP event as well as the vma if it is known (most
invalidations happen against a given vma).  Passing down the vma allows
users of the mmu notifier to inspect the new vma page protection.
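
For illustration, here is a hedged sketch of how a mmu notifier user might
inspect that extra context in its invalidate_range_start() callback.  It
assumes struct mmu_notifier_range gains a ->vma field as part of this
series (not visible in this patch alone); example_downgrade_to_read_only()
and example_unmap() are hypothetical helpers standing in for a device
driver's own mirroring logic:

/*
 * Sketch only: the skip/downgrade policy is illustrative, not from
 * this commit.  range->vma may be NULL when no vma was known.
 */
static int example_invalidate_range_start(struct mmu_notifier *mn,
                                const struct mmu_notifier_range *range)
{
        if (range->vma && !(range->vma->vm_flags & VM_WRITE)) {
                /*
                 * The new vma protection is read-only: a device mirror
                 * could downgrade its mapping instead of tearing it down.
                 */
                example_downgrade_to_read_only(mn, range->start, range->end);
                return 0;
        }

        /* Safe default: assume everything in [start, end) is going away. */
        example_unmap(mn, range->start, range->end);
        return 0;
}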

MMU_NOTIFY_UNMAP is always the safe default, as users of the mmu notifier
should assume that everything in the range is going away when that event
happens.  A later patch converts the mm call paths to use a more
appropriate event for each call.
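
For reference, the richer event types that those later patches switch call
sites over to look roughly like this (a sketch of the enum introduced by
the series; the comments are paraphrased, not quoted):

enum mmu_notifier_event {
        MMU_NOTIFY_UNMAP = 0,           /* range is going away, safe default */
        MMU_NOTIFY_CLEAR,               /* page table entries cleared (unmap,
                                         * migrate, reclaim, ...) */
        MMU_NOTIFY_PROTECTION_VMA,      /* protection change on a whole vma */
        MMU_NOTIFY_PROTECTION_PAGE,     /* protection change on single pages */
        MMU_NOTIFY_SOFT_DIRTY,          /* soft-dirty write protection */
};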

This is done as two patches so that no call site is forgotten, especially
since the conversion relies on the following coccinelle patch:

%<----------------------------------------------------------------------
@@
identifier I1, I2, I3, I4;
@@
static inline void mmu_notifier_range_init(struct mmu_notifier_range *I1,
+enum mmu_notifier_event event,
+unsigned flags,
+struct vm_area_struct *vma,
struct mm_struct *I2, unsigned long I3, unsigned long I4) { ... }

@@
@@
-#define mmu_notifier_range_init(range, mm, start, end)
+#define mmu_notifier_range_init(range, event, flags, vma, mm, start, end)

@@
expression E1, E3, E4;
identifier I1;
@@
<...
mmu_notifier_range_init(E1,
+MMU_NOTIFY_UNMAP, 0, I1,
I1->vm_mm, E3, E4)
...>

@@
expression E1, E2, E3, E4;
identifier FN, VMA;
@@
FN(..., struct vm_area_struct *VMA, ...) {
<...
mmu_notifier_range_init(E1,
+MMU_NOTIFY_UNMAP, 0, VMA,
E2, E3, E4)
...> }

@@
expression E1, E2, E3, E4;
identifier FN, VMA;
@@
FN(...) {
struct vm_area_struct *VMA;
<...
mmu_notifier_range_init(E1,
+MMU_NOTIFY_UNMAP, 0, VMA,
E2, E3, E4)
...> }

@@
expression E1, E2, E3, E4;
identifier FN;
@@
FN(...) {
<...
mmu_notifier_range_init(E1,
+MMU_NOTIFY_UNMAP, 0, NULL,
E2, E3, E4)
...> }
---------------------------------------------------------------------->%

Applied with:
spatch --all-includes --sp-file mmu-notifier.spatch fs/proc/task_mmu.c --in-place
spatch --sp-file mmu-notifier.spatch --dir kernel/events/ --in-place
spatch --sp-file mmu-notifier.spatch --dir mm --in-place

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Jérôme Glisse <[email protected]>
Reviewed-by: Ralph Campbell <[email protected]>
Reviewed-by: Ira Weiny <[email protected]>
Cc: Christian König <[email protected]>
Cc: Joonas Lahtinen <[email protected]>
Cc: Jani Nikula <[email protected]>
Cc: Rodrigo Vivi <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Felix Kuehling <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krcmar <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Christian Koenig <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Jérôme Glisse authored and torvalds committed May 14, 2019
1 parent d87f055 commit 6f4f13e
Showing 14 changed files with 62 additions and 30 deletions.
3 changes: 2 additions & 1 deletion fs/proc/task_mmu.c
@@ -1169,7 +1169,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                break;
                        }

-                       mmu_notifier_range_init(&range, mm, 0, -1UL);
+                       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
+                                               NULL, mm, 0, -1UL);
                        mmu_notifier_invalidate_range_start(&range);
                }
                walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);

5 changes: 4 additions & 1 deletion include/linux/mmu_notifier.h
@@ -356,6 +356,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)


 static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
+                                          enum mmu_notifier_event event,
+                                          unsigned flags,
+                                          struct vm_area_struct *vma,
                                           struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
@@ -491,7 +494,7 @@ static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range,
        range->end = end;
 }

-#define mmu_notifier_range_init(range, mm, start, end) \
+#define mmu_notifier_range_init(range,event,flags,vma,mm,start,end) \
        _mmu_notifier_range_init(range, start, end)

 static inline bool
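
The hunk above collapses the helper's body; under CONFIG_MMU_NOTIFIER it
plausibly just records the extra context in the range.  A hedged sketch,
assuming struct mmu_notifier_range carries vma, event, and flags fields
(the field layout is not visible in this diff):

static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
                                           enum mmu_notifier_event event,
                                           unsigned flags,
                                           struct vm_area_struct *vma,
                                           struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
{
        range->vma = vma;       /* may be NULL when no vma is known */
        range->event = event;   /* MMU_NOTIFY_UNMAP everywhere in this patch */
        range->mm = mm;
        range->start = start;
        range->end = end;
        range->flags = flags;
}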
3 changes: 2 additions & 1 deletion kernel/events/uprobes.c
@@ -161,7 +161,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        struct mmu_notifier_range range;
        struct mem_cgroup *memcg;

-       mmu_notifier_range_init(&range, mm, addr, addr + PAGE_SIZE);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, addr,
+                               addr + PAGE_SIZE);

        VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);

12 changes: 8 additions & 4 deletions mm/huge_memory.c
@@ -1224,7 +1224,8 @@ static vm_fault_t do_huge_pmd_wp_page_fallback(struct vm_fault *vmf,
                cond_resched();
        }

-       mmu_notifier_range_init(&range, vma->vm_mm, haddr,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               haddr,
                                haddr + HPAGE_PMD_SIZE);
        mmu_notifier_invalidate_range_start(&range);

@@ -1388,7 +1389,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
                                        vma, HPAGE_PMD_NR);
        __SetPageUptodate(new_page);

-       mmu_notifier_range_init(&range, vma->vm_mm, haddr,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               haddr,
                                haddr + HPAGE_PMD_SIZE);
        mmu_notifier_invalidate_range_start(&range);

@@ -2064,7 +2066,8 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
        spinlock_t *ptl;
        struct mmu_notifier_range range;

-       mmu_notifier_range_init(&range, vma->vm_mm, address & HPAGE_PUD_MASK,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               address & HPAGE_PUD_MASK,
                                (address & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE);
        mmu_notifier_invalidate_range_start(&range);
        ptl = pud_lock(vma->vm_mm, pud);
@@ -2282,7 +2285,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
        spinlock_t *ptl;
        struct mmu_notifier_range range;

-       mmu_notifier_range_init(&range, vma->vm_mm, address & HPAGE_PMD_MASK,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               address & HPAGE_PMD_MASK,
                                (address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
        mmu_notifier_invalidate_range_start(&range);
        ptl = pmd_lock(vma->vm_mm, pmd);

12 changes: 8 additions & 4 deletions mm/hugetlb.c
@@ -3294,7 +3294,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
        cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

        if (cow) {
-               mmu_notifier_range_init(&range, src, vma->vm_start,
+               mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, src,
+                                       vma->vm_start,
                                        vma->vm_end);
                mmu_notifier_invalidate_range_start(&range);
        }
@@ -3406,7 +3407,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
        /*
         * If sharing possible, alert mmu notifiers of worst case.
         */
-       mmu_notifier_range_init(&range, mm, start, end);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
+                               end);
        adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
        mmu_notifier_invalidate_range_start(&range);
        address = start;
@@ -3673,7 +3675,8 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
                            pages_per_huge_page(h));
        __SetPageUptodate(new_page);

-       mmu_notifier_range_init(&range, mm, haddr, haddr + huge_page_size(h));
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, haddr,
+                               haddr + huge_page_size(h));
        mmu_notifier_invalidate_range_start(&range);

        /*
@@ -4408,7 +4411,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
         * start/end. Set range.start/range.end to cover the maximum possible
         * range if PMD sharing is possible.
         */
-       mmu_notifier_range_init(&range, mm, start, end);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
+                               end);
        adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);

        BUG_ON(address >= end);

3 changes: 2 additions & 1 deletion mm/khugepaged.c
@@ -1016,7 +1016,8 @@ static void collapse_huge_page(struct mm_struct *mm,
        pte = pte_offset_map(pmd, address);
        pte_ptl = pte_lockptr(mm, pmd);

-       mmu_notifier_range_init(&range, mm, address, address + HPAGE_PMD_SIZE);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, NULL, mm,
+                               address, address + HPAGE_PMD_SIZE);
        mmu_notifier_invalidate_range_start(&range);
        pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
        /*

6 changes: 4 additions & 2 deletions mm/ksm.c
@@ -1066,7 +1066,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,

        BUG_ON(PageTransCompound(page));

-       mmu_notifier_range_init(&range, mm, pvmw.address,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm,
+                               pvmw.address,
                                pvmw.address + PAGE_SIZE);
        mmu_notifier_invalidate_range_start(&range);

@@ -1154,7 +1155,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
        if (!pmd)
                goto out;

-       mmu_notifier_range_init(&range, mm, addr, addr + PAGE_SIZE);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, addr,
+                               addr + PAGE_SIZE);
        mmu_notifier_invalidate_range_start(&range);

        ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);

3 changes: 2 additions & 1 deletion mm/madvise.c
@@ -472,7 +472,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
        range.end = min(vma->vm_end, end_addr);
        if (range.end <= vma->vm_start)
                return -EINVAL;
-       mmu_notifier_range_init(&range, mm, range.start, range.end);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm,
+                               range.start, range.end);

        lru_add_drain();
        tlb_gather_mmu(&tlb, mm, range.start, range.end);

25 changes: 16 additions & 9 deletions mm/memory.c
@@ -1010,7 +1010,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        is_cow = is_cow_mapping(vma->vm_flags);

        if (is_cow) {
-               mmu_notifier_range_init(&range, src_mm, addr, end);
+               mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma,
+                                       src_mm, addr, end);
                mmu_notifier_invalidate_range_start(&range);
        }

@@ -1334,7 +1335,8 @@ void unmap_vmas(struct mmu_gather *tlb,
 {
        struct mmu_notifier_range range;

-       mmu_notifier_range_init(&range, vma->vm_mm, start_addr, end_addr);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               start_addr, end_addr);
        mmu_notifier_invalidate_range_start(&range);
        for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
                unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
@@ -1356,7 +1358,8 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
        struct mmu_gather tlb;

        lru_add_drain();
-       mmu_notifier_range_init(&range, vma->vm_mm, start, start + size);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               start, start + size);
        tlb_gather_mmu(&tlb, vma->vm_mm, start, range.end);
        update_hiwater_rss(vma->vm_mm);
        mmu_notifier_invalidate_range_start(&range);
@@ -1382,7 +1385,8 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
        struct mmu_gather tlb;

        lru_add_drain();
-       mmu_notifier_range_init(&range, vma->vm_mm, address, address + size);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               address, address + size);
        tlb_gather_mmu(&tlb, vma->vm_mm, address, range.end);
        update_hiwater_rss(vma->vm_mm);
        mmu_notifier_invalidate_range_start(&range);
@@ -2279,7 +2283,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)

        __SetPageUptodate(new_page);

-       mmu_notifier_range_init(&range, mm, vmf->address & PAGE_MASK,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm,
+                               vmf->address & PAGE_MASK,
                                (vmf->address & PAGE_MASK) + PAGE_SIZE);
        mmu_notifier_invalidate_range_start(&range);

@@ -4104,8 +4109,9 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
                        goto out;

                if (range) {
-                       mmu_notifier_range_init(range, mm, address & PMD_MASK,
-                                               (address & PMD_MASK) + PMD_SIZE);
+                       mmu_notifier_range_init(range, MMU_NOTIFY_UNMAP, 0,
+                                               NULL, mm, address & PMD_MASK,
+                                               (address & PMD_MASK) + PMD_SIZE);
                        mmu_notifier_invalidate_range_start(range);
                }
                *ptlp = pmd_lock(mm, pmd);
@@ -4122,8 +4128,9 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
                goto out;

        if (range) {
-               mmu_notifier_range_init(range, mm, address & PAGE_MASK,
-                                       (address & PAGE_MASK) + PAGE_SIZE);
+               mmu_notifier_range_init(range, MMU_NOTIFY_UNMAP, 0, NULL, mm,
+                                       address & PAGE_MASK,
+                                       (address & PAGE_MASK) + PAGE_SIZE);
                mmu_notifier_invalidate_range_start(range);
        }
        ptep = pte_offset_map_lock(mm, pmd, address, ptlp);

5 changes: 4 additions & 1 deletion mm/migrate.c
@@ -2356,7 +2356,8 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
        mm_walk.mm = migrate->vma->vm_mm;
        mm_walk.private = migrate;

-       mmu_notifier_range_init(&range, mm_walk.mm, migrate->start,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, NULL, mm_walk.mm,
+                               migrate->start,
                                migrate->end);
        mmu_notifier_invalidate_range_start(&range);
        walk_page_range(migrate->start, migrate->end, &mm_walk);
@@ -2764,6 +2765,8 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
                                notified = true;

                                mmu_notifier_range_init(&range,
+                                                       MMU_NOTIFY_UNMAP, 0,
+                                                       NULL,
                                                        migrate->vma->vm_mm,
                                                        addr, migrate->end);
                                mmu_notifier_invalidate_range_start(&range);

3 changes: 2 additions & 1 deletion mm/mprotect.c
@@ -185,7 +185,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,

                /* invoke the mmu notifier if the pmd is populated */
                if (!range.start) {
-                       mmu_notifier_range_init(&range, vma->vm_mm, addr, end);
+                       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
+                                               vma, vma->vm_mm, addr, end);
                        mmu_notifier_invalidate_range_start(&range);
                }

3 changes: 2 additions & 1 deletion mm/mremap.c
@@ -249,7 +249,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
        old_end = old_addr + len;
        flush_cache_range(vma, old_addr, old_end);

-       mmu_notifier_range_init(&range, vma->vm_mm, old_addr, old_end);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               old_addr, old_end);
        mmu_notifier_invalidate_range_start(&range);

        for (; old_addr < old_end; old_addr += extent, new_addr += extent) {

3 changes: 2 additions & 1 deletion mm/oom_kill.c
@@ -531,7 +531,8 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
                        struct mmu_notifier_range range;
                        struct mmu_gather tlb;

-                       mmu_notifier_range_init(&range, mm, vma->vm_start,
+                       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
+                                               vma, mm, vma->vm_start,
                                                vma->vm_end);
                        tlb_gather_mmu(&tlb, mm, range.start, range.end);
                        if (mmu_notifier_invalidate_range_start_nonblock(&range)) {

6 changes: 4 additions & 2 deletions mm/rmap.c
@@ -896,7 +896,8 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
         * We have to assume the worse case ie pmd for invalidation. Note that
         * the page can not be free from this function.
         */
-       mmu_notifier_range_init(&range, vma->vm_mm, address,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               address,
                                min(vma->vm_end, address +
                                    (PAGE_SIZE << compound_order(page))));
        mmu_notifier_invalidate_range_start(&range);
@@ -1371,7 +1372,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         * Note that the page can not be free in this function as call of
         * try_to_unmap() must hold a reference on the page.
         */
-       mmu_notifier_range_init(&range, vma->vm_mm, address,
+       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
+                               address,
                                min(vma->vm_end, address +
                                    (PAGE_SIZE << compound_order(page))));
        if (PageHuge(page)) {
