Skip to content

Commit

Permalink
hugetlb: take PMD sharing into account when flushing tlb/caches
Browse files Browse the repository at this point in the history
When fixing an issue with PMD sharing and migration, it was discovered via
code inspection that other callers of huge_pmd_unshare potentially have an
issue with cache and tlb flushing.

Use the routine adjust_range_if_pmd_sharing_possible() to calculate worst
case ranges for mmu notifiers.  Ensure that this range is flushed if
huge_pmd_unshare succeeds and unmaps a PUD_SIZE area.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Mike Kravetz <[email protected]>
Acked-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Naoya Horiguchi <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Jerome Glisse <[email protected]>
Cc: Mike Kravetz <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
  • Loading branch information
mjkravetz authored and gregkh committed Oct 5, 2018
1 parent 017b166 commit dff11ab
Showing 1 changed file with 44 additions and 9 deletions.
53 changes: 44 additions & 9 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
const unsigned long mmun_start = start; /* For mmu_notifiers */
const unsigned long mmun_end = end; /* For mmu_notifiers */
unsigned long mmun_start = start; /* For mmu_notifiers */
unsigned long mmun_end = end; /* For mmu_notifiers */

WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
Expand All @@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/
tlb_remove_check_page_size_change(tlb, sz);
tlb_start_vma(tlb, vma);

/*
* If sharing possible, alert mmu notifiers of worst case.
*/
adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start;
for (; address < end; address += sz) {
Expand All @@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, &address, ptep)) {
spin_unlock(ptl);
/*
* We just unmapped a page of PMDs by clearing a PUD.
* The caller's TLB flush range should cover this area.
*/
continue;
}

Expand Down Expand Up @@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
{
struct mm_struct *mm;
struct mmu_gather tlb;
unsigned long tlb_start = start;
unsigned long tlb_end = end;

/*
* If shared PMDs were possibly used within this vma range, adjust
* start/end for worst case tlb flushing.
* Note that we can not be sure if PMDs are shared until we try to
* unmap pages. However, we want to make sure TLB flushing covers
* the largest possible range.
*/
adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);

mm = vma->vm_mm;

tlb_gather_mmu(&tlb, mm, start, end);
tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
tlb_finish_mmu(&tlb, start, end);
tlb_finish_mmu(&tlb, tlb_start, tlb_end);
}

/*
Expand Down Expand Up @@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
pte_t pte;
struct hstate *h = hstate_vma(vma);
unsigned long pages = 0;
unsigned long f_start = start;
unsigned long f_end = end;
bool shared_pmd = false;

/*
* In the case of shared PMDs, the area to flush could be beyond
* start/end. Set f_start/f_end to cover the maximum possible
* range if PMD sharing is possible.
*/
adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);

BUG_ON(address >= end);
flush_cache_range(vma, address, end);
flush_cache_range(vma, f_start, f_end);

mmu_notifier_invalidate_range_start(mm, start, end);
mmu_notifier_invalidate_range_start(mm, f_start, f_end);
i_mmap_lock_write(vma->vm_file->f_mapping);
for (; address < end; address += huge_page_size(h)) {
spinlock_t *ptl;
Expand All @@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
if (huge_pmd_unshare(mm, &address, ptep)) {
pages++;
spin_unlock(ptl);
shared_pmd = true;
continue;
}
pte = huge_ptep_get(ptep);
Expand Down Expand Up @@ -4348,17 +4379,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
* may have cleared our pud entry and done put_page on the page table:
* once we release i_mmap_rwsem, another task can do the final put_page
* and that page table be reused and filled with junk.
* and that page table be reused and filled with junk. If we actually
* did unshare a page of pmds, flush the range corresponding to the pud.
*/
flush_hugetlb_tlb_range(vma, start, end);
if (shared_pmd)
flush_hugetlb_tlb_range(vma, f_start, f_end);
else
flush_hugetlb_tlb_range(vma, start, end);
/*
* No need to call mmu_notifier_invalidate_range() we are downgrading
* page table protection not changing it to point to a new page.
*
* See Documentation/vm/mmu_notifier.rst
*/
i_mmap_unlock_write(vma->vm_file->f_mapping);
mmu_notifier_invalidate_range_end(mm, start, end);
mmu_notifier_invalidate_range_end(mm, f_start, f_end);

return pages << h->order;
}
Expand Down

0 comments on commit dff11ab

Please sign in to comment.