hugetlbfs: remove call to huge_pte_alloc without i_mmap_rwsem
Commit c0d0381 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing
synchronization") requires callers of huge_pte_alloc to hold i_mmap_rwsem
in at least read mode.  This is because the explicit locking in
huge_pmd_share (called by huge_pte_alloc) was removed.  When restructuring
the code, the call to huge_pte_alloc in the else block at the beginning of
hugetlb_fault was missed.

Unfortunately, that else clause is exercised when there is no page table
entry.  This will likely lead to a call to huge_pmd_share.  If
huge_pmd_share thinks pmd sharing is possible, it will traverse the
mapping tree (i_mmap) without holding i_mmap_rwsem.  If someone else is
modifying the tree, bad things such as addressing exceptions or worse
could happen.

Simply remove the else clause.  It should have been removed previously.
The code following the else will call huge_pte_alloc with the appropriate
locking.
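
For reference, the retained path below the removed else clause takes the
fault-path locks before allocating the page table entry.  The following is
a simplified excerpt of that pattern, not the complete hugetlb_fault()
logic; the fault handling itself is elided.

	/*
	 * Simplified from hugetlb_fault() after this fix: i_mmap_rwsem is
	 * held in read mode across huge_pte_alloc(), so huge_pmd_share()
	 * can safely walk the mapping's i_mmap interval tree.
	 */
	mapping = vma->vm_file->f_mapping;
	i_mmap_lock_read(mapping);
	ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
	if (!ptep) {
		i_mmap_unlock_read(mapping);
		return VM_FAULT_OOM;
	}
	/* ... handle the fault, then drop the lock ... */
	i_mmap_unlock_read(mapping);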

To prevent this type of issue in the future, add routines to assert that
i_mmap_rwsem is held, and call these routines in huge pmd sharing
routines.

Fixes: c0d0381 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
Suggested-by: Matthew Wilcox <[email protected]>
Signed-off-by: Mike Kravetz <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: "Aneesh Kumar K.V" <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: "Kirill A.Shutemov" <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Prakash Sangappa <[email protected]>
Cc: <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
mjkravetz authored and torvalds committed Aug 12, 2020
1 parent 1556829 commit 34ae204
Showing 4 changed files with 23 additions and 12 deletions.
include/linux/fs.h: 10 additions & 0 deletions
@@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(struct address_space *mapping)
 	up_read(&mapping->i_mmap_rwsem);
 }
 
+static inline void i_mmap_assert_locked(struct address_space *mapping)
+{
+	lockdep_assert_held(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_assert_write_locked(struct address_space *mapping)
+{
+	lockdep_assert_held_write(&mapping->i_mmap_rwsem);
+}
+
 /*
  * Might pages of this file be mapped into userspace?
  */
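Both helpers are thin wrappers around lockdep and compile to no-ops when
CONFIG_LOCKDEP is disabled, so the checks are debug-only.  As a rough usage
sketch (the walker below is hypothetical, not part of this commit), a
routine that traverses the i_mmap interval tree can enforce its locking
contract up front:

	/* Hypothetical example: assert the contract before walking i_mmap. */
	static void walk_file_mappings(struct address_space *mapping,
				       pgoff_t first, pgoff_t last)
	{
		struct vm_area_struct *vma;

		i_mmap_assert_locked(mapping);	/* lockdep splat if not held */
		vma_interval_tree_foreach(vma, &mapping->i_mmap, first, last) {
			/* ... examine each VMA mapping this file range ... */
		}
	}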
include/linux/hugetlb.h: 5 additions & 3 deletions
@@ -164,7 +164,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz);
 pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz);
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long *addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 				unsigned long *start, unsigned long *end);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -203,8 +204,9 @@ static inline struct address_space *hugetlb_page_mapping_lock_write(
 	return NULL;
 }
 
-static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
-					pte_t *ptep)
+static inline int huge_pmd_unshare(struct mm_struct *mm,
+					struct vm_area_struct *vma,
+					unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
mm/hugetlb.c: 7 additions & 8 deletions
@@ -3967,7 +3967,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			continue;
 
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, &address, ptep)) {
+		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
 			spin_unlock(ptl);
 			/*
 			 * We just unmapped a page of PMDs by clearing a PUD.
@@ -4554,10 +4554,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
 				VM_FAULT_SET_HINDEX(hstate_index(h));
-	} else {
-		ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
-		if (!ptep)
-			return VM_FAULT_OOM;
 	}
 
 	/*
@@ -5034,7 +5030,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		if (!ptep)
 			continue;
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, &address, ptep)) {
+		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
 			pages++;
 			spin_unlock(ptl);
 			shared_pmd = true;
@@ -5415,12 +5411,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
  * returns: 1 successfully unmapped a shared pte page
  *	    0 the underlying pte page is not shared, or it is the last user
  */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long *addr, pte_t *ptep)
 {
 	pgd_t *pgd = pgd_offset(mm, *addr);
 	p4d_t *p4d = p4d_offset(pgd, *addr);
 	pud_t *pud = pud_offset(p4d, *addr);
 
+	i_mmap_assert_write_locked(vma->vm_file->f_mapping);
 	BUG_ON(page_count(virt_to_page(ptep)) == 0);
 	if (page_count(virt_to_page(ptep)) == 1)
 		return 0;
@@ -5438,7 +5436,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	return NULL;
 }
 
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
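The assertion added to huge_pmd_unshare() encodes the write side of the
contract: since commit c0d0381, callers unshare while holding i_mmap_rwsem
in write mode, and passing the vma through gives huge_pmd_unshare() a path
to the backing file's address_space so it can check that.  A minimal sketch
of a conforming caller, loosely following the __unmap_hugepage_range() path
(TLB flushing and the surrounding loop are elided):

	/* Sketch only: the locking that i_mmap_assert_write_locked() checks. */
	mapping = vma->vm_file->f_mapping;
	i_mmap_lock_write(mapping);
	ptl = huge_pte_lock(h, mm, ptep);
	if (huge_pmd_unshare(mm, vma, &address, ptep)) {
		/* Cleared a PUD: a whole PMD page of PTEs was unshared. */
		spin_unlock(ptl);
	} else {
		spin_unlock(ptl);
		/* ... normal single-PTE teardown ... */
	}
	i_mmap_unlock_write(mapping);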
mm/rmap.c: 1 addition & 1 deletion
@@ -1469,7 +1469,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		 * do this outside rmap routines.
 		 */
 		VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
-		if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+		if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
 			/*
 			 * huge_pmd_unshare unmapped an entire PMD
 			 * page. There is no way of knowing exactly
