Skip to content

Commit

Permalink
mm: describe mmap_sem rules for __lock_page_or_retry() and callers
Browse files Browse the repository at this point in the history
Add a comment describing the circumstances in which
__lock_page_or_retry() will or will not release the mmap_sem when
returning 0.

Add comments to lock_page_or_retry()'s callers (filemap_fault(),
do_swap_page()) noting the impact on VM_FAULT_RETRY returns.

Add comments on up the call tree, particularly replacing the false "We
return with mmap_sem still held" comments.

Signed-off-by: Paul Cassella <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Paul Cassella authored and torvalds committed Aug 7, 2014
1 parent 4ffeaf3 commit 9a95f3c
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 8 deletions.
3 changes: 2 additions & 1 deletion arch/x86/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -1218,7 +1218,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault:
* the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
* we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
*/
fault = handle_mm_fault(mm, vma, address, flags);

Expand Down
3 changes: 3 additions & 0 deletions include/linux/pagemap.h
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
/*
* lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry.
*
* Return value and mmap_sem implications depend on flags; see
* __lock_page_or_retry().
*/
static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
Expand Down
23 changes: 23 additions & 0 deletions mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
}
EXPORT_SYMBOL_GPL(__lock_page_killable);

/*
* Return values:
* 1 - page is locked; mmap_sem is still held.
* 0 - page is not locked.
* mmap_sem has been released (up_read()), unless flags had both
* FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
* which case mmap_sem is still held.
*
* If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
* with the page locked and the mmap_sem unperturbed.
*/
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
Expand Down Expand Up @@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
* The goto's are kind of ugly, but this streamlines the normal case of having
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
*
* vma->vm_mm->mmap_sem must be held on entry.
*
* If our return value has VM_FAULT_RETRY set, it's because
* lock_page_or_retry() returned 0.
* The mmap_sem has usually been released in this case.
* See __lock_page_or_retry() for the exception.
*
* If our return value does not have VM_FAULT_RETRY set, the mmap_sem
* has not been released.
*
* We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
*/
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
Expand Down
18 changes: 15 additions & 3 deletions mm/gup.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,11 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
return ret;
}

/*
* mmap_sem must be held on entry. If @nonblocking != NULL and
* *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
* If it is, *@nonblocking will be set to 0 and -EBUSY returned.
*/
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
unsigned long address, unsigned int *flags, int *nonblocking)
{
Expand Down Expand Up @@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* with a put_page() call when it is finished with. vmas will only
* remain valid while mmap_sem is held.
*
* Must be called with mmap_sem held for read or write.
* Must be called with mmap_sem held. It may be released. See below.
*
* __get_user_pages walks a process's page tables and takes a reference to
* each struct page that each user address corresponds to at a given
Expand All @@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
*
* If @nonblocking != NULL, __get_user_pages will not wait for disk IO
* or mmap_sem contention, and if waiting is needed to pin all pages,
* *@nonblocking will be set to 0.
* *@nonblocking will be set to 0. Further, if @gup_flags does not
* include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
* this case.
*
* A caller using such a combination of @nonblocking and @gup_flags
* must therefore hold the mmap_sem for reading only, and recognize
* when it's been released. Otherwise, it must be held for either
* reading or writing and will not be released.
*
* In most cases, get_user_pages or get_user_pages_fast should be used
* instead of __get_user_pages. __get_user_pages should be used only if
Expand Down Expand Up @@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
* such architectures, gup() will not be enough to make a subsequent access
* succeed.
*
* This should be called with the mm_sem held for read.
* This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
*/
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags)
Expand Down
34 changes: 31 additions & 3 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
* We return with pte unmapped and unlocked.
*
* We return with the mmap_sem locked or unlocked in the same cases
* as does filemap_fault().
*/
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
Expand Down Expand Up @@ -2688,6 +2691,11 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
}

/*
* The mmap_sem must have been held on entry, and may have been
* released depending on flags and vma->vm_ops->fault() return value.
* See filemap_fault() and __lock_page_retry().
*/
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags, struct page **page)
{
Expand Down Expand Up @@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return ret;
}

/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults).
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
Expand All @@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
* We return with pte unmapped and unlocked.
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
Expand Down Expand Up @@ -3172,7 +3188,10 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
* We return with pte unmapped and unlocked.
*
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
static int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
Expand Down Expand Up @@ -3232,6 +3251,9 @@ static int handle_pte_fault(struct mm_struct *mm,

/*
* By the time we get here, we already hold the mm semaphore
*
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
Expand Down Expand Up @@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}

/*
* By the time we get here, we already hold the mm semaphore
*
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
Expand Down
9 changes: 8 additions & 1 deletion mm/mlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,19 @@ unsigned int munlock_vma_page(struct page *page)
* @vma: target vma
* @start: start address
* @end: end address
* @nonblocking:
*
* This takes care of making the pages present too.
*
* return 0 on success, negative error code on error.
*
* vma->vm_mm->mmap_sem must be held for at least read.
* vma->vm_mm->mmap_sem must be held.
*
* If @nonblocking is NULL, it may be held for read or write and will
* be unperturbed.
*
* If @nonblocking is non-NULL, it must held for read only and may be
* released. If it's released, *@nonblocking will be set to 0.
*/
long __mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *nonblocking)
Expand Down

0 comments on commit 9a95f3c

Please sign in to comment.