mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared

For VMAs that don't want write notifications, PTEs created for read faults
have their write bit set.  If the read fault happens after VM_SOFTDIRTY is
cleared, subsequent writes go through the already-writable PTE without
faulting, so the kernel never gets a chance to set the PTE's soft-dirty bit
and the page wrongly appears clean.

Here's a simple code snippet to demonstrate the bug:

  char* m = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_SHARED, -1, 0);
  system("echo 4 > /proc/$PPID/clear_refs"); /* clear VM_SOFTDIRTY */
  assert(*m == '\0');     /* new PTE allows write access */
  assert(!soft_dirty(m));
  *m = 'x';               /* should dirty the page */
  assert(soft_dirty(m));  /* fails */
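
The snippet leaves soft_dirty() undefined.  A minimal userspace sketch of
such a helper (an illustration, not part of this patch) reads
/proc/self/pagemap, where bit 55 of each 64-bit entry reports the PTE's
soft-dirty state:

  #include <assert.h>
  #include <fcntl.h>
  #include <stdint.h>
  #include <unistd.h>

  /* Hypothetical helper: return the soft-dirty bit (bit 55 of the
   * pagemap entry) for the page containing addr. */
  static int soft_dirty(void *addr)
  {
          uint64_t entry;
          int fd = open("/proc/self/pagemap", O_RDONLY);
          off_t off = ((uintptr_t)addr / getpagesize()) * sizeof(entry);

          assert(fd >= 0);
          assert(pread(fd, &entry, sizeof(entry), off) == sizeof(entry));
          close(fd);
          return (entry >> 55) & 1;
  }

Note that system() runs its command in a child shell, so $PPID inside the
echo names the test process itself; the clear_refs write therefore targets
the right mm.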

With this patch, write notifications are enabled when VM_SOFTDIRTY is
cleared.  Furthermore, to avoid unnecessary faults, write notifications
are disabled when VM_SOFTDIRTY is set.

As a side effect of enabling and disabling write notifications with
care, this patch fixes a bug in mprotect where vm_page_prot bits set by
drivers were zapped on mprotect.  An analogous bug was fixed in mmap by
commit c9d0bf2 ("mm: uncached vma support with writenotify").

Signed-off-by: Peter Feiner <[email protected]>
Reported-by: Peter Feiner <[email protected]>
Suggested-by: Kirill A. Shutemov <[email protected]>
Cc: Cyrill Gorcunov <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Jamie Liu <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: Bjorn Helgaas <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
peterfeiner authored and torvalds committed Oct 14, 2014
1 parent 63a12d9 commit 64e4550
Showing 6 changed files with 68 additions and 38 deletions.
19 changes: 14 additions & 5 deletions fs/proc/task_mmu.c
@@ -827,8 +827,21 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			.private = &cp,
 		};
 		down_read(&mm->mmap_sem);
-		if (type == CLEAR_REFS_SOFT_DIRTY)
+		if (type == CLEAR_REFS_SOFT_DIRTY) {
+			for (vma = mm->mmap; vma; vma = vma->vm_next) {
+				if (!(vma->vm_flags & VM_SOFTDIRTY))
+					continue;
+				up_read(&mm->mmap_sem);
+				down_write(&mm->mmap_sem);
+				for (vma = mm->mmap; vma; vma = vma->vm_next) {
+					vma->vm_flags &= ~VM_SOFTDIRTY;
+					vma_set_page_prot(vma);
+				}
+				downgrade_write(&mm->mmap_sem);
+				break;
+			}
 			mmu_notifier_invalidate_range_start(mm, 0, -1);
+		}
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			cp.vma = vma;
 			if (is_vm_hugetlb_page(vma))
@@ -848,10 +861,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				continue;
 			if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
 				continue;
-			if (type == CLEAR_REFS_SOFT_DIRTY) {
-				if (vma->vm_flags & VM_SOFTDIRTY)
-					vma->vm_flags &= ~VM_SOFTDIRTY;
-			}
 			walk_page_range(vma->vm_start, vma->vm_end,
 					&clear_refs_walk);
 		}
14 changes: 14 additions & 0 deletions include/asm-generic/pgtable.h
@@ -253,6 +253,20 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #define pgprot_device pgprot_noncached
 #endif
 
+#ifndef pgprot_modify
+#define pgprot_modify pgprot_modify
+static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+{
+	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
+		newprot = pgprot_noncached(newprot);
+	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
+		newprot = pgprot_writecombine(newprot);
+	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
+		newprot = pgprot_device(newprot);
+	return newprot;
+}
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier. Although no
5 changes: 5 additions & 0 deletions include/linux/mm.h
@@ -1974,11 +1974,16 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,

 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
+void vma_set_page_prot(struct vm_area_struct *vma);
 #else
 static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
 	return __pgprot(0);
 }
+static inline void vma_set_page_prot(struct vm_area_struct *vma)
+{
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+}
 #endif

#ifdef CONFIG_NUMA_BALANCING
3 changes: 2 additions & 1 deletion mm/memory.c
@@ -2053,7 +2053,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
 		/*
-		 * VM_MIXEDMAP !pfn_valid() case
+		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
+		 * VM_PFNMAP VMA.
 		 *
 		 * We should not cow pages in a shared writeable mapping.
 		 * Just mark the pages writable as we can't do any dirty
45 changes: 28 additions & 17 deletions mm/mmap.c
@@ -89,6 +89,25 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
+static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
+{
+	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
+}
+
+/* Update vma->vm_page_prot to reflect vma->vm_flags. */
+void vma_set_page_prot(struct vm_area_struct *vma)
+{
+	unsigned long vm_flags = vma->vm_flags;
+
+	vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
+	if (vma_wants_writenotify(vma)) {
+		vm_flags &= ~VM_SHARED;
+		vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
+						     vm_flags);
+	}
+}
+
+
 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;	/* heuristic overcommit */
 int sysctl_overcommit_ratio __read_mostly = 50;	/* default is 50% */
 unsigned long sysctl_overcommit_kbytes __read_mostly;
@@ -1475,11 +1494,16 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
 		return 1;
 
-	/* The open routine did something to the protections already? */
+	/* The open routine did something to the protections that pgprot_modify
+	 * won't preserve? */
 	if (pgprot_val(vma->vm_page_prot) !=
-	    pgprot_val(vm_get_page_prot(vm_flags)))
+	    pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
 		return 0;
 
+	/* Do we need to track softdirty? */
+	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
+		return 1;
+
 	/* Specialty mapping? */
 	if (vm_flags & VM_PFNMAP)
 		return 0;
@@ -1615,21 +1639,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		goto free_vma;
 	}
 
-	if (vma_wants_writenotify(vma)) {
-		pgprot_t pprot = vma->vm_page_prot;
-
-		/* Can vma->vm_page_prot have changed??
-		 *
-		 * Answer: Yes, drivers may have changed it in their
-		 *         f_op->mmap method.
-		 *
-		 * Ensures that vmas marked as uncached stay that way.
-		 */
-		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
-		if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
-			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	}
-
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	/* Once vma denies write, undo our temporary denial count */
 	if (file) {
@@ -1663,6 +1672,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 */
 	vma->vm_flags |= VM_SOFTDIRTY;
 
+	vma_set_page_prot(vma);
+
 	return addr;
 
 unmap_and_free_vma:
20 changes: 5 additions & 15 deletions mm/mprotect.c
@@ -29,13 +29,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#ifndef pgprot_modify
-static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
-{
-	return newprot;
-}
-#endif
-
 /*
  * For a prot_numa update we only hold mmap_sem for read so there is a
  * potential race with faulting where a pmd was temporarily none. This
@@ -93,7 +86,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				 * Avoid taking write faults for pages we
 				 * know to be dirty.
 				 */
-				if (dirty_accountable && pte_dirty(ptent))
+				if (dirty_accountable && pte_dirty(ptent) &&
+				    (pte_soft_dirty(ptent) ||
+				     !(vma->vm_flags & VM_SOFTDIRTY)))
 					ptent = pte_mkwrite(ptent);
 				ptep_modify_prot_commit(mm, addr, pte, ptent);
 				updated = true;
@@ -320,13 +315,8 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
-					  vm_get_page_prot(newflags));
-
-	if (vma_wants_writenotify(vma)) {
-		vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
-		dirty_accountable = 1;
-	}
+	dirty_accountable = vma_wants_writenotify(vma);
+	vma_set_page_prot(vma);
 
 	change_protection(vma, start, end, vma->vm_page_prot,
 			  dirty_accountable, 0);
