Skip to content

Commit

Permalink
userfaultfd: wp: add the writeprotect API to userfaultfd ioctl
Browse files Browse the repository at this point in the history
Introduce the new uffd-wp APIs for userspace.

Firstly, we allow UFFDIO_REGISTER to be called with write-protection
tracking enabled via the new UFFDIO_REGISTER_MODE_WP flag.  Note that this
flag can co-exist with the existing UFFDIO_REGISTER_MODE_MISSING, in which
case the userspace program can not only resolve missing page faults but
also track page data changes along the way.

Secondly, we introduce the new UFFDIO_WRITEPROTECT API to do page-level
write protection tracking.  Note that the memory region must first be
registered with UFFDIO_REGISTER_MODE_WP.

[[email protected]: write up the commit message]
[[email protected]: remove useless block, write commit message, check against
 VM_MAYWRITE rather than VM_WRITE when register]
Signed-off-by: Andrea Arcangeli <[email protected]>
Signed-off-by: Peter Xu <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Jerome Glisse <[email protected]>
Cc: Bobby Powers <[email protected]>
Cc: Brian Geffon <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Denis Plotnikov <[email protected]>
Cc: "Dr . David Alan Gilbert" <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: "Kirill A . Shutemov" <[email protected]>
Cc: Martin Cracauer <[email protected]>
Cc: Marty McFadden <[email protected]>
Cc: Maya Gokhale <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Shaohua Li <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
aagit authored and torvalds committed Apr 7, 2020
1 parent ffd0579 commit 63b2d41
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 16 deletions.
82 changes: 66 additions & 16 deletions fs/userfaultfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,11 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
if (!pmd_present(_pmd))
goto out;

if (pmd_trans_huge(_pmd))
if (pmd_trans_huge(_pmd)) {
if (!pmd_write(_pmd) && (reason & VM_UFFD_WP))
ret = true;
goto out;
}

/*
* the pmd is stable (as in !pmd_trans_unstable) so we can re-read it
Expand All @@ -328,6 +331,8 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
*/
if (pte_none(*pte))
ret = true;
if (!pte_write(*pte) && (reason & VM_UFFD_WP))
ret = true;
pte_unmap(pte);

out:
Expand Down Expand Up @@ -1287,10 +1292,13 @@ static __always_inline int validate_range(struct mm_struct *mm,
return 0;
}

static inline bool vma_can_userfault(struct vm_area_struct *vma)
static inline bool vma_can_userfault(struct vm_area_struct *vma,
unsigned long vm_flags)
{
return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
vma_is_shmem(vma);
/* FIXME: add WP support to hugetlbfs and shmem */
return vma_is_anonymous(vma) ||
((is_vm_hugetlb_page(vma) || vma_is_shmem(vma)) &&
!(vm_flags & VM_UFFD_WP));
}

static int userfaultfd_register(struct userfaultfd_ctx *ctx,
Expand Down Expand Up @@ -1322,15 +1330,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vm_flags = 0;
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
vm_flags |= VM_UFFD_MISSING;
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) {
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP)
vm_flags |= VM_UFFD_WP;
/*
* FIXME: remove the below error constraint by
* implementing the wprotect tracking mode.
*/
ret = -EINVAL;
goto out;
}

ret = validate_range(mm, &uffdio_register.range.start,
uffdio_register.range.len);
Expand Down Expand Up @@ -1380,7 +1381,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,

/* check not compatible vmas */
ret = -EINVAL;
if (!vma_can_userfault(cur))
if (!vma_can_userfault(cur, vm_flags))
goto out_unlock;

/*
Expand Down Expand Up @@ -1408,6 +1409,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (end & (vma_hpagesize - 1))
goto out_unlock;
}
if ((vm_flags & VM_UFFD_WP) && !(cur->vm_flags & VM_MAYWRITE))
goto out_unlock;

/*
* Check that this vma isn't already owned by a
Expand Down Expand Up @@ -1437,7 +1440,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
do {
cond_resched();

BUG_ON(!vma_can_userfault(vma));
BUG_ON(!vma_can_userfault(vma, vm_flags));
BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx);
WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
Expand Down Expand Up @@ -1575,7 +1578,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* provides for more strict behavior to notice
* unregistration errors.
*/
if (!vma_can_userfault(cur))
if (!vma_can_userfault(cur, cur->vm_flags))
goto out_unlock;

found = true;
Expand All @@ -1589,7 +1592,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
do {
cond_resched();

BUG_ON(!vma_can_userfault(vma));
BUG_ON(!vma_can_userfault(vma, vma->vm_flags));

/*
* Nothing to do: this vma is already registered into this
Expand Down Expand Up @@ -1802,6 +1805,50 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
return ret;
}

/*
 * Handle the UFFDIO_WRITEPROTECT ioctl: apply or remove write protection
 * on the address range described by the userspace argument.
 *
 * @ctx: userfaultfd context the ioctl was issued on
 * @arg: userspace pointer to a struct uffdio_writeprotect
 *
 * Returns 0 on success, -EAGAIN if ctx->mmap_changing is set on entry,
 * -EFAULT if the argument cannot be copied from userspace, -EINVAL if
 * unknown mode bits are set or WP is combined with DONTWAKE, or the
 * error reported by validate_range() / mwriteprotect_range().
 */
static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
				    unsigned long arg)
{
	int ret;
	struct uffdio_writeprotect uffdio_wp;
	struct uffdio_writeprotect __user *user_uffdio_wp;
	struct userfaultfd_wake_range range;
	bool mode_wp, mode_dontwake;

	if (READ_ONCE(ctx->mmap_changing))
		return -EAGAIN;

	user_uffdio_wp = (struct uffdio_writeprotect __user *) arg;

	if (copy_from_user(&uffdio_wp, user_uffdio_wp,
			   sizeof(struct uffdio_writeprotect)))
		return -EFAULT;

	ret = validate_range(ctx->mm, &uffdio_wp.range.start,
			     uffdio_wp.range.len);
	if (ret)
		return ret;

	/* Reject any mode bit this implementation does not know about. */
	if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
			       UFFDIO_WRITEPROTECT_MODE_WP))
		return -EINVAL;

	/* Decode the mode bits once; they are consulted several times below. */
	mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP;
	mode_dontwake = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE;

	/* DONTWAKE only has a meaning when protection is being removed. */
	if (mode_wp && mode_dontwake)
		return -EINVAL;

	/*
	 * NOTE(review): ctx->mm is used here without taking an extra
	 * reference; confirm whether this path needs to pin the mm first.
	 */
	ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
				  uffdio_wp.range.len, mode_wp,
				  &ctx->mmap_changing);
	if (ret)
		return ret;

	/*
	 * Only wake waiters when write protection was removed: applying
	 * protection does not resolve any fault, so waking threads blocked
	 * on this range would merely make them fault and sleep again.
	 */
	if (!mode_wp && !mode_dontwake) {
		range.start = uffdio_wp.range.start;
		range.len = uffdio_wp.range.len;
		wake_userfault(ctx, &range);
	}
	return ret;
}

static inline unsigned int uffd_ctx_features(__u64 user_features)
{
/*
Expand Down Expand Up @@ -1883,6 +1930,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
case UFFDIO_ZEROPAGE:
ret = userfaultfd_zeropage(ctx, arg);
break;
case UFFDIO_WRITEPROTECT:
ret = userfaultfd_writeprotect(ctx, arg);
break;
}
return ret;
}
Expand Down
23 changes: 23 additions & 0 deletions include/uapi/linux/userfaultfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#define _UFFDIO_WAKE (0x02)
#define _UFFDIO_COPY (0x03)
#define _UFFDIO_ZEROPAGE (0x04)
#define _UFFDIO_WRITEPROTECT (0x06)
#define _UFFDIO_API (0x3F)

/* userfaultfd ioctl ids */
Expand All @@ -68,6 +69,8 @@
struct uffdio_copy)
#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
struct uffdio_zeropage)
#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
struct uffdio_writeprotect)

/* read() structure */
struct uffd_msg {
Expand Down Expand Up @@ -232,4 +235,24 @@ struct uffdio_zeropage {
__s64 zeropage;
};

/*
* Argument of the UFFDIO_WRITEPROTECT ioctl.
*/
struct uffdio_writeprotect {
/* Address range whose write protection is to be changed. */
struct uffdio_range range;
/*
* UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range,
* unset the flag to undo protection of a range which was previously
* write protected.
*
* UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up
* any waiting thread after the operation succeeds.
*
* NOTE: Write protecting a region (WP=1) is unrelated to page faults,
* therefore the DONTWAKE flag is meaningless with WP=1, and requesting
* both flags together is rejected with -EINVAL. Removing write
* protection (WP=0) in response to a page fault wakes the faulting
* task unless DONTWAKE is set.
*/
#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
/* Bitmask of the UFFDIO_WRITEPROTECT_MODE_* flags defined above. */
__u64 mode;
};

#endif /* _LINUX_USERFAULTFD_H */

0 comments on commit 63b2d41

Please sign in to comment.