Skip to content

Commit

Permalink
userfaultfd: wp: add WP pagetable tracking to x86
Browse files Browse the repository at this point in the history
Accurate userfaultfd WP tracking is possible by tracking exactly which
virtual memory ranges were writeprotected by userland.  We can't relay
only on the RW bit of the mapped pagetable because that information is
destroyed by fork() or KSM or swap.  If we were to relay on that, we'd
need to stay on the safe side and generate false positive wp faults for
every swapped out page.

[[email protected]: append _PAGE_UFD_WP to _PAGE_CHG_MASK]
Signed-off-by: Andrea Arcangeli <[email protected]>
Signed-off-by: Peter Xu <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Jerome Glisse <[email protected]>
Reviewed-by: Mike Rapoport <[email protected]>
Cc: Bobby Powers <[email protected]>
Cc: Brian Geffon <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Denis Plotnikov <[email protected]>
Cc: "Dr . David Alan Gilbert" <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: "Kirill A . Shutemov" <[email protected]>
Cc: Martin Cracauer <[email protected]>
Cc: Marty McFadden <[email protected]>
Cc: Maya Gokhale <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Shaohua Li <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
aagit authored and torvalds committed Apr 7, 2020
1 parent 529b930 commit 5a28106
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 2 deletions.
1 change: 1 addition & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ config X86
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD
select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_ASM_MODVERSIONS
Expand Down
52 changes: 52 additions & 0 deletions arch/x86/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <asm/x86_init.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/api.h>
#include <asm-generic/pgtable_uffd.h>

extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
Expand Down Expand Up @@ -313,6 +314,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
return native_make_pte(v & ~clear);
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
return pte_flags(pte) & _PAGE_UFFD_WP;
}

static inline pte_t pte_mkuffd_wp(pte_t pte)
{
return pte_set_flags(pte, _PAGE_UFFD_WP);
}

static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_UFFD_WP);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

static inline pte_t pte_mkclean(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_DIRTY);
Expand Down Expand Up @@ -392,6 +410,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
return native_make_pmd(v & ~clear);
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pmd_uffd_wp(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_UFFD_WP;
}

static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_UFFD_WP);
}

static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
{
return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

static inline pmd_t pmd_mkold(pmd_t pmd)
{
return pmd_clear_flags(pmd, _PAGE_ACCESSED);
Expand Down Expand Up @@ -1374,6 +1409,23 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
#endif
#endif

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
}

static inline int pte_swp_uffd_wp(pte_t pte)
{
return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
}

static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

#define PKRU_AD_BIT 0x1
#define PKRU_WD_BIT 0x2
#define PKRU_BITS_PER_PKEY 2
Expand Down
8 changes: 7 additions & 1 deletion arch/x86/include/asm/pgtable_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,17 +189,23 @@ extern void sync_global_pgds(unsigned long start, unsigned long end);
*
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
* | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry
* | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|F|SD|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
* there. We also need to avoid using A and D because of an
* erratum where they can be incorrectly set by hardware on
* non-present PTEs.
*
* SD Bits 1-4 are not used in non-present format and available for
* special use described below:
*
* SD (1) in swp entry is used to store soft dirty bit, which helps us
* remember soft dirty over page migration
*
* F (2) in swp entry is used to record when a pagetable is
* writeprotected by userfaultfd WP support.
*
* Bit 7 in swp entry should be 0 because pmd_present checks not only P,
* but also L and G.
*
Expand Down
12 changes: 11 additions & 1 deletion arch/x86/include/asm/pgtable_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4

Expand Down Expand Up @@ -100,6 +101,14 @@
#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
#endif

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP)
#define _PAGE_SWP_UFFD_WP _PAGE_USER
#else
#define _PAGE_UFFD_WP (_AT(pteval_t, 0))
#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0))
#endif

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP)
Expand All @@ -118,7 +127,8 @@
*/
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
_PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC)
_PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \
_PAGE_UFFD_WP)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)

/*
Expand Down
1 change: 1 addition & 0 deletions include/asm-generic/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>
#include <asm-generic/pgtable_uffd.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
Expand Down
51 changes: 51 additions & 0 deletions include/asm-generic/pgtable_uffd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#ifndef _ASM_GENERIC_PGTABLE_UFFD_H
#define _ASM_GENERIC_PGTABLE_UFFD_H

#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static __always_inline int pte_uffd_wp(pte_t pte)
{
return 0;
}

static __always_inline int pmd_uffd_wp(pmd_t pmd)
{
return 0;
}

static __always_inline pte_t pte_mkuffd_wp(pte_t pte)
{
return pte;
}

static __always_inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
{
return pmd;
}

static __always_inline pte_t pte_clear_uffd_wp(pte_t pte)
{
return pte;
}

static __always_inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
{
return pmd;
}

static __always_inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
return pte;
}

static __always_inline int pte_swp_uffd_wp(pte_t pte)
{
return 0;
}

static __always_inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
return pte;
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

#endif /* _ASM_GENERIC_PGTABLE_UFFD_H */
5 changes: 5 additions & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -1556,6 +1556,11 @@ config ADVISE_SYSCALLS
applications use these syscalls, you can disable this option to save
space.

config HAVE_ARCH_USERFAULTFD_WP
bool
help
Arch has userfaultfd write protection support

config MEMBARRIER
bool "Enable membarrier() system call" if EXPERT
default y
Expand Down

0 comments on commit 5a28106

Please sign in to comment.