Skip to content

Commit

Permalink
x86/mm: Split read_cr3() into read_cr3_pa() and __read_cr3()
Browse files Browse the repository at this point in the history
The kernel has several code paths that read CR3.  Most of them assume that
CR3 contains the PGD's physical address, whereas some of them awkwardly
use PHYSICAL_PAGE_MASK to mask off low bits.

Add explicit mask macros for CR3 and convert all of the CR3 readers.
This will keep them from breaking when PCID is enabled.

Signed-off-by: Andy Lutomirski <[email protected]>
Cc: Boris Ostrovsky <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: xen-devel <[email protected]>
Link: http://lkml.kernel.org/r/883f8fb121f4616c1c1427ad87350bb2f5ffeca1.1497288170.git.luto@kernel.org
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
amluto authored and Ingo Molnar committed Jun 13, 2017
1 parent 3f365cf commit 6c690ee
Show file tree
Hide file tree
Showing 20 changed files with 79 additions and 29 deletions.
2 changes: 1 addition & 1 deletion arch/x86/boot/compressed/pagetable.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ void initialize_identity_maps(void)
* and we must append to the existing area instead of entirely
* overwriting it.
*/
level4p = read_cr3();
level4p = read_cr3_pa();
if (level4p == (unsigned long)_pgtable) {
debug_putstr("booted via startup_32()\n");
pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/efi.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ struct efi_scratch {
__kernel_fpu_begin(); \
\
if (efi_scratch.use_pgd) { \
efi_scratch.prev_cr3 = read_cr3(); \
efi_scratch.prev_cr3 = __read_cr3(); \
write_cr3((unsigned long)efi_scratch.efi_pgt); \
__flush_tlb_all(); \
} \
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/include/asm/mmu_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,

/*
* This can be used from process context to figure out what the value of
* CR3 is without needing to do a (slow) read_cr3().
* CR3 is without needing to do a (slow) __read_cr3().
*
* It's intended to be used for code like KVM that sneakily changes CR3
* and needs to restore it. It needs to be used very carefully.
Expand All @@ -281,7 +281,7 @@ static inline unsigned long __get_current_cr3_fast(void)
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || !in_atomic());

VM_BUG_ON(cr3 != read_cr3());
VM_BUG_ON(cr3 != __read_cr3());
return cr3;
}

Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/paravirt.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ static inline void write_cr2(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}

static inline unsigned long read_cr3(void)
static inline unsigned long __read_cr3(void)
{
return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}
Expand Down
36 changes: 36 additions & 0 deletions arch/x86/include/asm/processor-flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,40 @@
#else
#define X86_VM_MASK 0 /* No VM86 support */
#endif

/*
* CR3's layout varies depending on several things.
*
* If CR4.PCIDE is set (64-bit only), then CR3[11:0] is the address space ID.
* If PAE is enabled, then CR3[11:5] is part of the PDPT address
* (i.e. it's 32-byte aligned, not page-aligned) and CR3[4:0] is ignored.
* Otherwise (non-PAE, non-PCID), CR3[3] is PWT, CR3[4] is PCD, and
* CR3[2:0] and CR3[11:5] are ignored.
*
* In all cases, Linux puts zeros in the low ignored bits and in PWT and PCD.
*
* CR3[63] is always read as zero. If CR4.PCIDE is set, then CR3[63] may be
* written as 1 to prevent the write to CR3 from flushing the TLB.
*
* On systems with SME, one bit (in a variable position!) is stolen to indicate
* that the top-level paging structure is encrypted.
*
* All of the remaining bits indicate the physical address of the top-level
* paging structure.
*
* CR3_ADDR_MASK is the mask used by read_cr3_pa().
*/
#ifdef CONFIG_X86_64
/* Mask off the address space ID bits. */
#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
#define CR3_PCID_MASK 0xFFFull
#else
/*
* CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
* a tiny bit of code size by setting all the bits.
*/
#define CR3_ADDR_MASK 0xFFFFFFFFull
#define CR3_PCID_MASK 0ull
#endif

#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
8 changes: 8 additions & 0 deletions arch/x86/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,14 @@ native_cpuid_reg(ebx)
native_cpuid_reg(ecx)
native_cpuid_reg(edx)

/*
* Friendlier CR3 helpers.
*/
static inline unsigned long read_cr3_pa(void)
{
return __read_cr3() & CR3_ADDR_MASK;
}

static inline void load_cr3(pgd_t *pgdir)
{
write_cr3(__pa(pgdir));
Expand Down
10 changes: 7 additions & 3 deletions arch/x86/include/asm/special_insns.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ static inline void native_write_cr2(unsigned long val)
asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
}

static inline unsigned long native_read_cr3(void)
static inline unsigned long __native_read_cr3(void)
{
unsigned long val;
asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
Expand Down Expand Up @@ -159,9 +159,13 @@ static inline void write_cr2(unsigned long x)
native_write_cr2(x);
}

static inline unsigned long read_cr3(void)
/*
* Careful! CR3 contains more than just an address. You probably want
* read_cr3_pa() instead.
*/
static inline unsigned long __read_cr3(void)
{
return native_read_cr3();
return __native_read_cr3();
}

static inline void write_cr3(unsigned long x)
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/include/asm/tlbflush.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ static inline void __native_flush_tlb(void)
* back:
*/
preempt_disable();
native_write_cr3(native_read_cr3());
native_write_cr3(__native_read_cr3());
preempt_enable();
}

Expand Down Expand Up @@ -264,7 +264,7 @@ static inline void reset_lazy_tlbstate(void)
this_cpu_write(cpu_tlbstate.state, 0);
this_cpu_write(cpu_tlbstate.loaded_mm, &init_mm);

WARN_ON(read_cr3() != __pa_symbol(swapper_pg_dir));
WARN_ON(read_cr3_pa() != __pa_symbol(swapper_pg_dir));
}

static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/kernel/head64.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ int __init early_make_pgtable(unsigned long address)
pmdval_t pmd, *pmd_p;

/* Invalid address or early pgt is done ? */
if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt))
if (physaddr >= MAXMEM ||
read_cr3_pa() != __pa_nodebug(early_level4_pgt))
return -1;

again:
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/paravirt.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {

.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
.read_cr3 = native_read_cr3,
.read_cr3 = __native_read_cr3,
.write_cr3 = native_write_cr3,

.flush_tlb_user = native_flush_tlb,
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/process_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ void __show_regs(struct pt_regs *regs, int all)

cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr3 = __read_cr3();
cr4 = __read_cr4();
printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/process_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ void __show_regs(struct pt_regs *regs, int all)

cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr3 = __read_cr3();
cr4 = __read_cr4();

printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -5024,7 +5024,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
* Save the most likely value for this task's CR3 in the VMCS.
* We can't use __get_current_cr3_fast() because we're not atomic.
*/
cr3 = read_cr3();
cr3 = __read_cr3();
vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
vmx->host_state.vmcs_host_cr3 = cr3;

Expand Down
10 changes: 5 additions & 5 deletions arch/x86/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ static noinline int vmalloc_fault(unsigned long address)
* Do _not_ use "current" here. We might be inside
* an interrupt in the middle of a task switch..
*/
pgd_paddr = read_cr3();
pgd_paddr = read_cr3_pa();
pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
if (!pmd_k)
return -1;
Expand Down Expand Up @@ -388,7 +388,7 @@ static bool low_pfn(unsigned long pfn)

static void dump_pagetable(unsigned long address)
{
pgd_t *base = __va(read_cr3());
pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = &base[pgd_index(address)];
p4d_t *p4d;
pud_t *pud;
Expand Down Expand Up @@ -451,7 +451,7 @@ static noinline int vmalloc_fault(unsigned long address)
* happen within a race in page table update. In the later
* case just flush:
*/
pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address);
pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
Expand Down Expand Up @@ -555,7 +555,7 @@ static int bad_address(void *p)

static void dump_pagetable(unsigned long address)
{
pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = base + pgd_index(address);
p4d_t *p4d;
pud_t *pud;
Expand Down Expand Up @@ -700,7 +700,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
pgd_t *pgd;
pte_t *pte;

pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
pgd = __va(read_cr3_pa());
pgd += pgd_index(address);

pte = lookup_address_in_pgd(pgd, address, &level);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/mm/ioremap.c
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
/* Don't assume we're using swapper_pg_dir at this point */
pgd_t *base = __va(read_cr3());
pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = &base[pgd_index(addr)];
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/platform/efi/efi_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pgd_t * __init efi_call_phys_prolog(void)
int n_pgds, i, j;

if (!efi_enabled(EFI_OLD_MEMMAP)) {
save_pgd = (pgd_t *)read_cr3();
save_pgd = (pgd_t *)__read_cr3();
write_cr3((unsigned long)efi_scratch.efi_pgt);
goto out;
}
Expand Down Expand Up @@ -646,7 +646,7 @@ efi_status_t efi_thunk_set_virtual_address_map(
efi_sync_low_kernel_mappings();
local_irq_save(flags);

efi_scratch.prev_cr3 = read_cr3();
efi_scratch.prev_cr3 = __read_cr3();
write_cr3((unsigned long)efi_scratch.efi_pgt);
__flush_tlb_all();

Expand Down
2 changes: 1 addition & 1 deletion arch/x86/platform/olpc/olpc-xo1-pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state)

asmlinkage __visible int xo1_do_sleep(u8 sleep_state)
{
void *pgd_addr = __va(read_cr3());
void *pgd_addr = __va(read_cr3_pa());

/* Program wakeup mask (using dword access to CS5536_PM1_EN) */
outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/power/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static void __save_processor_state(struct saved_context *ctxt)
*/
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
ctxt->cr3 = read_cr3();
ctxt->cr3 = __read_cr3();
ctxt->cr4 = __read_cr4();
#ifdef CONFIG_X86_64
ctxt->cr8 = read_cr8();
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/power/hibernate_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ static int relocate_restore_code(void)
memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);

/* Make the page containing the relocated code executable */
pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
pgd = (pgd_t *)__va(read_cr3_pa()) +
pgd_index(relocated_restore_code);
p4d = p4d_offset(pgd, relocated_restore_code);
if (p4d_large(*p4d)) {
set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
Expand Down
6 changes: 3 additions & 3 deletions arch/x86/xen/mmu_pv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2017,7 +2017,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
pmd_t pmd;
pte_t pte;

pa = read_cr3();
pa = read_cr3_pa();
pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
sizeof(pgd)));
if (!pgd_present(pgd))
Expand Down Expand Up @@ -2097,7 +2097,7 @@ void __init xen_relocate_p2m(void)
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;

pgd = __va(read_cr3());
pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
idx_p4d = 0;
save_pud = n_pud;
Expand Down Expand Up @@ -2204,7 +2204,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
{
unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));

BUG_ON(read_cr3() != __pa(initial_page_table));
BUG_ON(read_cr3_pa() != __pa(initial_page_table));
BUG_ON(cr3 != __pa(swapper_pg_dir));

/*
Expand Down

0 comments on commit 6c690ee

Please sign in to comment.