Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 perf updates from Ingo Molnar:
 "This series tightens up RDPMC permissions: currently even highly
  sandboxed x86 execution environments (such as seccomp) have permission
  to execute RDPMC, which may leak various perf events / PMU state such
  as timing information and other CPU execution details.

  This 'all is allowed' RDPMC mode is still preserved as the
  (non-default) /sys/devices/cpu/rdpmc=2 setting.  The new default is
  that RDPMC access is only allowed if a perf event is mmap-ed (which is
  needed to correctly interpret RDPMC counter values in any case).

  As a side effect of these changes CR4 handling is cleaned up in the
  x86 code and a shadow copy of the CR4 value is added.

  The extra CR4 manipulation adds at most ~50 ns to the context-switch
  cost between rdpmc-capable and rdpmc-non-capable mms"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86: Add /sys/devices/cpu/rdpmc=2 to allow rdpmc for all tasks
  perf/x86: Only allow rdpmc if a perf_event is mapped
  perf: Pass the event to arch_perf_update_userpage()
  perf: Add pmu callbacks to track event mapping and unmapping
  x86: Add a comment clarifying LDT context switching
  x86: Store a per-cpu shadow copy of CR4
  x86: Clean up cr4 manipulation
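For illustration only (not part of this commit): under the new default (rdpmc=1), a task earns RDPMC access by mmap-ing one of its own perf events, and the raw counter value has to be combined with the offset published in the event's mmap page anyway. Below is a minimal userspace sketch of that pattern, assuming x86-64 Linux and a working perf_event_open(); error handling is trimmed.

#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static inline uint64_t rdpmc(uint32_t counter)
{
        uint32_t lo, hi;
        asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        struct perf_event_attr attr;
        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        attr.exclude_kernel = 1;

        /* Count instructions for the calling task, on whatever CPU it runs. */
        int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;

        /* Mapping the event's metadata page is what grants RDPMC to this mm. */
        struct perf_event_mmap_page *pc = mmap(NULL, sysconf(_SC_PAGESIZE),
                                               PROT_READ, MAP_SHARED, fd, 0);
        if (pc == MAP_FAILED)
                return 1;

        /* Lock-free read protocol described in the perf_event_mmap_page docs. */
        uint64_t count;
        uint32_t seq;
        do {
                seq = pc->lock;
                __sync_synchronize();
                uint32_t idx = pc->index;   /* 0: counter not readable via RDPMC */
                count = pc->offset;
                if (idx)
                        count += rdpmc(idx - 1);
                __sync_synchronize();
        } while (pc->lock != seq);

        printf("instructions counted so far: %llu\n", (unsigned long long)count);
        return 0;
}

Setting /sys/devices/cpu/rdpmc to 2 restores the old always-allowed behaviour; 0 disables userspace RDPMC entirely.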
torvalds committed Feb 16, 2015
2 parents a68fb48 + a667342 commit 3750771
Showing 35 changed files with 253 additions and 120 deletions.
2 changes: 2 additions & 0 deletions arch/x86/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {

struct mutex lock;
void __user *vdso;

atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;

#ifdef CONFIG_SMP
33 changes: 27 additions & 6 deletions arch/x86/include/asm/mmu_context.h
@@ -18,6 +18,21 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
}
#endif /* !CONFIG_PARAVIRT */

#ifdef CONFIG_PERF_EVENTS
extern struct static_key rdpmc_always_available;

static inline void load_mm_cr4(struct mm_struct *mm)
{
if (static_key_true(&rdpmc_always_available) ||
atomic_read(&mm->context.perf_rdpmc_allowed))
cr4_set_bits(X86_CR4_PCE);
else
cr4_clear_bits(X86_CR4_PCE);
}
#else
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif

/*
* Used for LDT copy/destruction.
*/
@@ -52,15 +67,20 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* Stop flush ipis for the previous mm */
cpumask_clear_cpu(cpu, mm_cpumask(prev));

/* Load per-mm CR4 state */
load_mm_cr4(next);

/*
* Load the LDT, if the LDT is different.
*
* It's possible leave_mm(prev) has been called. If so,
* then prev->context.ldt could be out of sync with the
* LDT descriptor or the LDT register. This can only happen
* if prev->context.ldt is non-null, since we never free
* an LDT. But LDTs can't be shared across mms, so
* prev->context.ldt won't be equal to next->context.ldt.
* It's possible that prev->context.ldt doesn't match
* the LDT register. This can happen if leave_mm(prev)
* was called and then modify_ldt changed
* prev->context.ldt but suppressed an IPI to this CPU.
* In this case, prev->context.ldt != NULL, because we
* never free an LDT while the mm still exists. That
* means that next->context.ldt != prev->context.ldt,
* because mms never share an LDT.
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
@@ -85,6 +105,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_LDT_nolock(&next->context);
}
}
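The code that keeps mm->context.perf_rdpmc_allowed up to date lives in arch/x86/kernel/cpu/perf_event.c and is not among the hunks shown on this page. As a rough, illustrative sketch of the mechanism (not the verbatim patch): the new event_mapped/event_unmapped pmu callbacks count rdpmc-capable mappings per mm and prod every CPU currently running that mm, so load_mm_cr4() re-evaluates CR4.PCE.

/* Sketch only: helper and flag names are illustrative, not necessarily the in-tree ones. */
static void refresh_pce(void *ignored)
{
        if (current->active_mm)
                load_mm_cr4(current->active_mm);
}

static void x86_pmu_event_mapped(struct perf_event *event)
{
        if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
                return;

        /* First rdpmc-capable mapping in this mm: enable CR4.PCE wherever it runs. */
        if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
                on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
}

static void x86_pmu_event_unmapped(struct perf_event *event)
{
        if (!current->mm)
                return;
        if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
                return;

        /* Last rdpmc-capable mapping gone: clear CR4.PCE again. */
        if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
                on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
}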
6 changes: 3 additions & 3 deletions arch/x86/include/asm/paravirt.h
@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

static inline unsigned long read_cr4(void)
static inline unsigned long __read_cr4(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
static inline unsigned long read_cr4_safe(void)
static inline unsigned long __read_cr4_safe(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

static inline void write_cr4(unsigned long x)
static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}
33 changes: 0 additions & 33 deletions arch/x86/include/asm/processor.h
@@ -579,39 +579,6 @@ static inline void load_sp0(struct tss_struct *tss,
#define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT */

/*
* Save the cr4 feature set we're using (ie
* Pentium 4MB enable and PPro Global page
* enable), so that any CPU's that boot up
* after us can get the correct flags.
*/
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;

static inline void set_in_cr4(unsigned long mask)
{
unsigned long cr4;

mmu_cr4_features |= mask;
if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features;
cr4 = read_cr4();
cr4 |= mask;
write_cr4(cr4);
}

static inline void clear_in_cr4(unsigned long mask)
{
unsigned long cr4;

mmu_cr4_features &= ~mask;
if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features;
cr4 = read_cr4();
cr4 &= ~mask;
write_cr4(cr4);
}

typedef struct {
unsigned long seg;
} mm_segment_t;
6 changes: 3 additions & 3 deletions arch/x86/include/asm/special_insns.h
@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x)
native_write_cr3(x);
}

static inline unsigned long read_cr4(void)
static inline unsigned long __read_cr4(void)
{
return native_read_cr4();
}

static inline unsigned long read_cr4_safe(void)
static inline unsigned long __read_cr4_safe(void)
{
return native_read_cr4_safe();
}

static inline void write_cr4(unsigned long x)
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
}
77 changes: 70 additions & 7 deletions arch/x86/include/asm/tlbflush.h
@@ -15,6 +15,75 @@
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif

struct tlb_state {
#ifdef CONFIG_SMP
struct mm_struct *active_mm;
int state;
#endif

/*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
unsigned long cr4;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}

/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
unsigned long cr4;

cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 | mask) != cr4) {
cr4 |= mask;
this_cpu_write(cpu_tlbstate.cr4, cr4);
__write_cr4(cr4);
}
}

/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
unsigned long cr4;

cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 & ~mask) != cr4) {
cr4 &= ~mask;
this_cpu_write(cpu_tlbstate.cr4, cr4);
__write_cr4(cr4);
}
}

/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
return this_cpu_read(cpu_tlbstate.cr4);
}

/*
* Save some of cr4 feature set we're using (e.g. Pentium 4MB
* enable and PPro Global page enable), so that any CPU's that boot
* up after us can get the correct flags. This should only be used
* during boot on the boot cpu.
*/
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;

static inline void cr4_set_bits_and_update_boot(unsigned long mask)
{
mmu_cr4_features |= mask;
if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features;
cr4_set_bits(mask);
}

static inline void __native_flush_tlb(void)
{
native_write_cr3(native_read_cr3());
@@ -24,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
{
unsigned long cr4;

cr4 = native_read_cr4();
cr4 = this_cpu_read(cpu_tlbstate.cr4);
/* clear PGE */
native_write_cr4(cr4 & ~X86_CR4_PGE);
/* write old PGE again and flush TLBs */
@@ -184,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2

struct tlb_state {
struct mm_struct *active_mm;
int state;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

static inline void reset_lazy_tlbstate(void)
{
this_cpu_write(cpu_tlbstate.state, 0);
5 changes: 3 additions & 2 deletions arch/x86/include/asm/virtext.h
@@ -19,6 +19,7 @@

#include <asm/vmx.h>
#include <asm/svm.h>
#include <asm/tlbflush.h>

/*
* VMX functions:
@@ -40,12 +41,12 @@ static inline int cpu_has_vmx(void)
static inline void cpu_vmxoff(void)
{
asm volatile (ASM_VMX_VMXOFF : : : "cc");
write_cr4(read_cr4() & ~X86_CR4_VMXE);
cr4_clear_bits(X86_CR4_VMXE);
}

static inline int cpu_vmx_enabled(void)
{
return read_cr4() & X86_CR4_VMXE;
return __read_cr4() & X86_CR4_VMXE;
}

/** Disable VMX if it is enabled on the current CPU
2 changes: 1 addition & 1 deletion arch/x86/kernel/acpi/sleep.c
@@ -78,7 +78,7 @@ int x86_acpi_suspend_lowlevel(void)

header->pmode_cr0 = read_cr0();
if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
header->pmode_cr4 = read_cr4();
header->pmode_cr4 = __read_cr4();
header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
}
if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
17 changes: 12 additions & 5 deletions arch/x86/kernel/cpu/common.c
@@ -19,6 +19,7 @@
#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/debugreg.h>
#include <asm/sections.h>
#include <asm/vsyscall.h>
@@ -278,7 +279,7 @@ __setup("nosmep", setup_disable_smep);
static __always_inline void setup_smep(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_SMEP))
set_in_cr4(X86_CR4_SMEP);
cr4_set_bits(X86_CR4_SMEP);
}

static __init int setup_disable_smap(char *arg)
@@ -298,9 +299,9 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)

if (cpu_has(c, X86_FEATURE_SMAP)) {
#ifdef CONFIG_X86_SMAP
set_in_cr4(X86_CR4_SMAP);
cr4_set_bits(X86_CR4_SMAP);
#else
clear_in_cr4(X86_CR4_SMAP);
cr4_clear_bits(X86_CR4_SMAP);
#endif
}
}
@@ -1294,6 +1295,12 @@ void cpu_init(void)

wait_for_master_cpu(cpu);

/*
* Initialize the CR4 shadow before doing anything that could
* try to read it.
*/
cr4_init_shadow();

/*
* Load microcode on this cpu if a valid microcode is available.
* This is early microcode loading procedure.
@@ -1313,7 +1320,7 @@ void cpu_init(void)

pr_debug("Initializing CPU#%d\n", cpu);

clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

/*
* Initialize the per-CPU GDT with the boot GDT,
@@ -1394,7 +1401,7 @@ void cpu_init(void)
printk(KERN_INFO "Initializing CPU#%d\n", cpu);

if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

load_current_idt();
switch_to_new_gdt(cpu);
3 changes: 2 additions & 1 deletion arch/x86/kernel/cpu/mcheck/mce.c
@@ -44,6 +44,7 @@

#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>

@@ -1452,7 +1453,7 @@ static void __mcheck_cpu_init_generic(void)
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC | m_fl, &all_banks);

set_in_cr4(X86_CR4_MCE);
cr4_set_bits(X86_CR4_MCE);

rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
3 changes: 2 additions & 1 deletion arch/x86/kernel/cpu/mcheck/p5.c
@@ -9,6 +9,7 @@

#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>

@@ -65,7 +66,7 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
"Intel old style machine check architecture supported.\n");

/* Enable MCE: */
set_in_cr4(X86_CR4_MCE);
cr4_set_bits(X86_CR4_MCE);
printk(KERN_INFO
"Intel old style machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
3 changes: 2 additions & 1 deletion arch/x86/kernel/cpu/mcheck/winchip.c
@@ -8,6 +8,7 @@

#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>

@@ -36,7 +37,7 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
lo &= ~(1<<4); /* Enable MCE */
wrmsr(MSR_IDT_FCR1, lo, hi);

set_in_cr4(X86_CR4_MCE);
cr4_set_bits(X86_CR4_MCE);

printk(KERN_INFO
"Winchip machine check reporting enabled on CPU#0.\n");
(Diff truncated: the remaining changed files of the 35 listed above are not shown here.)
