Merge tag 'x86-cpu-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cpu updates from Ingo Molnar:

 - prepare for Intel's new SERIALIZE instruction

 - enable split-lock debugging on more CPUs

 - add more Intel CPU models

 - optimize stack canary initialization a bit

 - simplify the Spectre logic a bit

* tag 'x86-cpu-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/cpu: Refactor sync_core() for readability
  x86/cpu: Relocate sync_core() to sync_core.h
  x86/cpufeatures: Add enumeration for SERIALIZE instruction
  x86/split_lock: Enable the split lock feature on Sapphire Rapids and Alder Lake CPUs
  x86/cpu: Add Lakefield, Alder Lake and Rocket Lake models to the Intel CPU family
  x86/stackprotector: Pre-initialize canary for secondary CPUs
  x86/speculation: Merge one test in spectre_v2_user_select_mitigation()
torvalds committed Aug 4, 2020
2 parents 4ee4810 + f69ca62 commit 335ad94
Showing 15 changed files with 105 additions and 88 deletions.
1 change: 1 addition & 0 deletions arch/x86/include/asm/cpufeatures.h
@@ -367,6 +367,7 @@
#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
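Note: the new bit only advertises the capability; code that actually emits SERIALIZE lands in later patches. As a hedged sketch (hypothetical helper, not part of this merge), a caller could prefer the instruction when the bit is set and fall back to the existing IRET-to-self path:

    /* Hypothetical illustration -- not part of this commit. */
    #include <asm/cpufeature.h>
    #include <asm/sync_core.h>

    static inline void serialize_or_sync_core(void)
    {
            if (boot_cpu_has(X86_FEATURE_SERIALIZE)) {
                    /* SERIALIZE is encoded as 0f 01 e8; raw bytes avoid requiring assembler support */
                    asm volatile(".byte 0x0f, 0x01, 0xe8" ::: "memory");
            } else {
                    sync_core();    /* IRET-to-self fallback, see sync_core.h below */
            }
    }
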
7 changes: 7 additions & 0 deletions arch/x86/include/asm/intel-family.h
@@ -89,8 +89,15 @@
#define INTEL_FAM6_COMETLAKE 0xA5
#define INTEL_FAM6_COMETLAKE_L 0xA6

#define INTEL_FAM6_ROCKETLAKE 0xA7

#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F

/* Hybrid Core/Atom Processors */

#define INTEL_FAM6_LAKEFIELD 0x8A
#define INTEL_FAM6_ALDERLAKE 0x97

/* "Small Core" Processors (Atom) */

#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
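These FAM6 model numbers are consumed through x86_match_cpu() tables elsewhere in the tree; a minimal sketch, assuming a hypothetical table (the split_lock_cpu_ids hunk further down follows the same pattern):

    /* Hypothetical example only; mirrors the split_lock table below. */
    #include <linux/init.h>
    #include <linux/printk.h>
    #include <asm/cpu_device_id.h>
    #include <asm/intel-family.h>

    static const struct x86_cpu_id new_model_ids[] __initconst = {
            X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD,  NULL),
            X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,  NULL),
            X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, NULL),
            {}
    };

    static int __init new_model_check(void)
    {
            if (x86_match_cpu(new_model_ids))
                    pr_info("running on one of the newly added models\n");
            return 0;
    }
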
64 changes: 0 additions & 64 deletions arch/x86/include/asm/processor.h
@@ -678,70 +678,6 @@ static inline unsigned int cpuid_edx(unsigned int op)
return edx;
}

/*
* This function forces the icache and prefetched instruction stream to
* catch up with reality in two very specific cases:
*
* a) Text was modified using one virtual address and is about to be executed
* from the same physical page at a different virtual address.
*
* b) Text was modified on a different CPU, may subsequently be
* executed on this CPU, and you want to make sure the new version
* gets executed. This generally means you're calling this in an IPI.
*
* If you're calling this for a different reason, you're probably doing
* it wrong.
*/
static inline void sync_core(void)
{
/*
* There are quite a few ways to do this. IRET-to-self is nice
* because it works on every CPU, at any CPL (so it's compatible
* with paravirtualization), and it never exits to a hypervisor.
* The only down sides are that it's a bit slow (it seems to be
* a bit more than 2x slower than the fastest options) and that
* it unmasks NMIs. The "push %cs" is needed because, in
* paravirtual environments, __KERNEL_CS may not be a valid CS
* value when we do IRET directly.
*
* In case NMI unmasking or performance ever becomes a problem,
* the next best option appears to be MOV-to-CR2 and an
* unconditional jump. That sequence also works on all CPUs,
* but it will fault at CPL3 (i.e. Xen PV).
*
* CPUID is the conventional way, but it's nasty: it doesn't
* exist on some 486-like CPUs, and it usually exits to a
* hypervisor.
*
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
#ifdef CONFIG_X86_32
asm volatile (
"pushfl\n\t"
"pushl %%cs\n\t"
"pushl $1f\n\t"
"iret\n\t"
"1:"
: ASM_CALL_CONSTRAINT : : "memory");
#else
unsigned int tmp;

asm volatile (
"mov %%ss, %0\n\t"
"pushq %q0\n\t"
"pushq %%rsp\n\t"
"addq $8, (%%rsp)\n\t"
"pushfq\n\t"
"mov %%cs, %0\n\t"
"pushq %q0\n\t"
"pushq $1f\n\t"
"iretq\n\t"
"1:"
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
#endif
}

extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void amd_e400_c1e_apic_setup(void);

1 change: 0 additions & 1 deletion arch/x86/include/asm/special_insns.h
@@ -234,7 +234,6 @@ static inline void clwb(volatile void *__p)

#define nop() asm volatile ("nop")


#endif /* __KERNEL__ */

#endif /* _ASM_X86_SPECIAL_INSNS_H */
12 changes: 12 additions & 0 deletions arch/x86/include/asm/stackprotector.h
@@ -90,6 +90,15 @@ static __always_inline void boot_init_stack_canary(void)
#endif
}

static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{
#ifdef CONFIG_X86_64
per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
#else
per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
#endif
}

static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
@@ -119,6 +128,9 @@ static inline void load_stack_canary_segment(void)
static inline void setup_stack_canary_segment(int cpu)
{ }

static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{ }

static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
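The smpboot.c hunk below wires this helper into the hotplug path, so the boot CPU seeds the AP's canary from the idle task before the AP ever runs; roughly (a simplified sketch, unrelated setup and error handling elided):

    /* Simplified sketch of the new bring-up ordering -- not verbatim kernel code. */
    int common_cpu_up(unsigned int cpu, struct task_struct *idle)
    {
            per_cpu(current_task, cpu) = idle;
            cpu_init_stack_canary(cpu, idle);       /* canary valid before the AP starts */

            return irq_init_percpu_irqstack(cpu);
    }

With the per-CPU canary already matching the idle task's stack_canary, start_secondary() no longer has to call boot_init_stack_canary(), which is also why the prevent_tail_call_optimization() workaround can be dropped (see the smpboot.c and xen/smp_pv.c hunks).
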
72 changes: 72 additions & 0 deletions arch/x86/include/asm/sync_core.h
@@ -6,6 +6,78 @@
#include <asm/processor.h>
#include <asm/cpufeature.h>

#ifdef CONFIG_X86_32
static inline void iret_to_self(void)
{
asm volatile (
"pushfl\n\t"
"pushl %%cs\n\t"
"pushl $1f\n\t"
"iret\n\t"
"1:"
: ASM_CALL_CONSTRAINT : : "memory");
}
#else
static inline void iret_to_self(void)
{
unsigned int tmp;

asm volatile (
"mov %%ss, %0\n\t"
"pushq %q0\n\t"
"pushq %%rsp\n\t"
"addq $8, (%%rsp)\n\t"
"pushfq\n\t"
"mov %%cs, %0\n\t"
"pushq %q0\n\t"
"pushq $1f\n\t"
"iretq\n\t"
"1:"
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
}
#endif /* CONFIG_X86_32 */

/*
* This function forces the icache and prefetched instruction stream to
* catch up with reality in two very specific cases:
*
* a) Text was modified using one virtual address and is about to be executed
* from the same physical page at a different virtual address.
*
* b) Text was modified on a different CPU, may subsequently be
* executed on this CPU, and you want to make sure the new version
* gets executed. This generally means you're calling this in an IPI.
*
* If you're calling this for a different reason, you're probably doing
* it wrong.
*/
static inline void sync_core(void)
{
/*
* There are quite a few ways to do this. IRET-to-self is nice
* because it works on every CPU, at any CPL (so it's compatible
* with paravirtualization), and it never exits to a hypervisor.
* The only down sides are that it's a bit slow (it seems to be
* a bit more than 2x slower than the fastest options) and that
* it unmasks NMIs. The "push %cs" is needed because, in
* paravirtual environments, __KERNEL_CS may not be a valid CS
* value when we do IRET directly.
*
* In case NMI unmasking or performance ever becomes a problem,
* the next best option appears to be MOV-to-CR2 and an
* unconditional jump. That sequence also works on all CPUs,
* but it will fault at CPL3 (i.e. Xen PV).
*
* CPUID is the conventional way, but it's nasty: it doesn't
* exist on some 486-like CPUs, and it usually exits to a
* hypervisor.
*
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
iret_to_self();
}

/*
* Ensure that a core serializing instruction is issued before returning
* to user-mode. x86 implements return to user-space through sysexit,
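The classic consumer of sync_core() is cross-modifying code: patch kernel text on one CPU, then IPI every CPU so stale prefetched instructions are discarded. A hedged sketch of that pattern (alternative.c carries a similar do_sync_core() helper; the wrapper function name here is illustrative):

    #include <linux/smp.h>
    #include <asm/sync_core.h>

    static void do_sync_core(void *info)
    {
            sync_core();            /* serialize this CPU's instruction stream */
    }

    /* After patching text: run on every CPU, including the caller, and wait. */
    static void sync_all_cpus_example(void)
    {
            on_each_cpu(do_sync_core, NULL, 1);
    }
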
1 change: 1 addition & 0 deletions arch/x86/kernel/alternative.c
@@ -16,6 +16,7 @@
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
13 changes: 4 additions & 9 deletions arch/x86/kernel/cpu/bugs.c
@@ -763,10 +763,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
}

/*
* If enhanced IBRS is enabled or SMT impossible, STIBP is not
* If there is no STIBP, enhanced IBRS is enabled, or SMT is impossible, STIBP is not
* required.
*/
if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;

/*
@@ -778,12 +780,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
mode = SPECTRE_V2_USER_STRICT_PREFERRED;

/*
* If STIBP is not available, clear the STIBP mode.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP))
mode = SPECTRE_V2_USER_NONE;

spectre_v2_user_stibp = mode;

set_mode:
@@ -1270,7 +1266,6 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
* Indirect branch speculation is always disabled in strict
* mode. Nor can it be enabled if it was force-disabled
* by a previous prctl call.
*/
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
2 changes: 2 additions & 0 deletions arch/x86/kernel/cpu/intel.c
@@ -1156,6 +1156,8 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
{}
};

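Split lock detection raises #AC when a LOCK-prefixed operation straddles a cache line. For illustration only (a userspace sketch; the observed behaviour depends on the split_lock_detect= boot parameter), this is the kind of access the feature flags on the newly listed CPUs:

    /* Userspace illustration: a locked RMW crossing a 64-byte cache line. */
    #include <stdint.h>
    #include <stdlib.h>

    int main(void)
    {
            char *buf = aligned_alloc(64, 128);

            if (!buf)
                    return 1;

            /* Bytes 62..65 straddle the cache-line boundary at offset 64. */
            volatile uint32_t *p = (volatile uint32_t *)(buf + 62);

            __atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);     /* emits LOCK XADD */

            free(buf);
            return 0;
    }

With split_lock_detect=warn the kernel logs the offending task; with split_lock_detect=fatal it receives SIGBUS.
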
1 change: 1 addition & 0 deletions arch/x86/kernel/cpu/mce/core.c
@@ -42,6 +42,7 @@
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/sync_core.h>
#include <linux/task_work.h>
#include <linux/hardirq.h>

14 changes: 2 additions & 12 deletions arch/x86/kernel/smpboot.c
@@ -51,7 +51,6 @@
#include <linux/err.h>
#include <linux/nmi.h>
#include <linux/tboot.h>
#include <linux/stackprotector.h>
#include <linux/gfp.h>
#include <linux/cpuidle.h>
#include <linux/numa.h>
@@ -81,6 +80,7 @@
#include <asm/cpu_device_id.h>
#include <asm/spec-ctrl.h>
#include <asm/hw_irq.h>
#include <asm/stackprotector.h>

/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -260,21 +260,10 @@ static void notrace start_secondary(void *unused)
/* enable local interrupts */
local_irq_enable();

/* to prevent fake stack check failure in clock setup */
boot_init_stack_canary();

x86_cpuinit.setup_percpu_clockev();

wmb();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

/*
* Prevent tail call to cpu_startup_entry() because the stack protector
* guard has been changed a couple of function calls up, in
* boot_init_stack_canary() and must not be checked before tail calling
* another function.
*/
prevent_tail_call_optimization();
}

/**
@@ -1012,6 +1001,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
alternatives_enable_smp();

per_cpu(current_task, cpu) = idle;
cpu_init_stack_canary(cpu, idle);

/* Initialize the interrupt stack(s) */
ret = irq_init_percpu_irqstack(cpu);
2 changes: 0 additions & 2 deletions arch/x86/xen/smp_pv.c
@@ -92,9 +92,7 @@ static void cpu_bringup(void)
asmlinkage __visible void cpu_bringup_and_idle(void)
{
cpu_bringup();
boot_init_stack_canary();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
prevent_tail_call_optimization();
}

void xen_smp_intr_free_pv(unsigned int cpu)
1 change: 1 addition & 0 deletions drivers/misc/sgi-gru/grufault.c
@@ -20,6 +20,7 @@
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/sync_core.h>
#include <linux/prefetch.h>
#include "gru.h"
#include "grutables.h"
1 change: 1 addition & 0 deletions drivers/misc/sgi-gru/gruhandles.c
@@ -16,6 +16,7 @@
#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq)
#else
#include <linux/sync_core.h>
#include <asm/tsc.h>
#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz)
1 change: 1 addition & 0 deletions drivers/misc/sgi-gru/grukservices.c
@@ -16,6 +16,7 @@
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/sync_core.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/export.h>
