Skip to content

Commit

Permalink
Merge tag 'ras_core_for_v6.3_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Browse files Browse the repository at this point in the history

Pull RAS updates from Borislav Petkov:

 - Add support for reporting more bits of the physical address on error,
   on newer AMD CPUs

 - Mask out bits which don't belong to the address of the error being
   reported

* tag 'ras_core_for_v6.3_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Mask out non-address bits from machine check bank
  x86/mce: Add support for Extended Physical Address MCA changes
  x86/mce: Define a function to extract ErrorAddr from MCA_ADDR
  • Loading branch information
torvalds committed Feb 21, 2023
2 parents 89f5349 + 8a01ec9 commit 0246725
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 31 deletions.
3 changes: 3 additions & 0 deletions arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@
#define MCI_MISC_ADDR_MEM 3 /* memory address */
#define MCI_MISC_ADDR_GENERIC 7 /* generic */

/*
 * MCi_ADDR register defines
 *
 * MCI_ADDR_PHYSADDR selects bits [x86_phys_bits - 1:0], where x86_phys_bits
 * is taken from boot_cpu_data. AND an MCi_ADDR value with it to drop every
 * bit above that width before using the value as a physical address.
 */
#define MCI_ADDR_PHYSADDR GENMASK_ULL(boot_cpu_data.x86_phys_bits - 1, 0)

/* CTL2 register defines */
#define MCI_CTL2_CMCI_EN BIT_ULL(30)
#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
Expand Down
12 changes: 3 additions & 9 deletions arch/x86/kernel/cpu/mce/amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
if ((low & BIT(5)) && !((high >> 5) & 0x3))
high |= BIT(5);

this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));

wrmsr(smca_config, low, high);
}

Expand Down Expand Up @@ -736,15 +738,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
if (m.status & MCI_STATUS_ADDRV) {
m.addr = addr;

/*
* Extract [55:<lsb>] where lsb is the least significant
* *valid* bit of the address bits.
*/
if (mce_flags.smca) {
u8 lsb = (m.addr >> 56) & 0x3f;

m.addr &= GENMASK_ULL(55, lsb);
}
smca_extract_err_addr(&m);
}

if (mce_flags.smca) {
Expand Down
32 changes: 11 additions & 21 deletions arch/x86/kernel/cpu/mce/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,7 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);

DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);

/* State kept for a single MCA bank. */
struct mce_bank {
u64 ctl; /* subevents to enable */

/* One flag bit plus 63 reserved bits: together they fill one 64-bit word. */
__u64 init : 1, /* initialise bank? */
__reserved_1 : 63;
};
static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);

#define ATTR_LEN 16
/* One object for each MCE bank, shared by all CPUs */
Expand Down Expand Up @@ -579,7 +573,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
mce->severity != MCE_DEFERRED_SEVERITY)
return NOTIFY_DONE;

pfn = mce->addr >> PAGE_SHIFT;
pfn = (mce->addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT;
if (!memory_failure(pfn, 0)) {
set_mce_nospec(pfn);
mce->kflags |= MCE_HANDLED_UC;
Expand Down Expand Up @@ -633,15 +627,7 @@ static noinstr void mce_read_aux(struct mce *m, int i)
m->addr <<= shift;
}

/*
* Extract [55:<lsb>] where lsb is the least significant
* *valid* bit of the address bits.
*/
if (mce_flags.smca) {
u8 lsb = (m->addr >> 56) & 0x3f;

m->addr &= GENMASK_ULL(55, lsb);
}
smca_extract_err_addr(m);
}

if (mce_flags.smca) {
Expand Down Expand Up @@ -1308,6 +1294,7 @@ static void kill_me_maybe(struct callback_head *cb)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
int flags = MF_ACTION_REQUIRED;
unsigned long pfn;
int ret;

p->mce_count = 0;
Expand All @@ -1316,9 +1303,10 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;

ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
pfn = (p->mce_addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT;
ret = memory_failure(pfn, flags);
if (!ret) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
set_mce_nospec(pfn);
sync_core();
return;
}
Expand All @@ -1340,11 +1328,13 @@ static void kill_me_maybe(struct callback_head *cb)
/*
 * Callback for the case logged below as "Kernel accessed poison in user
 * space". Clears the task's mce_count, logs the recorded address and passes
 * the affected page frame to memory_failure() with no flags; set_mce_nospec()
 * is called on that frame only when memory_failure() returns 0.
 *
 * @cb: the mce_kill_me callback head embedded in the affected task_struct.
 */
static void kill_me_never(struct callback_head *cb)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
unsigned long pfn;

p->mce_count = 0;
pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
/*
 * NOTE(review): the next two lines look like the pre-change side of this
 * diff hunk (no MCI_ADDR_PHYSADDR masking), shown here without a '-'
 * marker. Confirm against the real file before treating this listing as
 * the function's actual body.
 */
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
/* Mask with MCI_ADDR_PHYSADDR first, then convert to a page frame number. */
pfn = (p->mce_addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT;
if (!memory_failure(pfn, 0))
set_mce_nospec(pfn);
}

static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
Expand Down
44 changes: 44 additions & 0 deletions arch/x86/kernel/cpu/mce/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,24 @@ struct mce_vendor_flags {

extern struct mce_vendor_flags mce_flags;

/* State kept for a single MCA bank. */
struct mce_bank {
	u64 ctl;			/* subevents to enable */

	__u64 init		: 1;	/* initialise bank? */

	/*
	 * (AMD) Mirrors MCA_CONFIG[McaLsbInStatusSupported]. When set, the
	 * LSB field is found in MCA_STATUS and not in MCA_ADDR.
	 */
	__u64 lsb_in_status	: 1;

	/* Pads the flag bits above out to a full 64-bit word. */
	__u64 __reserved_1	: 62;
};

DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);

enum mca_msr {
MCA_CTL,
MCA_STATUS,
Expand All @@ -189,8 +207,34 @@ extern bool filter_mce(struct mce *m);

#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);

/*
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
* [56:0] of MCA_STATUS, else in bits [55:0] of MCA_ADDR.
*/
static __always_inline void smca_extract_err_addr(struct mce *m)
{
u8 lsb;

if (!mce_flags.smca)
return;

if (this_cpu_ptr(mce_banks_array)[m->bank].lsb_in_status) {
lsb = (m->status >> 24) & 0x3f;

m->addr &= GENMASK_ULL(56, lsb);

return;
}

lsb = (m->addr >> 56) & 0x3f;

m->addr &= GENMASK_ULL(55, lsb);
}

#else
/* Stub used when CONFIG_X86_MCE_AMD is not set: never filters a record. */
static inline bool amd_filter_mce(struct mce *m)
{
	return false;
}

/* Stub used when CONFIG_X86_MCE_AMD is not set: leaves the record untouched. */
static inline void smca_extract_err_addr(struct mce *m)
{
}
#endif

#ifdef CONFIG_X86_ANCIENT_MCE
Expand Down
2 changes: 1 addition & 1 deletion drivers/edac/skx_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,

memset(&res, 0, sizeof(res));
res.mce = mce;
res.addr = mce->addr;
res.addr = mce->addr & MCI_ADDR_PHYSADDR;

/* Try driver decoder first */
if (!(driver_decode && driver_decode(&res))) {
Expand Down

0 comments on commit 0246725

Please sign in to comment.