Skip to content

Commit

Permalink
Merge tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Browse files Browse the repository at this point in the history

Pull x86/mce merge window patches from Tony Luck:
 "Including two that make error_context() checks less sucky"

* tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  x86/mce: Add instruction recovery signatures to mce-severity table
  x86/mce: Fix check for processor context when machine check was taken.
  MCE: Fix vm86 handling for 32bit mce handler
  x86/mce Add validation check before GHES error is recorded
  x86/mce: Avoid reading every machine check bank register twice.
  • Loading branch information
torvalds committed May 25, 2012
2 parents fa2af6e + 37c3459 commit 786f02b
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 12 deletions.
3 changes: 2 additions & 1 deletion arch/x86/kernel/cpu/mcheck/mce-apei.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
struct mce m;

/* Only corrected MC is reported */
if (!corrected)
if (!corrected || !(mem_err->validation_bits &
CPER_MEM_VALID_PHYSICAL_ADDRESS))
return;

mce_setup(&m);
Expand Down
26 changes: 20 additions & 6 deletions arch/x86/kernel/cpu/mcheck/mce-severity.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,16 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER
),
MCESEV(
KEEP, "HT thread notices Action required: instruction fetch error",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
MCGMASK(MCG_STATUS_EIPV, 0)
),
MCESEV(
AR, "Action required: instruction fetch error",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
USER
),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
Expand Down Expand Up @@ -165,15 +175,19 @@ static struct severity {
};

/*
* If the EIPV bit is set, it means the saved IP is the
* instruction which caused the MCE.
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
* to assume the worst case (IN_KERNEL) as we actually
* have no idea what we were executing when the machine
* check hit.
* If we do have a good "m->cs" (or a faked one in the
* case we were executing in VM86 mode) we can use it to
* distinguish an exception taken in user mode from one
* taken in the kernel.
*/
static int error_context(struct mce *m)
{
if (m->mcgstatus & MCG_STATUS_EIPV)
return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
/* Unknown, assume kernel */
return IN_KERNEL;
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
}

int mce_severity(struct mce *m, int tolerant, char **msg)
Expand Down
24 changes: 19 additions & 5 deletions arch/x86/kernel/cpu/mcheck/mce.c
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
m->ip = regs->ip;
m->cs = regs->cs;

/*
* When in VM86 mode make the cs look like ring 3
* always. This is a lie, but it's better than passing
* the additional vm86 bit around everywhere.
*/
if (v8086_mode(regs))
m->cs |= 3;
}
/* Use accurate RIP reporting if available. */
if (rip_msr)
Expand Down Expand Up @@ -641,16 +649,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
static int mce_no_way_out(struct mce *m, char **msg)
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
{
int i;
int i, ret = 0;

for (i = 0; i < banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (m->status & MCI_STATUS_VAL)
__set_bit(i, validp);
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
return 1;
ret = 1;
}
return 0;
return ret;
}

/*
Expand Down Expand Up @@ -1013,6 +1023,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/
int kill_it = 0;
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";

atomic_inc(&mce_entry);
Expand All @@ -1027,7 +1038,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
final = &__get_cpu_var(mces_seen);
*final = m;

no_way_out = mce_no_way_out(&m, &msg);
memset(valid_banks, 0, sizeof(valid_banks));
no_way_out = mce_no_way_out(&m, &msg, valid_banks);

barrier();

Expand All @@ -1047,6 +1059,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
if (!mce_banks[i].ctl)
continue;

Expand Down

0 comments on commit 786f02b

Please sign in to comment.