Skip to content

Commit

Permalink
powerpc/64s: Fix unrecoverable MCE calling async handler from NMI
Browse files Browse the repository at this point in the history
The machine check handler is not considered NMI on 64s. The early
handler is the true NMI handler, and then it schedules the
machine_check_exception handler to run when interrupts are enabled.

This works fine except the case of an unrecoverable MCE, where the true
NMI is taken when MSR[RI] is clear, it can not recover, so it calls
machine_check_exception directly so something might be done about it.

Calling an async handler from NMI context can result in irq state and
other things getting corrupted. This can also trigger the BUG at
  arch/powerpc/include/asm/interrupt.h:168
  BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));

Fix this by making an _async version of the handler which is called
in the normal case, and a NMI version that is called for unrecoverable
interrupts.

Fixes: 2b43dd7 ("powerpc/64: enable MSR[EE] in irq replay pt_regs")
Signed-off-by: Nicholas Piggin <[email protected]>
Tested-by: Cédric Le Goater <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
  • Loading branch information
npiggin authored and mpe committed Oct 7, 2021
1 parent 768c470 commit f08fb25
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 18 deletions.
5 changes: 2 additions & 3 deletions arch/powerpc/include/asm/interrupt.h
Original file line number Diff line number Diff line change
Expand Up @@ -528,10 +528,9 @@ static __always_inline long ____##func(struct pt_regs *regs)
/* kernel/traps.c */
DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
#ifdef CONFIG_PPC_BOOK3S_64
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception);
#else
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
#endif
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
DECLARE_INTERRUPT_HANDLER(SMIException);
DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
DECLARE_INTERRUPT_HANDLER(unknown_exception);
Expand Down
8 changes: 6 additions & 2 deletions arch/powerpc/kernel/exceptions-64s.S
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,7 @@ EXC_COMMON_BEGIN(machine_check_common)
li r10,MSR_RI
mtmsrd r10,1
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
bl machine_check_exception_async
b interrupt_return_srr


Expand Down Expand Up @@ -1303,7 +1303,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
subi r12,r12,1
sth r12,PACA_IN_MCE(r13)

/* Invoke machine_check_exception to print MCE event and panic. */
/*
* Invoke machine_check_exception to print MCE event and panic.
* This is the NMI version of the handler because we are called from
* the early handler which is a true NMI.
*/
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception

Expand Down
31 changes: 18 additions & 13 deletions arch/powerpc/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -796,24 +796,22 @@ void die_mce(const char *str, struct pt_regs *regs, long err)
* do_exit() checks for in_interrupt() and panics in that case, so
* exit the irq/nmi before calling die.
*/
if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
irq_exit();
else
if (in_nmi())
nmi_exit();
else
irq_exit();
die(str, regs, err);
}

/*
* BOOK3S_64 does not call this handler as a non-maskable interrupt
* BOOK3S_64 does not usually call this handler as a non-maskable interrupt
* (it uses its own early real-mode handler to handle the MCE proper
* and then raises irq_work to call this handler when interrupts are
* enabled).
* enabled). The only time when this is not true is if the early handler
* is unrecoverable, then it does call this directly to try to get a
* message out.
*/
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception)
#else
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
#endif
static void __machine_check_exception(struct pt_regs *regs)
{
int recover = 0;

Expand Down Expand Up @@ -847,12 +845,19 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
/* Must die if the interrupt is not recoverable */
if (regs_is_unrecoverable(regs))
die_mce("Unrecoverable Machine check", regs, SIGBUS);
}

#ifdef CONFIG_PPC_BOOK3S_64
return;
#else
return 0;
DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async)
{
__machine_check_exception(regs);
}
#endif
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
{
__machine_check_exception(regs);

return 0;
}

DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
Expand Down

0 comments on commit f08fb25

Please sign in to comment.