Skip to content

Commit

Permalink
Merge tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Browse files Browse the repository at this point in the history

Pull x86 RAS updates from Thomas Gleixner:
 "RAS updates from Borislav Petkov:

   - Unmap a whole guest page if an MCE is encountered in it to avoid
     follow-on MCEs leading to the guest crashing, by Tony Luck.

     This change collided with the entry changes and the merge
     resolution would have been rather unpleasant. To avoid that the
     entry branch was merged in before applying this. The resulting code
     did not change over the rebase.

   - AMD MCE error thresholding machinery cleanup and hotplug
     sanitization, by Thomas Gleixner.

   - Change the MCE notifiers to denote whether they have handled the
     error and not break the chain early by returning NOTIFY_STOP, thus
     giving the opportunity for the later handlers in the chain to see
     it. By Tony Luck.

   - Add AMD family 0x17, models 0x60-6f support, by Alexander Monakov.

   - Last but not least, the usual round of fixes and improvements"

* tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86/mce/dev-mcelog: Fix -Wstringop-truncation warning about strncpy()
  x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned
  EDAC/amd64: Add AMD family 17h model 60h PCI IDs
  hwmon: (k10temp) Add AMD family 17h model 60h PCI match
  x86/amd_nb: Add AMD family 17h model 60h PCI IDs
  x86/mcelog: Add compat_ioctl for 32-bit mcelog support
  x86/mce: Drop bogus comment about mce.kflags
  x86/mce: Fixup exception only for the correct MCEs
  EDAC: Drop the EDAC report status checks
  x86/mce: Add mce=print_all option
  x86/mce: Change default MCE logger to check mce->kflags
  x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
  x86/mce: Add a struct mce.kflags field
  x86/mce: Convert the CEC to use the MCE notifier
  x86/mce: Rename "first" function as "early"
  x86/mce/amd, edac: Remove report_gart_errors
  x86/mce/amd: Make threshold bank setting hotplug robust
  x86/mce/amd: Cleanup threshold device remove path
  x86/mce/amd: Straighten CPU hotplug path
  x86/mce/amd: Sanitize thresholding device creation hotplug path
  ...
  • Loading branch information
torvalds committed Jun 13, 2020
2 parents 076f14b + 7ccddc4 commit a942908
Show file tree
Hide file tree
Showing 28 changed files with 306 additions and 320 deletions.
1 change: 1 addition & 0 deletions arch/x86/include/asm/amd_nb.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct threshold_bank {

/* initialized to the number of CPUs on the node sharing this bank */
refcount_t cpus;
unsigned int shared;
};

struct amd_northbridge {
Expand Down
28 changes: 20 additions & 8 deletions arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,17 @@
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))

#define XEC(x, mask) (((x) >> 16) & mask)

/* mce.kflags flag bits for logging etc. */
#define MCE_HANDLED_CEC BIT_ULL(0)
#define MCE_HANDLED_UC BIT_ULL(1)
#define MCE_HANDLED_EXTLOG BIT_ULL(2)
#define MCE_HANDLED_NFIT BIT_ULL(3)
#define MCE_HANDLED_EDAC BIT_ULL(4)
#define MCE_HANDLED_MCELOG BIT_ULL(5)
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)

/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
Expand All @@ -142,14 +153,16 @@ struct mce_log_buffer {
struct mce entry[];
};

/* Highest last */
enum mce_notifier_prios {
MCE_PRIO_FIRST = INT_MAX,
MCE_PRIO_UC = INT_MAX - 1,
MCE_PRIO_EXTLOG = INT_MAX - 2,
MCE_PRIO_NFIT = INT_MAX - 3,
MCE_PRIO_EDAC = INT_MAX - 4,
MCE_PRIO_MCELOG = 1,
MCE_PRIO_LOWEST = 0,
MCE_PRIO_LOWEST,
MCE_PRIO_MCELOG,
MCE_PRIO_EDAC,
MCE_PRIO_NFIT,
MCE_PRIO_EXTLOG,
MCE_PRIO_UC,
MCE_PRIO_EARLY,
MCE_PRIO_CEC
};

struct notifier_block;
Expand Down Expand Up @@ -347,5 +360,4 @@ umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return
#endif

static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }

#endif /* _ASM_X86_MCE_H */
19 changes: 13 additions & 6 deletions arch/x86/include/asm/set_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,28 +86,35 @@ int set_direct_map_default_noflush(struct page *page);
extern int kernel_set_to_readonly;

#ifdef CONFIG_X86_64
static inline int set_mce_nospec(unsigned long pfn)
/*
* Prevent speculative access to the page by either unmapping
* it (if we do not require access to any part of the page) or
* marking it uncacheable (if we want to try to retrieve data
* from non-poisoned lines in the page).
*/
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
{
unsigned long decoy_addr;
int rc;

/*
* Mark the linear address as UC to make sure we don't log more
* errors because of speculative access to the page.
* We would like to just call:
* set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1);
* set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
* but doing that would radically increase the odds of a
* speculative access to the poison page because we'd have
* the virtual address of the kernel 1:1 mapping sitting
* around in registers.
* Instead we get tricky. We create a non-canonical address
* that looks just like the one we want, but has bit 63 flipped.
* This relies on set_memory_uc() properly sanitizing any __pa()
* This relies on set_memory_XX() properly sanitizing any __pa()
* results with __PHYSICAL_MASK or PTE_PFN_MASK.
*/
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));

rc = set_memory_uc(decoy_addr, 1);
if (unmap)
rc = set_memory_np(decoy_addr, 1);
else
rc = set_memory_uc(decoy_addr, 1);
if (rc)
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
return rc;
Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/uapi/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ struct mce {
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
__u32 microcode; /* Microcode revision */
__u64 kflags; /* Internal kernel use */
};

#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
Expand Down
5 changes: 5 additions & 0 deletions arch/x86/kernel/amd_nb.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654

Expand All @@ -33,6 +35,7 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
{}
};

Expand All @@ -50,6 +53,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
Expand All @@ -65,6 +69,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
Expand Down
Loading

0 comments on commit a942908

Please sign in to comment.