Skip to content

Commit

Permalink
mce: fix set_mce_nospec to always unmap the whole page
Browse files Browse the repository at this point in the history
The set_memory_uc() approach doesn't work well in all cases.
As Dan pointed out when "The VMM unmapped the bad page from
guest physical space and passed the machine check to the guest."
"The guest gets virtual #MC on an access to that page. When
the guest tries to do set_memory_uc() and instructs cpa_flush()
to do clean caches that results in taking another fault / exception
perhaps because the VMM unmapped the page from the guest."

Since the driver has special knowledge to handle NP or UC,
mark the poisoned page with NP and let driver handle it when
it comes down to repair.

Please refer to discussions here for more details.
https://lore.kernel.org/all/CAPcyv4hrXPb1tASBZUg-GgdVs0OOFKXMXLiHmktg_kFi7YBMyQ@mail.gmail.com/

Now since poisoned page is marked as not-present, in order to
avoid writing to a not-present page and trigger kernel Oops,
also fix pmem_do_write().

Fixes: 284ce40 ("x86/memory_failure: Introduce {set, clear}_mce_nospec()")
Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: Dan Williams <[email protected]>
Signed-off-by: Jane Chu <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: https://lore.kernel.org/r/165272615484.103830.2563950688772226611.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <[email protected]>
  • Loading branch information
jchu314atgithub authored and djbw committed May 16, 2022
1 parent b3fdf93 commit 5898b43
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 40 deletions.
6 changes: 3 additions & 3 deletions arch/x86/kernel/cpu/mce/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,

pfn = mce->addr >> PAGE_SHIFT;
if (!memory_failure(pfn, 0)) {
set_mce_nospec(pfn, whole_page(mce));
set_mce_nospec(pfn);
mce->kflags |= MCE_HANDLED_UC;
}

Expand Down Expand Up @@ -1316,7 +1316,7 @@ static void kill_me_maybe(struct callback_head *cb)

ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
if (!ret) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
sync_core();
return;
}
Expand All @@ -1342,7 +1342,7 @@ static void kill_me_never(struct callback_head *cb)
p->mce_count = 0;
pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
}

static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
Expand Down
23 changes: 11 additions & 12 deletions arch/x86/mm/pat/set_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1925,14 +1925,9 @@ int set_memory_wb(unsigned long addr, int numpages)
}
EXPORT_SYMBOL(set_memory_wb);

/*
* Prevent speculative access to the page by either unmapping
* it (if we do not require access to any part of the page) or
* marking it uncacheable (if we want to try to retrieve data
* from non-poisoned lines in the page).
*/
/* Prevent speculative access to a page by marking it not-present */
#ifdef CONFIG_X86_64
int set_mce_nospec(unsigned long pfn, bool unmap)
int set_mce_nospec(unsigned long pfn)
{
unsigned long decoy_addr;
int rc;
Expand All @@ -1954,19 +1949,23 @@ int set_mce_nospec(unsigned long pfn, bool unmap)
*/
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));

if (unmap)
rc = set_memory_np(decoy_addr, 1);
else
rc = set_memory_uc(decoy_addr, 1);
rc = set_memory_np(decoy_addr, 1);
if (rc)
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
return rc;
}

static int set_memory_present(unsigned long *addr, int numpages)
{
return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}

/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
{
return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);

return set_memory_present(&addr, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */
Expand Down
30 changes: 7 additions & 23 deletions drivers/nvdimm/pmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,36 +158,20 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem,
struct page *page, unsigned int page_off,
sector_t sector, unsigned int len)
{
blk_status_t rc = BLK_STS_OK;
bool bad_pmem = false;
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void *pmem_addr = pmem->virt_addr + pmem_off;

if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
bad_pmem = true;
if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);

if (rc != BLK_STS_OK)
return rc;
}

/*
* Note that we write the data both before and after
* clearing poison. The write before clear poison
* handles situations where the latest written data is
* preserved and the clear poison operation simply marks
* the address range as valid without changing the data.
* In this case application software can assume that an
* interrupted write will either return the new good
* data or an error.
*
* However, if pmem_clear_poison() leaves the data in an
* indeterminate state we need to perform the write
* after clear poison.
*/
flush_dcache_page(page);
write_pmem(pmem_addr, page, page_off, len);
if (unlikely(bad_pmem)) {
rc = pmem_clear_poison(pmem, pmem_off, len);
write_pmem(pmem_addr, page, page_off, len);
}

return rc;
return BLK_STS_OK;
}

static void pmem_submit_bio(struct bio *bio)
Expand Down
4 changes: 2 additions & 2 deletions include/linux/set_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ static inline bool can_set_direct_map(void)
#endif /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */

#ifdef CONFIG_X86_64
int set_mce_nospec(unsigned long pfn, bool unmap);
int set_mce_nospec(unsigned long pfn);
int clear_mce_nospec(unsigned long pfn);
#else
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
static inline int set_mce_nospec(unsigned long pfn)
{
return 0;
}
Expand Down

0 comments on commit 5898b43

Please sign in to comment.