Skip to content

Commit

Permalink
mm,hwpoison: send SIGBUS with error virutal address
Browse files Browse the repository at this point in the history
Now an action required MCE in already hwpoisoned address surely sends a
SIGBUS to current process, but the SIGBUS doesn't convey error virtual
address.  That's not optimal for hwpoison-aware applications.

To fix the issue, make memory_failure() call kill_accessing_process(),
that does pagetable walk to find the error virtual address.  It could find
multiple virtual addresses for the same error page, and it seems hard to
tell which virtual address is correct one.  But that's rare and sending
incorrect virtual address could be better than no address.  So let's
report the first found virtual address for now.

[[email protected]: fix walk_page_range() return]
  Link: https://lkml.kernel.org/r/[email protected]

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Naoya Horiguchi <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Aili Yao <[email protected]>
Cc: Oscar Salvador <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Jue Wang <[email protected]>
Cc: Borislav Petkov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
nhoriguchi authored and torvalds committed Jun 29, 2021
1 parent 203c06e commit a3f5d80
Showing 3 changed files with 165 additions and 3 deletions.
13 changes: 11 additions & 2 deletions arch/x86/kernel/cpu/mce/core.c
Original file line number Diff line number Diff line change
@@ -1257,19 +1257,28 @@ static void kill_me_maybe(struct callback_head *cb)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
int flags = MF_ACTION_REQUIRED;
int ret;

pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);

if (!p->mce_ripv)
flags |= MF_MUST_KILL;

if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
!(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
}

/*
* -EHWPOISON from memory_failure() means that it already sent SIGBUS
* to the current process with the proper error info, so no need to
* send SIGBUS here again.
*/
if (ret == -EHWPOISON)
return;

if (p->mce_vaddr != (void __user *)-1l) {
force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
} else {
5 changes: 5 additions & 0 deletions include/linux/swapops.h
Original file line number Diff line number Diff line change
@@ -330,6 +330,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
}

static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
{
return swp_offset(entry);
}

static inline void num_poisoned_pages_inc(void)
{
atomic_long_inc(&num_poisoned_pages);
150 changes: 149 additions & 1 deletion mm/memory-failure.c
Original file line number Diff line number Diff line change
@@ -56,6 +56,7 @@
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
#include "internal.h"
#include "ras/ras_event.h"

@@ -554,6 +555,148 @@ static void collect_procs(struct page *page, struct list_head *tokill,
collect_procs_file(page, tokill, force_early);
}

struct hwp_walk {
struct to_kill tk;
unsigned long pfn;
int flags;
};

static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
{
tk->addr = addr;
tk->size_shift = shift;
}

static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
unsigned long poisoned_pfn, struct to_kill *tk)
{
unsigned long pfn = 0;

if (pte_present(pte)) {
pfn = pte_pfn(pte);
} else {
swp_entry_t swp = pte_to_swp_entry(pte);

if (is_hwpoison_entry(swp))
pfn = hwpoison_entry_to_pfn(swp);
}

if (!pfn || pfn != poisoned_pfn)
return 0;

set_to_kill(tk, addr, shift);
return 1;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
struct hwp_walk *hwp)
{
pmd_t pmd = *pmdp;
unsigned long pfn;
unsigned long hwpoison_vaddr;

if (!pmd_present(pmd))
return 0;
pfn = pmd_pfn(pmd);
if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) {
hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT);
set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT);
return 1;
}
return 0;
}
#else
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
struct hwp_walk *hwp)
{
return 0;
}
#endif

static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
int ret = 0;
pte_t *ptep;
spinlock_t *ptl;

ptl = pmd_trans_huge_lock(pmdp, walk->vma);
if (ptl) {
ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp);
spin_unlock(ptl);
goto out;
}

if (pmd_trans_unstable(pmdp))
goto out;

ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
for (; addr != end; ptep++, addr += PAGE_SIZE) {
ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
hwp->pfn, &hwp->tk);
if (ret == 1)
break;
}
pte_unmap_unlock(ptep - 1, ptl);
out:
cond_resched();
return ret;
}

#ifdef CONFIG_HUGETLB_PAGE
static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(walk->vma);

return check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
hwp->pfn, &hwp->tk);
}
#else
#define hwpoison_hugetlb_range NULL
#endif

static struct mm_walk_ops hwp_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
};

/*
* Sends SIGBUS to the current process with error info.
*
* This function is intended to handle "Action Required" MCEs on already
* hardware poisoned pages. They could happen, for example, when
* memory_failure() failed to unmap the error page at the first call, or
* when multiple local machine checks happened on different CPUs.
*
* MCE handler currently has no easy access to the error virtual address,
* so this function walks page table to find it. The returned virtual address
* is proper in most cases, but it could be wrong when the application
* process has multiple entries mapping the error page.
*/
static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
int flags)
{
int ret;
struct hwp_walk priv = {
.pfn = pfn,
};
priv.tk.tsk = p;

mmap_read_lock(p->mm);
ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
mmap_read_unlock(p->mm);
return ret ? -EFAULT : -EHWPOISON;
}

static const char *action_name[] = {
[MF_IGNORED] = "Ignored",
[MF_FAILED] = "Failed",
@@ -1267,7 +1410,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (TestSetPageHWPoison(head)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn);
return -EHWPOISON;
res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, page_to_pfn(head), flags);
return res;
}

num_poisoned_pages_inc();
@@ -1476,6 +1622,8 @@ int memory_failure(unsigned long pfn, int flags)
pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn);
res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, pfn, flags);
goto unlock_mutex;
}

0 comments on commit a3f5d80

Please sign in to comment.