From: Tony Luck <tony.luck@intel.com>
To: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>,
Andrew Morton <akpm@linux-foundation.org>,
Andy Lutomirski <luto@kernel.org>,
Dan Williams <dan.j.williams@intel.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-nvdimm@ml01.01.org, x86@kernel.org
Subject: [PATCHV2 2/3] x86, ras: Extend machine check recovery code to annotated ring0 areas
Date: Thu, 10 Dec 2015 16:14:44 -0800 [thread overview]
Message-ID: <e8029c58c7d4b5094ec274c78dee01d390317d4d.1449861203.git.tony.luck@intel.com> (raw)
In-Reply-To: <cover.1449861203.git.tony.luck@intel.com>
Extend the severity checking code to add a new context IN_KERNEL_RECOV
which is used to indicate that the machine check was triggered by code
in the kernel with a fixup entry.
Add code to check for this situation and respond by altering the return
IP to the fixup address and changing the regs->ax so that the recovery
code knows the physical address of the error. Note that we also set bit
63 because 0x0 is a legal physical address.
Major re-work to the tail code in do_machine_check() to make all this
readable/maintainable. One functional change is that tolerant=3 no longer
stops recovery actions. Revert to only skipping sending SIGBUS to the
current process.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 22 +++++++++-
arch/x86/kernel/cpu/mcheck/mce.c | 69 ++++++++++++++++---------------
2 files changed, 55 insertions(+), 36 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 9c682c222071..ac7fbb0689fb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/init.h>
+#include <linux/module.h>
#include <linux/debugfs.h>
#include <asm/mce.h>
@@ -29,7 +30,7 @@
* panic situations)
*/
-enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
@@ -48,6 +49,7 @@ static struct severity {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER
+#define KERNEL_RECOV .context = IN_KERNEL_RECOV
#define SER .ser = SER_REQUIRED
#define NOSER .ser = NO_SER
#define EXCP .excp = EXCP_CONTEXT
@@ -87,6 +89,10 @@ static struct severity {
EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
+ PANIC, "In kernel and no restart IP",
+ EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
+ ),
+ MCESEV(
DEFERRED, "Deferred error",
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
@@ -123,6 +129,11 @@ static struct severity {
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
),
MCESEV(
+ AR, "Action required: data load error recoverable area of kernel",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ KERNEL_RECOV
+ ),
+ MCESEV(
AR, "Action required: data load error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER
@@ -170,6 +181,9 @@ static struct severity {
) /* always matches. keep at end */
};
+#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
+ (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
@@ -183,7 +197,11 @@ static struct severity {
*/
static int error_context(struct mce *m)
{
- return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+ if ((m->cs & 3) == 3)
+ return IN_USER;
+ if (mc_recoverable(m->mcgstatus) && search_mcexception_tables(m->ip))
+ return IN_KERNEL_RECOV;
+ return IN_KERNEL;
}
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9d014b82a124..f2f568ad6409 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -31,6 +31,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
+#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/poll.h>
#include <linux/nmi.h>
@@ -958,6 +959,20 @@ static void mce_clear_state(unsigned long *toclear)
}
}
+static int do_memory_failure(struct mce *m)
+{
+ int flags = MF_ACTION_REQUIRED;
+ int ret;
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
+ if (!(m->mcgstatus & MCG_STATUS_RIPV))
+ flags |= MF_MUST_KILL;
+ ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
+ if (ret)
+ pr_err("Memory error not recovered");
+ return ret;
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -995,8 +1010,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
- u64 recover_paddr = ~0ull;
- int flags = MF_ACTION_REQUIRED;
int lmce = 0;
ist_enter(regs);
@@ -1123,22 +1136,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
/*
- * At insane "tolerant" levels we take no action. Otherwise
- * we only die if we have no other choice. For less serious
- * issues we try to recover, or limit damage to the current
- * process.
+ * If tolerant is at an insane level we drop requests to kill
+ * processes and continue even when there is no way out
*/
- if (cfg->tolerant < 3) {
- if (no_way_out)
- mce_panic("Fatal machine check on current CPU", &m, msg);
- if (worst == MCE_AR_SEVERITY) {
- recover_paddr = m.addr;
- if (!(m.mcgstatus & MCG_STATUS_RIPV))
- flags |= MF_MUST_KILL;
- } else if (kill_it) {
- force_sig(SIGBUS, current);
- }
- }
+ if (cfg->tolerant == 3)
+ kill_it = 0;
+ else if (no_way_out)
+ mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst > 0)
mce_report_event(regs);
@@ -1146,25 +1150,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
out:
sync_core();
- if (recover_paddr == ~0ull)
- goto done;
+ /* Fault was in user mode and we need to take some action */
+ if ((m.cs & 3) == 3 && (worst == MCE_AR_SEVERITY || kill_it)) {
+ ist_begin_non_atomic(regs);
+ local_irq_enable();
- pr_err("Uncorrected hardware memory error in user-access at %llx",
- recover_paddr);
- /*
- * We must call memory_failure() here even if the current process is
- * doomed. We still need to mark the page as poisoned and alert any
- * other users of the page.
- */
- ist_begin_non_atomic(regs);
- local_irq_enable();
- if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
- pr_err("Memory error not recovered");
- force_sig(SIGBUS, current);
+ if (kill_it || do_memory_failure(&m))
+ force_sig(SIGBUS, current);
+ local_irq_disable();
+ ist_end_non_atomic();
}
- local_irq_disable();
- ist_end_non_atomic();
-done:
+
+ /* Fault was in recoverable area of the kernel */
+ if ((m.cs & 3) != 3 && worst == MCE_AR_SEVERITY)
+ if (!fixup_mcexception(regs, m.addr))
+ mce_panic("Failed kernel mode recovery", &m, NULL);
+
ist_exit(regs);
}
EXPORT_SYMBOL_GPL(do_machine_check);
--
2.1.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2015-12-11 19:32 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-12-11 19:13 [PATCHV2 0/3] Machine check recovery when kernel accesses poison Tony Luck
2015-12-10 21:58 ` [PATCHV2 1/3] x86, ras: Add new infrastructure for machine check fixup tables Tony Luck
2015-12-11 20:06 ` Andy Lutomirski
2015-12-11 21:01 ` Luck, Tony
2015-12-12 10:11 ` Borislav Petkov
2015-12-14 17:58 ` Ross Zwisler
2015-12-14 22:27 ` Borislav Petkov
2015-12-15 1:00 ` Luck, Tony
2015-12-15 9:46 ` Borislav Petkov
2015-12-15 10:44 ` Borislav Petkov
2015-12-11 0:14 ` Tony Luck [this message]
2015-12-11 20:08 ` [PATCHV2 2/3] x86, ras: Extend machine check recovery code to annotated ring0 areas Andy Lutomirski
2015-12-15 11:43 ` Borislav Petkov
2015-12-15 23:46 ` Luck, Tony
2015-12-11 0:21 ` [PATCHV2 3/3] x86, ras: Add mcsafe_memcpy() function to recover from machine checks Tony Luck
2015-12-11 20:09 ` Andy Lutomirski
2015-12-11 21:19 ` Luck, Tony
2015-12-11 21:32 ` Konrad Rzeszutek Wilk
2015-12-11 21:50 ` Andy Lutomirski
2015-12-11 22:17 ` Luck, Tony
2015-12-11 22:20 ` Dan Williams
2015-12-11 22:26 ` Andy Lutomirski
2015-12-11 22:35 ` Luck, Tony
2015-12-11 22:38 ` Andy Lutomirski
2015-12-11 22:45 ` Luck, Tony
2015-12-11 22:55 ` Andy Lutomirski
2015-12-14 8:36 ` Ingo Molnar
2015-12-14 19:46 ` Luck, Tony
2015-12-14 20:11 ` Andy Lutomirski
2015-12-15 13:11 ` Borislav Petkov
2015-12-15 17:45 ` Dan Williams
2015-12-15 17:53 ` Luck, Tony
2015-12-15 18:21 ` Borislav Petkov
2015-12-15 18:27 ` Dan Williams
2015-12-15 18:35 ` Dan Williams
2015-12-15 18:39 ` Borislav Petkov
2015-12-15 19:19 ` Elliott, Robert (Persistent Memory)
2015-12-15 19:28 ` Borislav Petkov
2015-12-15 20:25 ` Elliott, Robert (Persistent Memory)
2015-12-21 17:33 ` Borislav Petkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e8029c58c7d4b5094ec274c78dee01d390317d4d.1449861203.git.tony.luck@intel.com \
--to=tony.luck@intel.com \
--cc=akpm@linux-foundation.org \
--cc=bp@alien8.de \
--cc=dan.j.williams@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nvdimm@ml01.01.org \
--cc=luto@kernel.org \
--cc=mingo@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox