/*
 * Machine check handler.
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/thread_info.h>
#include <linux/capability.h>
#include <linux/miscdevice.h>
#include <linux/ratelimit.h>
#include <linux/rcupdate.h>
#include <linux/kobject.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/device.h>
#include <linux/syscore_ops.h>
#include <linux/delay.h>
#include <linux/ctype.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/cpu.h>
#include <linux/ras.h>
#include <linux/smp.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>

#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/reboot.h>

#include "mce-internal.h"

static DEFINE_MUTEX(mce_log_mutex);

/* sysfs synchronization */
static DEFINE_MUTEX(mce_sysfs_mutex);

#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>

#define SPINUNIT 100	/* 100ns */

DEFINE_PER_CPU(unsigned, mce_exception_count);

struct mce_bank *mce_banks __read_mostly;
struct mce_vendor_flags mce_flags __read_mostly;

struct mca_config mca_cfg __read_mostly = {
	.bootlog  = -1,
	/*
	 * Tolerant levels:
	 * 0: always panic on uncorrected errors, log corrected errors
	 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
	 * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
	 * 3: never panic or SIGBUS, log all errors (for testing only)
	 */
	.tolerant = 1,
	.monarch_timeout = -1
};
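
/*
 * Note: "tolerant" (like the other mca_cfg knobs) is also tunable at
 * runtime through the machinecheck sysfs nodes, e.g. (exact path may
 * vary by kernel version/config):
 *
 *	echo 2 > /sys/devices/system/machinecheck/machinecheck0/tolerant
 */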

static DEFINE_PER_CPU(struct mce, mces_seen);
static unsigned long mce_need_notify;
static int cpu_missing;

/*
 * MCA banks polled by the periodic polling timer for corrected events.
 * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
 */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};

/*
 * MCA banks controlled through firmware first for corrected errors.
 * This is a global list of banks for which we won't enable CMCI and we
 * won't poll. Firmware controls these banks and is responsible for
 * reporting corrected errors through GHES. Uncorrected/recoverable
 * errors are still notified through a machine check.
 */
mce_banks_t mce_banks_ce_disabled;

static struct work_struct mce_work;
static struct irq_work mce_irq_work;

static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);

/*
 * CPU/chipset specific EDAC code can register a notifier call here to print
 * MCE errors in a human-readable form.
 */
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);

/* Do initial setup of a struct mce */
void mce_setup(struct mce *m)
{
	memset(m, 0, sizeof(struct mce));
	m->cpu = m->extcpu = smp_processor_id();
	/* need the internal __ version to avoid deadlocks */
	m->time = __ktime_get_real_seconds();
	m->cpuvendor = boot_cpu_data.x86_vendor;
	m->cpuid = cpuid_eax(1);
	m->socketid = cpu_data(m->extcpu).phys_proc_id;
	m->apicid = cpu_data(m->extcpu).initial_apicid;
	rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);

	if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
		rdmsrl(MSR_PPIN, m->ppin);

	m->microcode = boot_cpu_data.microcode;
}

DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);

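/*
 * Add the record to the lockless genpool and, on success, kick the
 * irq_work which processes it later from a safe (non-#MC) context.
 */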
void mce_log(struct mce *m)
{
	if (!mce_gen_pool_add(m))
		irq_work_queue(&mce_irq_work);
}

void mce_inject_log(struct mce *m)
{
	mutex_lock(&mce_log_mutex);
	mce_log(m);
	mutex_unlock(&mce_log_mutex);
}
EXPORT_SYMBOL_GPL(mce_inject_log);

static struct notifier_block mce_srao_nb;

/*
 * We run the default notifier if we have only the SRAO, the first and the
 * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
 * notifiers registered on the chain.
 */
#define NUM_DEFAULT_NOTIFIERS	3
static atomic_t num_notifiers;

void mce_register_decode_chain(struct notifier_block *nb)
{
	if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC))
		return;

	atomic_inc(&num_notifiers);

	blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_decode_chain);

void mce_unregister_decode_chain(struct notifier_block *nb)
{
	atomic_dec(&num_notifiers);

	blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);

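/*
 * Example (a sketch, not in-tree code): an EDAC-style decoder would hook
 * into the chain roughly like this; the callback name and priority choice
 * here are illustrative only:
 *
 *	static int example_decode(struct notifier_block *nb,
 *				  unsigned long val, void *data)
 *	{
 *		struct mce *m = (struct mce *)data;
 *
 *		if (!m)
 *			return NOTIFY_DONE;
 *		// decode and print *m here
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call	= example_decode,
 *		.priority	= MCE_PRIO_EDAC,
 *	};
 *
 *	mce_register_decode_chain(&example_nb);
 */

/*
 * Per-bank MSR accessors. Legacy MCA and AMD Scalable MCA (SMCA) expose
 * the CTL/STATUS/ADDR/MISC registers at different MSR addresses; msr_ops
 * is switched to the smca_* variants on SMCA-capable systems.
 */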
static inline u32 ctl_reg(int bank)
{
	return MSR_IA32_MCx_CTL(bank);
}

static inline u32 status_reg(int bank)
{
	return MSR_IA32_MCx_STATUS(bank);
}

static inline u32 addr_reg(int bank)
{
	return MSR_IA32_MCx_ADDR(bank);
}

static inline u32 misc_reg(int bank)
{
	return MSR_IA32_MCx_MISC(bank);
}

static inline u32 smca_ctl_reg(int bank)
{
	return MSR_AMD64_SMCA_MCx_CTL(bank);
}

static inline u32 smca_status_reg(int bank)
{
	return MSR_AMD64_SMCA_MCx_STATUS(bank);
}

static inline u32 smca_addr_reg(int bank)
{
	return MSR_AMD64_SMCA_MCx_ADDR(bank);
}

static inline u32 smca_misc_reg(int bank)
{
	return MSR_AMD64_SMCA_MCx_MISC(bank);
}

struct mca_msr_regs msr_ops = {
	.ctl	= ctl_reg,
	.status	= status_reg,
	.addr	= addr_reg,
	.misc	= misc_reg
};

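/*
 * Dump one MCE record to the console in the established format; see the
 * note below about external parsers before changing any of the output.
 */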
static void __print_mce(struct mce *m)
{
	pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
		 m->extcpu,
		 (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
		 m->mcgstatus, m->bank, m->status);

	if (m->ip) {
		pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
			!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
			m->cs, m->ip);

		if (m->cs == __KERNEL_CS)
			pr_cont("{%pS}", (void *)(unsigned long)m->ip);
		pr_cont("\n");
	}

	pr_emerg(HW_ERR "TSC %llx ", m->tsc);
	if (m->addr)
		pr_cont("ADDR %llx ", m->addr);
	if (m->misc)
		pr_cont("MISC %llx ", m->misc);

	if (mce_flags.smca) {
		if (m->synd)
			pr_cont("SYND %llx ", m->synd);
		if (m->ipid)
			pr_cont("IPID %llx ", m->ipid);
	}

	pr_cont("\n");
	/*
	 * Note this output is parsed by external tools and old fields
	 * should not be changed.
	 */
	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
		m->microcode);
}

static void print_mce(struct mce *m)
{
	__print_mce(m);

	if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
		pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}

#define PANIC_TIMEOUT 5 /* 5 seconds */

static atomic_t mce_panicked;

static int fake_panic;
static atomic_t mce_fake_panicked;

/* Panic in progress. Enable interrupts and wait for final IPI */
static void wait_for_panic(void)
{
	long timeout = PANIC_TIMEOUT*USEC_PER_SEC;

	preempt_disable();
	local_irq_enable();
	while (timeout-- > 0)
		udelay(1);
	if (panic_timeout == 0)
		panic_timeout = mca_cfg.panic_timeout;
	panic("Panicking machine check CPU died");
}

static void mce_panic(const char *msg, struct mce *final, char *exp)
{
	int apei_err = 0;
	struct llist_node *pending;
	struct mce_evt_llist *l;

	if (!fake_panic) {
		/*
		 * Make sure only one CPU runs in machine check panic
		 */
		if (atomic_inc_return(&mce_panicked) > 1)
			wait_for_panic();
		barrier();

		bust_spinlocks(1);
		console_verbose();
	} else {
		/* Don't log too much for fake panic */
		if (atomic_inc_return(&mce_fake_panicked) > 1)
			return;
	}
	pending = mce_gen_pool_prepare_records();
	/* First print corrected ones that are still unlogged */
	llist_for_each_entry(l, pending, llnode) {
		struct mce *m = &l->mce;
		if (!(m->status & MCI_STATUS_UC)) {
			print_mce(m);
			if (!apei_err)
				apei_err = apei_write_mce(m);
		}
	}
	/* Now print uncorrected but with the final one last */
	llist_for_each_entry(l, pending, llnode) {
		struct mce *m = &l->mce;
		if (!(m->status & MCI_STATUS_UC))
			continue;
		if (!final || mce_cmp(m, final)) {
			print_mce(m);
			if (!apei_err)
				apei_err = apei_write_mce(m);
		}
	}
	if (final) {
		print_mce(final);
		if (!apei_err)
			apei_err = apei_write_mce(final);
	}
	if (cpu_missing)
		pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
	if (exp)
		pr_emerg(HW_ERR "Machine check: %s\n", exp);
	if (!fake_panic) {
		if (panic_timeout == 0)
			panic_timeout = mca_cfg.panic_timeout;
		panic(msg);
	} else
		pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
}

/* Support code for software error injection */

static int msr_to_offset(u32 msr)
{
	unsigned bank = __this_cpu_read(injectm.bank);

	if (msr == mca_cfg.rip_msr)
		return offsetof(struct mce, ip);
	if (msr == msr_ops.status(bank))
		return offsetof(struct mce, status);
	if (msr == msr_ops.addr(bank))
		return offsetof(struct mce, addr);
	if (msr == msr_ops.misc(bank))
		return offsetof(struct mce, misc);
	if (msr == MSR_IA32_MCG_STATUS)
		return offsetof(struct mce, mcgstatus);
	return -1;
}

/* MSR access wrappers used for error injection */
static u64 mce_rdmsrl(u32 msr)
{
	u64 v;

	if (__this_cpu_read(injectm.finished)) {
		int offset = msr_to_offset(msr);

		if (offset < 0)
			return 0;
		return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
	}

	if (rdmsrl_safe(msr, &v)) {
		WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
		/*
		 * Return zero in case the access faulted. This should
		 * not happen normally but can happen if the CPU does
		 * something weird, or if the code is buggy.
		 */
		v = 0;
	}

	return v;
}

static void mce_wrmsrl(u32 msr, u64 v)
{
	if (__this_cpu_read(injectm.finished)) {
		int offset = msr_to_offset(msr);

		if (offset >= 0)
			*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
		return;
	}
	wrmsrl(msr, v);
}

/*
 * Collect all global (w.r.t. this processor) status about this machine
 * check into our "mce" struct so that we can use it later to assess
 * the severity of the problem as we read per-bank specific details.
 */
static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
{
	mce_setup(m);

	m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
	if (regs) {
		/*
		 * Get the address of the instruction at the time of
		 * the machine check error.
		 */
		if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
			m->ip = regs->ip;
			m->cs = regs->cs;

			/*
			 * When in VM86 mode make the cs look like ring 3
			 * always. This is a lie, but it's better than passing
			 * the additional vm86 bit around everywhere.
			 */
			if (v8086_mode(regs))
				m->cs |= 3;
		}
		/* Use accurate RIP reporting if available. */
		if (mca_cfg.rip_msr)
			m->ip = mce_rdmsrl(mca_cfg.rip_msr);
	}
}

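/* MCE is usable only if the CPU advertises both the MCE and MCA features. */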
int mce_available(struct cpuinfo_x86 *c)
{
	if (mca_cfg.disabled)
		return 0;
	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}

static void mce_schedule_work(void)
{
	if (!mce_gen_pool_empty())
		schedule_work(&mce_work);
}

static void mce_irq_work_cb(struct irq_work *entry)
{
	mce_schedule_work();
}

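/*
 * Signal that new MCE records are available. If interrupts were enabled
 * (or we came from VM86 context), it is safe to notify directly; otherwise
 * defer through irq_work since this may run in #MC/NMI-like context.
 */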
static void mce_report_event(struct pt_regs *regs)
{
	if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
		mce_notify_irq();
		/*
		 * Triggering the work queue here is just an insurance
		 * policy in case the syscall exit notify handler
		 * doesn't run soon enough or ends up running on the
		 * wrong CPU (can happen when audit sleeps)
		 */
		mce_schedule_work();
		return;
	}

	irq_work_queue(&mce_irq_work);
}

/*
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * be somewhat complicated (e.g. segment offset would require an instruction
 * parser). So only support physical addresses up to page granularity for now.
 */
int mce_usable_address(struct mce *m)
{
	if (!(m->status & MCI_STATUS_ADDRV))
		return 0;

	/* Checks after this one are Intel-specific: */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 1;

	if (!(m->status & MCI_STATUS_MISCV))
		return 0;

	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
		return 0;

	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(mce_usable_address);

bool mce_is_memory_error(struct mce *m)
{
	if (m->cpuvendor == X86_VENDOR_AMD ||
	    m->cpuvendor == X86_VENDOR_HYGON) {
		return amd_mce_is_memory_error(m);
	} else if (m->cpuvendor == X86_VENDOR_INTEL) {
		/*
		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
		 *
		 * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
		 * indicating a memory error. Bit 8 is used for indicating a
		 * cache hierarchy error. The combination of bit 2 and bit 3
		 * is used for indicating a `generic' cache hierarchy error.
		 * But we can't just blindly check the above bits, because if
		 * bit 11 is set, then it is a bus/interconnect error - and
		 * either way the above bits just give more detail on what
		 * bus/interconnect error happened. Note that bit 12 can be
		 * ignored, as it's the "filter" bit.
		 */
		return (m->status & 0xef80) == BIT(7) ||
		       (m->status & 0xef00) == BIT(8) ||
		       (m->status & 0xeffc) == 0xc;
	}

	return false;
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);

bool mce_is_correctable(struct mce *m)
{
	if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
		return false;

	if (m->cpuvendor == X86_VENDOR_HYGON && m->status & MCI_STATUS_DEFERRED)
		return false;

	if (m->status & MCI_STATUS_UC)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(mce_is_correctable);

static bool cec_add_mce(struct mce *m)
{
	if (!m)
		return false;

	/* We eat only correctable DRAM errors with usable addresses. */
	if (mce_is_memory_error(m) &&
	    mce_is_correctable(m)  &&
	    mce_usable_address(m))
		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
			return true;

	return false;
}

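/*
 * First notifier on the decode chain: feed correctable DRAM errors to the
 * CEC (which may absorb them entirely), emit the tracepoint and poke
 * mce_notify_irq() so user space consumers get woken.
 */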
static int mce_first_notifier(struct notifier_block *nb, unsigned long val,
			      void *data)
{
	struct mce *m = (struct mce *)data;

	if (!m)
		return NOTIFY_DONE;

	if (cec_add_mce(m))
		return NOTIFY_STOP;

	/* Emit the trace record: */
	trace_mce_record(m);

	set_bit(0, &mce_need_notify);

	mce_notify_irq();

	return NOTIFY_DONE;
}

static struct notifier_block first_nb = {
	.notifier_call	= mce_first_notifier,
	.priority	= MCE_PRIO_FIRST,
};

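/*
 * SRAO (Software Recoverable Action Optional) events carry a usable
 * address of a page that has gone bad but is not currently being consumed:
 * offline the page and unmap it (set_mce_nospec()) before anything,
 * including speculation, touches it again.
 */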
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *mce = (struct mce *)data;
	unsigned long pfn;

	if (!mce)
		return NOTIFY_DONE;

	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
		pfn = mce->addr >> PAGE_SHIFT;
		if (!memory_failure(pfn, 0))
			set_mce_nospec(pfn);
	}

	return NOTIFY_OK;
}
static struct notifier_block mce_srao_nb = {
	.notifier_call	= srao_decode_notifier,
	.priority	= MCE_PRIO_SRAO,
};

static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *m = (struct mce *)data;

	if (!m)
		return NOTIFY_DONE;

	if (atomic_read(&num_notifiers) > NUM_DEFAULT_NOTIFIERS)
		return NOTIFY_DONE;

	__print_mce(m);

	return NOTIFY_DONE;
}

static struct notifier_block mce_default_nb = {
	.notifier_call	= mce_default_notifier,
	/* lowest prio, we want it to run last. */
	.priority	= MCE_PRIO_LOWEST,
};

/*
 * Read ADDR and MISC registers.
 */
static void mce_read_aux(struct mce *m, int i)
{
	if (m->status & MCI_STATUS_MISCV)
		m->misc = mce_rdmsrl(msr_ops.misc(i));

	if (m->status & MCI_STATUS_ADDRV) {
		m->addr = mce_rdmsrl(msr_ops.addr(i));

		/*
		 * Mask the reported address by the reported granularity.
		 */
		if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
			u8 shift = MCI_MISC_ADDR_LSB(m->misc);
			m->addr >>= shift;
			m->addr <<= shift;
		}
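
		/*
		 * Example: with MCI_MISC_ADDR_LSB == 6 the address is only
		 * accurate to a 64-byte cache line, so the low 6 bits are
		 * shifted out and back in, e.g. 0x12345678 becomes 0x12345640.
		 */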

		/*
		 * Extract [55:<lsb>] where lsb is the least significant
		 * *valid* bit of the address bits.
		 */
		if (mce_flags.smca) {
			u8 lsb = (m->addr >> 56) & 0x3f;

			m->addr &= GENMASK_ULL(55, lsb);
		}
	}

	if (mce_flags.smca) {
		m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));

		if (m->status & MCI_STATUS_SYNDV)
			m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
	}
}

DEFINE_PER_CPU(unsigned, mce_poll_count);

/*
 * Poll for corrected events or events that happened before reset.
 * Those are just logged through /dev/mcelog.
 *
 * This is executed in standard interrupt context.
 *
 * Note: the spec recommends panicking for fatal unsignalled
 * errors here. However this would be quite problematic --
 * we would need to reimplement the Monarch handling and
 * it would mess up the exclusion between exception handler
 * and poll handler -- so we skip this for now.
 * These cases should not happen anyways, or only when the CPU
 * is already totally confused. In this case it's likely it will
 * not fully execute the machine check handler either.
 */
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
	bool error_seen = false;
	struct mce m;
	int i;

	this_cpu_inc(mce_poll_count);

	mce_gather_info(&m, NULL);

	if (flags & MCP_TIMESTAMP)
		m.tsc = rdtsc();

	for (i = 0; i < mca_cfg.banks; i++) {
		if (!mce_banks[i].ctl || !test_bit(i, *b))
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;

		barrier();
		m.status = mce_rdmsrl(msr_ops.status(i));
		if (!(m.status & MCI_STATUS_VAL))
			continue;

		/*
		 * Uncorrected or signalled events are handled by the exception
		 * handler when it is enabled, so don't process those here.
		 *
		 * TBD do the same check for MCI_STATUS_EN here?
		 */
		if (!(flags & MCP_UC) &&
		    (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
			continue;

		error_seen = true;

		mce_read_aux(&m, i);

		m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);

		/*
		 * Don't get the IP here because it's unlikely to
		 * have anything to do with the actual error location.
		 */
		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
			mce_log(&m);
		else if (mce_usable_address(&m)) {
			/*
			 * Although we skipped logging this, we still want
			 * to take action. Add to the pool so the registered
			 * notifiers will see it.
			 */
			if (!mce_gen_pool_add(&m))
				mce_schedule_work();
		}

		/*
		 * Clear state for this bank.
		 */
		mce_wrmsrl(msr_ops.status(i), 0);
	}

	/*
	 * Don't clear MCG_STATUS here because it's only defined for
	 * exceptions.
	 */

	sync_core();

	return error_seen;
}
EXPORT_SYMBOL_GPL(machine_check_poll);

/*
 * Do a quick check if any of the events requires a panic.
 * This decides if we keep the events around or clear them.
 */
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
			  struct pt_regs *regs)
{
	char *tmp;
	int i;

	for (i = 0; i < mca_cfg.banks; i++) {
		m->status = mce_rdmsrl(msr_ops.status(i));
		if (!(m->status & MCI_STATUS_VAL))
			continue;

		__set_bit(i, validp);
		if (quirk_no_way_out)
			quirk_no_way_out(i, m, regs);

		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
			mce_read_aux(m, i);
			*msg = tmp;
			return 1;
		}
	}
	return 0;
}

/*
 * Variable to establish order between CPUs while scanning.
 * Each CPU spins initially until executing equals its number.
 */
static atomic_t mce_executing;

/*
 * Defines order of CPUs on entry. First CPU becomes Monarch.
 */
static atomic_t mce_callin;

/*
 * Check if a timeout waiting for other CPUs happened.
 */
static int mce_timed_out(u64 *t, const char *msg)
{
	/*
	 * The others already did panic for some reason.
	 * Bail out like in a timeout.
	 * rmb() to tell the compiler that system_state
	 * might have been modified by someone else.
	 */
	rmb();
	if (atomic_read(&mce_panicked))
		wait_for_panic();
	if (!mca_cfg.monarch_timeout)
		goto out;
	if ((s64)*t < SPINUNIT) {
		if (mca_cfg.tolerant <= 1)
			mce_panic(msg, NULL, NULL);
		cpu_missing = 1;
		return 1;
	}
	*t -= SPINUNIT;
out:
	touch_nmi_watchdog();
	return 0;
}

/*
 * The Monarch's reign. The Monarch is the CPU which entered
 * the machine check handler first. It waits for the others to
 * raise the exception too and then grades them. If any
 * error is fatal, panic. Only then let the others continue.
 *
 * The other CPUs entering the MCE handler will be controlled by the
 * Monarch. They are called Subjects.
 *
 * This way we prevent any potential data corruption in an unrecoverable case
 * and also make sure that all CPUs' errors are examined.
 *
 * Also this detects the case of a machine check event coming from outer
 * space (not detected by any CPUs). In this case some external agent wants
 * us to shut down, so panic too.
 *
 * The other CPUs might still decide to panic if the handler happens
 * in an unrecoverable place, but in this case the system is in a semi-stable
 * state and won't corrupt anything by itself. It's ok to let the others
 * continue for a bit first.
 *
 * All the spin loops have timeouts; when a timeout happens a CPU
 * typically elects itself to be Monarch.
 */
static void mce_reign(void)
{
	int cpu;
	struct mce *m = NULL;
	int global_worst = 0;
	char *msg = NULL;
	char *nmsg = NULL;

	/*
	 * This CPU is the Monarch and the other CPUs have run
	 * through their handlers.
	 * Grade the severity of the errors of all the CPUs.
	 */
	for_each_possible_cpu(cpu) {
		int severity = mce_severity(&per_cpu(mces_seen, cpu),
					    mca_cfg.tolerant,
					    &nmsg, true);
		if (severity > global_worst) {
			msg = nmsg;
			global_worst = severity;
			m = &per_cpu(mces_seen, cpu);
		}
	}

	/*
	 * Cannot recover? Panic here then.
	 * This dumps all the mces in the log buffer and stops the
	 * other CPUs.
	 */
	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
		mce_panic("Fatal machine check", m, msg);

	/*
	 * For UC somewhere we let the CPU which detects it handle it.
	 * Also we must let the others continue, otherwise the handling
	 * CPU could deadlock on a lock.
	 */

	/*
	 * No machine check event found. Must be some external
	 * source or one CPU is hung. Panic.
	 */
	if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
		mce_panic("Fatal machine check from unknown source", NULL, NULL);

	/*
	 * Now clear all the mces_seen so that they don't reappear on
	 * the next mce.
	 */
	for_each_possible_cpu(cpu)
		memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
}

static atomic_t global_nwo;

/*
 * Start of Monarch synchronization. This waits until all CPUs have
 * entered the exception handler and then determines if any of them
 * saw a fatal event that requires panic. Then it executes them
 * in the entry order.
 * TBD double check parallel CPU hotunplug
 */
static int mce_start(int *no_way_out)
{
	int order;
	int cpus = num_online_cpus();
	u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;

	if (!timeout)
		return -1;

	atomic_add(*no_way_out, &global_nwo);
	/*
	 * Rely on the implied barrier below, such that global_nwo
	 * is updated before mce_callin.
	 */
	order = atomic_inc_return(&mce_callin);

	/*
	 * Wait for everyone.
	 */
	while (atomic_read(&mce_callin) != cpus) {
		if (mce_timed_out(&timeout,
				  "Timeout: Not all CPUs entered broadcast exception handler")) {
			atomic_set(&global_nwo, 0);
			return -1;
		}
		ndelay(SPINUNIT);
	}

	/*
	 * mce_callin should be read before global_nwo
	 */
	smp_rmb();

	if (order == 1) {
		/*
		 * Monarch: Starts executing now, the others wait.
		 */
		atomic_set(&mce_executing, 1);
	} else {
		/*
		 * Subject: Now start the scanning loop one by one in
		 * the original callin order.
		 * This way, when there are any shared banks, each event is
		 * seen by only one CPU before being cleared, avoiding
		 * duplicates.
		 */
		while (atomic_read(&mce_executing) < order) {
			if (mce_timed_out(&timeout,
					  "Timeout: Subject CPUs unable to finish machine check processing")) {
				atomic_set(&global_nwo, 0);
				return -1;
			}
			ndelay(SPINUNIT);
		}
	}

	/*
	 * Cache the global no_way_out state.
	 */
	*no_way_out = atomic_read(&global_nwo);

	return order;
}

/*
 * Synchronize between CPUs after main scanning loop.
 * This invokes the bulk of the Monarch processing.
 */
static int mce_end(int order)
{
	int ret = -1;
	u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;

	if (!timeout)
		goto reset;
	if (order < 0)
		goto reset;

	/*
	 * Allow others to run.
	 */
	atomic_inc(&mce_executing);

	if (order == 1) {
		/* CHECKME: Can this race with a parallel hotplug? */
		int cpus = num_online_cpus();

		/*
		 * Monarch: Wait for everyone to go through their scanning
		 * loops.
		 */
		while (atomic_read(&mce_executing) <= cpus) {
			if (mce_timed_out(&timeout,
					  "Timeout: Monarch CPU unable to finish machine check processing"))
				goto reset;
			ndelay(SPINUNIT);
		}

		mce_reign();
		barrier();
		ret = 0;
	} else {
		/*
		 * Subject: Wait for Monarch to finish.
		 */
		while (atomic_read(&mce_executing) != 0) {
			if (mce_timed_out(&timeout,
					  "Timeout: Monarch CPU did not finish machine check processing"))
				goto reset;
			ndelay(SPINUNIT);
		}

		/*
		 * Don't reset anything. That's done by the Monarch.
		 */
		return 0;
	}

	/*
	 * Reset all global state.
	 */
reset:
	atomic_set(&global_nwo, 0);
	atomic_set(&mce_callin, 0);
	barrier();

	/*
	 * Let others run again.
	 */
	atomic_set(&mce_executing, 0);
	return ret;
}

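/* Clear the STATUS registers of all banks flagged in @toclear. */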
static void mce_clear_state(unsigned long *toclear)
{
	int i;

	for (i = 0; i < mca_cfg.banks; i++) {
		if (test_bit(i, toclear))
			mce_wrmsrl(msr_ops.status(i), 0);
	}
}

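/*
 * Hand an action-required user space memory error to memory_failure().
 * If we cannot return to the interrupted context (!RIPV), the faulting
 * task must be killed outright (MF_MUST_KILL).
 */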
static int do_memory_failure(struct mce *m)
{
	int flags = MF_ACTION_REQUIRED;
	int ret;

	pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
	if (!(m->mcgstatus & MCG_STATUS_RIPV))
		flags |= MF_MUST_KILL;
	ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
	if (ret)
		pr_err("Memory error not recovered");
	else
		set_mce_nospec(m->addr >> PAGE_SHIFT);
	return ret;
}

/*
 * Cases where we avoid rendezvous handler timeout:
 * 1) If this CPU is offline.
 *
 * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
 *    skip those CPUs which remain looping in the 1st kernel - see
 *    crash_nmi_callback().
 *
 * Note: there still is a small window between kexec-ing and the new,
 * kdump kernel establishing a new #MC handler where a broadcasted MCE
 * might not get handled properly.
 */
static bool __mc_check_crashing_cpu(int cpu)
{
	if (cpu_is_offline(cpu) ||
	    (crashing_cpu != -1 && crashing_cpu != cpu)) {
		u64 mcgstatus;

		mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
		if (mcgstatus & MCG_STATUS_RIPV) {
			mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
			return true;
		}
	}
	return false;
}

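/*
 * Scan all banks with a valid event, grade each event's severity, log it
 * and track the worst severity seen. Banks whose state we consumed are
 * flagged in @toclear for mce_clear_state().
 */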
static void __mc_scan_banks(struct mce *m, struct mce *final,
			    unsigned long *toclear, unsigned long *valid_banks,
			    int no_way_out, int *worst)
{
	struct mca_config *cfg = &mca_cfg;
	int severity, i;

	for (i = 0; i < cfg->banks; i++) {
		__clear_bit(i, toclear);
		if (!test_bit(i, valid_banks))
			continue;

		if (!mce_banks[i].ctl)
			continue;

		m->misc = 0;
		m->addr = 0;
		m->bank = i;

		m->status = mce_rdmsrl(msr_ops.status(i));
		if (!(m->status & MCI_STATUS_VAL))
			continue;

		/*
		 * Corrected or non-signaled errors are handled by
		 * machine_check_poll(). Leave them alone, unless this panics.
		 */
		if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
			!no_way_out)
			continue;

		/* Set taint even when machine check was not enabled. */
		add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

		severity = mce_severity(m, cfg->tolerant, NULL, true);

		/*
		 * When the machine check was for a corrected/deferred error,
		 * don't touch it, unless we're panicking.
		 */
		if ((severity == MCE_KEEP_SEVERITY ||
		     severity == MCE_UCNA_SEVERITY) && !no_way_out)
			continue;

		__set_bit(i, toclear);

		/* Machine check event was not enabled. Clear, but ignore. */
		if (severity == MCE_NO_SEVERITY)
			continue;

		mce_read_aux(m, i);

		/* assuming valid severity level != 0 */
		m->severity = severity;

		mce_log(m);

		if (severity > *worst) {
			*final = *m;
			*worst = severity;
		}
	}

	/* mce_clear_state will clear *final, save locally for use later */
	*m = *final;
}

/*
 * The actual machine check handler. This only handles real
 * exceptions when something got corrupted coming in through int 18.
 *
 * This is executed in NMI context not subject to normal locking rules. This
 * implies that most kernel services cannot be safely used. Don't even
 * think about putting a printk in there!
 *
 * On Intel systems this is entered on all CPUs in parallel through
 * MCE broadcast. However some CPUs might be broken beyond repair,
 * so be always careful when synchronizing with others.
 */
void do_machine_check(struct pt_regs *regs, long error_code)
{
	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
	struct mca_config *cfg = &mca_cfg;
	int cpu = smp_processor_id();
	char *msg = "Unknown";
	struct mce m, *final;
	int worst = 0;

	/*
	 * Establish sequential order between the CPUs entering the machine
	 * check handler.
	 */
	int order = -1;

	/*
	 * If no_way_out gets set, there is no safe way to recover from this
	 * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
	 */
	int no_way_out = 0;

	/*
	 * If kill_it gets set, there might be a way to recover from this
	 * error.
	 */
	int kill_it = 0;

	/*
	 * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
	 * on Intel.
	 */
	int lmce = 1;

	if (__mc_check_crashing_cpu(cpu))
		return;

	ist_enter(regs);

	this_cpu_inc(mce_exception_count);

	mce_gather_info(&m, regs);
	m.tsc = rdtsc();

	final = this_cpu_ptr(&mces_seen);
	*final = m;

	memset(valid_banks, 0, sizeof(valid_banks));
	no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);

	barrier();

	/*
	 * When there is no restart IP we might need to kill or panic.
	 * Assume the worst for now, but if we find the
	 * severity is MCE_AR_SEVERITY we have other options.
	 */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_it = 1;

	/*
	 * Check if this MCE is signaled to only this logical processor,
	 * on Intel only.
	 */
	if (m.cpuvendor == X86_VENDOR_INTEL)
		lmce = m.mcgstatus & MCG_STATUS_LMCES;

	/*
	 * A local machine check may already know that we have to panic.
	 * A broadcast machine check begins the rendezvous in mce_start().
	 * Go through all banks in exclusion of the other CPUs. This way we
	 * don't report duplicated events on shared banks because the first one
	 * to see it will clear it.
	 */
	if (lmce) {
		if (no_way_out)
			mce_panic("Fatal local machine check", &m, msg);
	} else {
		order = mce_start(&no_way_out);
	}

Borislav Petkovf35565e2018-06-22 11:54:26 +02001268 __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
Tony Lucka8c321f2012-01-03 11:45:45 -08001269
Andi Kleen3c079792009-05-27 21:56:55 +02001270 if (!no_way_out)
1271 mce_clear_state(toclear);
1272
Ingo Molnare9eee032009-04-08 12:31:17 +02001273 /*
Andi Kleen3c079792009-05-27 21:56:55 +02001274 * Do most of the synchronization with other CPUs.
1275 * When there's any problem use only local no_way_out state.
Ingo Molnare9eee032009-04-08 12:31:17 +02001276 */
Ashok Raj243d6572015-06-04 18:55:24 +02001277 if (!lmce) {
1278 if (mce_end(order) < 0)
1279 no_way_out = worst >= MCE_PANIC_SEVERITY;
1280 } else {
1281 /*
Tony Luck40c36e22018-06-22 11:54:23 +02001282 * If there was a fatal machine check we should have
1283 * already called mce_panic earlier in this function.
1284 * Since we re-read the banks, we might have found
1285 * something new. Check again to see if we found a
1286 * fatal error. We call "mce_severity()" again to
1287 * make sure we have the right "msg".
Ashok Raj243d6572015-06-04 18:55:24 +02001288 */
Tony Luck40c36e22018-06-22 11:54:23 +02001289 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
1290 mce_severity(&m, cfg->tolerant, &msg, true);
1291 mce_panic("Local fatal machine check!", &m, msg);
1292 }
Ashok Raj243d6572015-06-04 18:55:24 +02001293 }
Tim Hockinbd784322007-07-21 17:10:37 +02001294
1295 /*
Tony Luckb2f9d672016-02-17 10:20:13 -08001296 * If tolerant is at an insane level we drop requests to kill
1297 * processes and continue even when there is no way out.
Tim Hockinbd784322007-07-21 17:10:37 +02001298 */
Tony Luckb2f9d672016-02-17 10:20:13 -08001299 if (cfg->tolerant == 3)
1300 kill_it = 0;
1301 else if (no_way_out)
1302 mce_panic("Fatal machine check on current CPU", &m, msg);
Tim Hockine02e68d2007-07-21 17:10:36 +02001303
Andi Kleen3c079792009-05-27 21:56:55 +02001304 if (worst > 0)
1305 mce_report_event(regs);
Andi Kleen5f8c1a52009-04-29 19:29:12 +02001306 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
Borislav Petkov45deca72018-06-22 11:54:25 +02001307
Andi Kleen88921be2009-05-27 21:56:51 +02001308 sync_core();
Luck, Tonyd4812e12015-01-05 16:44:42 -08001309
Tony Luckb2f9d672016-02-17 10:20:13 -08001310 if (worst != MCE_AR_SEVERITY && !kill_it)
1311 goto out_ist;
Luck, Tonyd4812e12015-01-05 16:44:42 -08001312
Tony Luckb2f9d672016-02-17 10:20:13 -08001313 /* Fault was in user mode and we need to take some action */
1314 if ((m.cs & 3) == 3) {
1315 ist_begin_non_atomic(regs);
1316 local_irq_enable();
1317
1318 if (kill_it || do_memory_failure(&m))
1319 force_sig(SIGBUS, current);
1320 local_irq_disable();
1321 ist_end_non_atomic();
1322 } else {
Jann Horn81fd9c12018-08-28 22:14:19 +02001323 if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
Tony Luckb2f9d672016-02-17 10:20:13 -08001324 mce_panic("Failed kernel mode recovery", &m, NULL);
Luck, Tonyd4812e12015-01-05 16:44:42 -08001325 }
Tony Luckb2f9d672016-02-17 10:20:13 -08001326
1327out_ist:
Andy Lutomirski8c840142015-07-03 12:44:32 -07001328 ist_exit(regs);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329}
Andi Kleenea149b32009-04-29 19:31:00 +02001330EXPORT_SYMBOL_GPL(do_machine_check);
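
/*
 * Recovery summary for the tail of do_machine_check() above (a descriptive
 * note, not authoritative documentation): a user-mode fault of
 * MCE_AR_SEVERITY is handed to do_memory_failure(), and the task gets a
 * SIGBUS if the page cannot be recovered (or if kill_it was set because
 * RIPV is clear); a kernel-mode fault is only survivable when an
 * exception fixup entry exists for the faulting instruction, otherwise
 * we panic.
 */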

#ifndef CONFIG_MEMORY_FAILURE
int memory_failure(unsigned long pfn, int flags)
{
	/* mce_severity() should not hand us an ACTION_REQUIRED error */
	BUG_ON(flags & MF_ACTION_REQUIRED);
	pr_err("Uncorrected memory error in page 0x%lx ignored\n"
	       "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
	       pfn);

	return 0;
}
#endif

/*
 * Periodic polling timer for "silent" machine check errors.  If the
 * poller finds an MCE, poll 2x faster.  When the poller finds no more
 * errors, poll 2x slower (up to check_interval seconds).
 */
static unsigned long check_interval = INITIAL_CHECK_INTERVAL;

static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);

static unsigned long mce_adjust_timer_default(unsigned long interval)
{
	return interval;
}

static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;

static void __start_timer(struct timer_list *t, unsigned long interval)
{
	unsigned long when = jiffies + interval;
	unsigned long flags;

	local_irq_save(flags);

	if (!timer_pending(t) || time_before(when, t->expires))
		mod_timer(t, round_jiffies(when));

	local_irq_restore(flags);
}

static void mce_timer_fn(struct timer_list *t)
{
	struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
	unsigned long iv;

	WARN_ON(cpu_t != t);

	iv = __this_cpu_read(mce_next_interval);

	if (mce_available(this_cpu_ptr(&cpu_info))) {
		machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));

		if (mce_intel_cmci_poll()) {
			iv = mce_adjust_timer(iv);
			goto done;
		}
	}

	/*
	 * Alert userspace if needed. If we logged an MCE, reduce the polling
	 * interval, otherwise increase the polling interval.
	 */
	if (mce_notify_irq())
		iv = max(iv / 2, (unsigned long) HZ/100);
	else
		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));

done:
	__this_cpu_write(mce_next_interval, iv);
	__start_timer(t, iv);
}
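
/*
 * Illustrative sketch (not kernel code) of the adaptive schedule above,
 * assuming HZ = 1000 and the default check_interval of 5 minutes
 * (INITIAL_CHECK_INTERVAL):
 *
 *	iv = 300 * HZ;					// quiet steady state
 *	iv = max(iv / 2, (unsigned long)HZ / 100);	// error logged: halve, 10ms floor
 *	iv = min(iv * 2, 300 * HZ);			// nothing found: double, capped
 *
 * So a burst of corrected errors quickly drags the poll period down
 * toward 10ms, and quiet periods let it decay back up to check_interval.
 */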

/*
 * Ensure that the timer is firing in @interval from now.
 */
void mce_timer_kick(unsigned long interval)
{
	struct timer_list *t = this_cpu_ptr(&mce_timer);
	unsigned long iv = __this_cpu_read(mce_next_interval);

	__start_timer(t, interval);

	if (interval < iv)
		__this_cpu_write(mce_next_interval, interval);
}
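
/*
 * Descriptive note, based on the Intel CMCI code (mce_intel.c): during a
 * CMCI interrupt storm the interrupt is switched off and mce_timer_kick()
 * is used to fall back to a short polling interval until the storm
 * subsides, at which point cmci_intel_adjust_timer() restores CMCI.
 */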

/* Must not be called in IRQ context where del_timer_sync() can deadlock */
static void mce_timer_delete_all(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		del_timer_sync(&per_cpu(mce_timer, cpu));
}

/*
 * Notify the user(s) about new machine check events.
 * Can be called from interrupt context, but not from machine check/NMI
 * context.
 */
int mce_notify_irq(void)
{
	/* Not more than two messages every minute */
	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);

	if (test_and_clear_bit(0, &mce_need_notify)) {
		mce_work_trigger();

		if (__ratelimit(&ratelimit))
			pr_info(HW_ERR "Machine check events logged\n");

		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(mce_notify_irq);

static int __mcheck_cpu_mce_banks_init(void)
{
	int i;
	u8 num_banks = mca_cfg.banks;

	mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL);
	if (!mce_banks)
		return -ENOMEM;

	for (i = 0; i < num_banks; i++) {
		struct mce_bank *b = &mce_banks[i];

		b->ctl = -1ULL;
		b->init = 1;
	}
	return 0;
}
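
/*
 * Descriptive note: ctl is the shadow of the bank's MCi_CTL MSR, so the
 * all-ones default (-1ULL) asks the hardware to report every error type
 * the bank implements; __mcheck_cpu_init_clear_banks() later writes this
 * value out, and the per-bank sysfs files below let it be narrowed at
 * runtime.
 */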

/*
 * Initialize Machine Checks for a CPU.
 */
static int __mcheck_cpu_cap_init(void)
{
	unsigned b;
	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);

	b = cap & MCG_BANKCNT_MASK;
	if (!mca_cfg.banks)
		pr_info("CPU supports %d MCE banks\n", b);

	if (b > MAX_NR_BANKS) {
		pr_warn("Using only %u machine check banks out of %u\n",
			MAX_NR_BANKS, b);
		b = MAX_NR_BANKS;
	}

	/* Don't support asymmetric configurations today */
	WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
	mca_cfg.banks = b;

	if (!mce_banks) {
		int err = __mcheck_cpu_mce_banks_init();

		if (err)
			return err;
	}

	/* Use accurate RIP reporting if available. */
	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;

	if (cap & MCG_SER_P)
		mca_cfg.ser = 1;

	return 0;
}
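
/*
 * Descriptive summary of how MCG_CAP is consumed here: the low bits
 * (MCG_BANKCNT_MASK) give the bank count, MCG_CTL_P says a global
 * MCG_CTL MSR exists (used in __mcheck_cpu_init_generic() below),
 * MCG_EXT_P/MCG_EXT_CNT advertise the extended register block that
 * includes MCG_EIP, and MCG_SER_P advertises software error recovery
 * support, which gates the mca_cfg.ser paths.
 */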

static void __mcheck_cpu_init_generic(void)
{
	enum mcp_flags m_fl = 0;
	mce_banks_t all_banks;
	u64 cap;

	if (!mca_cfg.bootlog)
		m_fl = MCP_DONTLOG;

	/*
	 * Log the machine checks left over from the previous reset.
	 */
	bitmap_fill(all_banks, MAX_NR_BANKS);
	machine_check_poll(MCP_UC | m_fl, &all_banks);

	cr4_set_bits(X86_CR4_MCE);

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	if (cap & MCG_CTL_P)
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}

static void __mcheck_cpu_init_clear_banks(void)
{
	int i;

	for (i = 0; i < mca_cfg.banks; i++) {
		struct mce_bank *b = &mce_banks[i];

		if (!b->init)
			continue;
		wrmsrl(msr_ops.ctl(i), b->ctl);
		wrmsrl(msr_ops.status(i), 0);
	}
}

/*
 * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
 * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
 * Vol 3B Table 15-20). But this confuses both the code that determines
 * whether the machine check occurred in kernel or user mode, and also
 * the severity assessment code. Pretend that EIPV was set, and take the
 * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
 */
static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
{
	if (bank != 0)
		return;
	if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
		return;
	if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
			  MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
			  MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
			  MCACOD)) !=
			 (MCI_STATUS_UC|MCI_STATUS_EN|
			  MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
			  MCI_STATUS_AR|MCACOD_INSTR))
		return;

	m->mcgstatus |= MCG_STATUS_EIPV;
	m->ip = regs->ip;
	m->cs = regs->cs;
}
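
/*
 * Descriptive note on the mask test above: the quirk only fires for a
 * bank 0 event whose status decodes exactly to an uncorrected, enabled,
 * signalled, action-required instruction-fetch error (MCACOD_INSTR) with
 * valid MISC/ADDR fields, while both RIPV and EIPV are clear in
 * MCG_STATUS.
 */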

/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
	struct mca_config *cfg = &mca_cfg;

	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
		pr_info("unknown CPU type - not enabling MCE support\n");
		return -EOPNOTSUPP;
	}

	/* This should be disabled by the BIOS, but isn't always */
	if (c->x86_vendor == X86_VENDOR_AMD) {
		if (c->x86 == 15 && cfg->banks > 4) {
			/*
			 * disable GART TBL walk error reporting, which
			 * trips off incorrectly with the IOMMU & 3ware
			 * & Cerberus:
			 */
			clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
		}
		if (c->x86 < 0x11 && cfg->bootlog < 0) {
			/*
			 * Lots of broken BIOSes around that don't clear them
			 * by default and leave crap in there. Don't log:
			 */
			cfg->bootlog = 0;
		}
		/*
		 * Various K7s with broken bank 0 around. Always disable
		 * it by default.
		 */
		if (c->x86 == 6 && cfg->banks > 0)
			mce_banks[0].ctl = 0;

		/*
		 * overflow_recov is supported for F15h Models 00h-0fh
		 * even though we don't have a CPUID bit for it.
		 */
		if (c->x86 == 0x15 && c->x86_model <= 0xf)
			mce_flags.overflow_recov = 1;

		/*
		 * Turn off MC4_MISC thresholding banks on those models since
		 * they're not supported there.
		 */
		if (c->x86 == 0x15 &&
		    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
			int i;
			u64 hwcr;
			bool need_toggle;
			u32 msrs[] = {
				0x00000413, /* MC4_MISC0 */
				0xc0000408, /* MC4_MISC1 */
			};

			rdmsrl(MSR_K7_HWCR, hwcr);

			/* McStatusWrEn has to be set */
			need_toggle = !(hwcr & BIT(18));

			if (need_toggle)
				wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));

			/* Clear CntP bit safely */
			for (i = 0; i < ARRAY_SIZE(msrs); i++)
				msr_clear_bit(msrs[i], 62);

			/* restore old settings */
			if (need_toggle)
				wrmsrl(MSR_K7_HWCR, hwcr);
		}
	}

	if (c->x86_vendor == X86_VENDOR_INTEL) {
		/*
		 * SDM documents that on family 6 bank 0 should not be written
		 * because it aliases to another special BIOS controlled
		 * register.
		 * But it's not aliased anymore on model 0x1a+
		 * Don't ignore bank 0 completely because there could be a
		 * valid event later, merely don't write CTL0.
		 */

		if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
			mce_banks[0].init = 0;

		/*
		 * All newer Intel systems support MCE broadcasting. Enable
		 * synchronization with a one second timeout.
		 */
		if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
			cfg->monarch_timeout < 0)
			cfg->monarch_timeout = USEC_PER_SEC;

		/*
		 * There are also broken BIOSes on some Pentium M and
		 * earlier systems:
		 */
		if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
			cfg->bootlog = 0;

		if (c->x86 == 6 && c->x86_model == 45)
			quirk_no_way_out = quirk_sandybridge_ifu;
	}
	if (cfg->monarch_timeout < 0)
		cfg->monarch_timeout = 0;
	if (cfg->bootlog != 0)
		cfg->panic_timeout = 30;

	return 0;
}

static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
	if (c->x86 != 5)
		return 0;

	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		intel_p5_mcheck_init(c);
		return 1;
	case X86_VENDOR_CENTAUR:
		winchip_mcheck_init(c);
		return 1;
	default:
		return 0;
	}
}

/*
 * Init basic CPU features needed for early decoding of MCEs.
 */
static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
{
	if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
		mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
		mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
		mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);

		if (mce_flags.smca) {
			msr_ops.ctl = smca_ctl_reg;
			msr_ops.status = smca_status_reg;
			msr_ops.addr = smca_addr_reg;
			msr_ops.misc = smca_misc_reg;
		}
	}
}

static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
{
	struct mca_config *cfg = &mca_cfg;

	/*
	 * All newer Centaur CPUs support MCE broadcasting. Enable
	 * synchronization with a one second timeout.
	 */
	if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
	     c->x86 > 6) {
		if (cfg->monarch_timeout < 0)
			cfg->monarch_timeout = USEC_PER_SEC;
	}
}

static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		mce_intel_feature_init(c);
		mce_adjust_timer = cmci_intel_adjust_timer;
		break;

	case X86_VENDOR_AMD: {
		mce_amd_feature_init(c);
		break;
	}

	case X86_VENDOR_HYGON:
		mce_hygon_feature_init(c);
		break;

	case X86_VENDOR_CENTAUR:
		mce_centaur_feature_init(c);
		break;

	default:
		break;
	}
}

static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
{
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		mce_intel_feature_clear(c);
		break;
	default:
		break;
	}
}

static void mce_start_timer(struct timer_list *t)
{
	unsigned long iv = check_interval * HZ;

	if (mca_cfg.ignore_ce || !iv)
		return;

	this_cpu_write(mce_next_interval, iv);
	__start_timer(t, iv);
}

static void __mcheck_cpu_setup_timer(void)
{
	struct timer_list *t = this_cpu_ptr(&mce_timer);

	timer_setup(t, mce_timer_fn, TIMER_PINNED);
}

static void __mcheck_cpu_init_timer(void)
{
	struct timer_list *t = this_cpu_ptr(&mce_timer);

	timer_setup(t, mce_timer_fn, TIMER_PINNED);
	mce_start_timer(t);
}

/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
	pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
	       smp_processor_id());
}

/* Call the installed machine check handler for this CPU setup. */
void (*machine_check_vector)(struct pt_regs *, long error_code) =
						unexpected_machine_check;

dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
{
	machine_check_vector(regs, error_code);
}

/*
 * Called for each booted CPU to set up machine checks.
 * Must be called with preempt off:
 */
void mcheck_cpu_init(struct cpuinfo_x86 *c)
{
	if (mca_cfg.disabled)
		return;

	if (__mcheck_cpu_ancient_init(c))
		return;

	if (!mce_available(c))
		return;

	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
		mca_cfg.disabled = 1;
		return;
	}

	if (mce_gen_pool_init()) {
		mca_cfg.disabled = 1;
		pr_emerg("Couldn't allocate MCE records pool!\n");
		return;
	}

	machine_check_vector = do_machine_check;

	__mcheck_cpu_init_early(c);
	__mcheck_cpu_init_generic();
	__mcheck_cpu_init_vendor(c);
	__mcheck_cpu_init_clear_banks();
	__mcheck_cpu_setup_timer();
}

/*
 * Called for each booted CPU to clear some machine checks opt-ins
 */
void mcheck_cpu_clear(struct cpuinfo_x86 *c)
{
	if (mca_cfg.disabled)
		return;

	if (!mce_available(c))
		return;

	/*
	 * Possibly to clear general settings generic to x86
	 * __mcheck_cpu_clear_generic(c);
	 */
	__mcheck_cpu_clear_vendor(c);
}

static void __mce_disable_bank(void *arg)
{
	int bank = *((int *)arg);

	__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
	cmci_disable_bank(bank);
}

void mce_disable_bank(int bank)
{
	if (bank >= mca_cfg.banks) {
		pr_warn(FW_BUG
			"Ignoring request to disable invalid MCA bank %d.\n",
			bank);
		return;
	}
	set_bit(bank, mce_banks_ce_disabled);
	on_each_cpu(__mce_disable_bank, &bank, 1);
}

/*
 * mce=off			Disables machine check
 * mce=no_cmci			Disables CMCI
 * mce=no_lmce			Disables LMCE
 * mce=dont_log_ce		Clears corrected events silently, no log created for CEs.
 * mce=ignore_ce		Disables polling and CMCI, corrected events are not cleared.
 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
 *				monarchtimeout is how long to wait for other CPUs on
 *				machine check, or 0 to not wait
 * mce=bootlog			Log MCEs from before booting. Disabled by default
 *				on AMD Fam10h and older.
 * mce=nobootlog		Don't log MCEs from before booting.
 * mce=bios_cmci_threshold	Don't program the CMCI threshold
 * mce=recovery			Force-enable memcpy_mcsafe()
 */
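/*
 * Illustrative examples (not from the original source) of option strings
 * parsed by mcheck_enable() below, assuming a hypothetical kernel
 * command line:
 *
 *	mce=off		->	cfg->disabled = 1
 *	mce=2		->	cfg->tolerant = 2
 *	mce=1,500000	->	cfg->tolerant = 1, monarch_timeout of
 *				500000 us (the timeout is kept in
 *				microseconds, cf. USEC_PER_SEC above)
 */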
static int __init mcheck_enable(char *str)
{
	struct mca_config *cfg = &mca_cfg;

	if (*str == 0) {
		enable_p5_mce();
		return 1;
	}
	if (*str == '=')
		str++;
	if (!strcmp(str, "off"))
		cfg->disabled = 1;
	else if (!strcmp(str, "no_cmci"))
		cfg->cmci_disabled = true;
	else if (!strcmp(str, "no_lmce"))
		cfg->lmce_disabled = 1;
	else if (!strcmp(str, "dont_log_ce"))
		cfg->dont_log_ce = true;
	else if (!strcmp(str, "ignore_ce"))
		cfg->ignore_ce = true;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		cfg->bootlog = (str[0] == 'b');
	else if (!strcmp(str, "bios_cmci_threshold"))
		cfg->bios_cmci_threshold = 1;
	else if (!strcmp(str, "recovery"))
		cfg->recovery = 1;
	else if (isdigit(str[0])) {
		if (get_option(&str, &cfg->tolerant) == 2)
			get_option(&str, &(cfg->monarch_timeout));
	} else {
		pr_info("mce argument %s ignored. Please use /sys\n", str);
		return 0;
	}
	return 1;
}
__setup("mce", mcheck_enable);

int __init mcheck_init(void)
{
	mcheck_intel_therm_init();
	mce_register_decode_chain(&first_nb);
	mce_register_decode_chain(&mce_srao_nb);
	mce_register_decode_chain(&mce_default_nb);
	mcheck_vendor_init_severity();

	INIT_WORK(&mce_work, mce_gen_pool_process);
	init_irq_work(&mce_irq_work, mce_irq_work_cb);

	return 0;
}

/*
 * mce_syscore: PM support
 */

/*
 * Disable machine checks on suspend and shutdown. We can't really handle
 * them later.
 */
static void mce_disable_error_reporting(void)
{
	int i;

	for (i = 0; i < mca_cfg.banks; i++) {
		struct mce_bank *b = &mce_banks[i];

		if (b->init)
			wrmsrl(msr_ops.ctl(i), 0);
	}
}

static void vendor_disable_error_reporting(void)
{
	/*
	 * Don't clear on Intel, AMD, or Hygon CPUs. Some of these MSRs
	 * are socket-wide.
	 * Disabling them for just a single offlined CPU is bad, since it will
	 * inhibit reporting for all shared resources on the socket like the
	 * last level cache (LLC), the integrated memory controller (iMC), etc.
	 */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
	    boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ||
	    boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return;

	mce_disable_error_reporting();
}

static int mce_syscore_suspend(void)
{
	vendor_disable_error_reporting();
	return 0;
}

static void mce_syscore_shutdown(void)
{
	vendor_disable_error_reporting();
}

/*
 * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
 * Only one CPU is active at this time, the others get re-added later using
 * CPU hotplug:
 */
static void mce_syscore_resume(void)
{
	__mcheck_cpu_init_generic();
	__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
	__mcheck_cpu_init_clear_banks();
}

static struct syscore_ops mce_syscore_ops = {
	.suspend	= mce_syscore_suspend,
	.shutdown	= mce_syscore_shutdown,
	.resume		= mce_syscore_resume,
};

/*
 * mce_device: Sysfs support
 */

static void mce_cpu_restart(void *data)
{
	if (!mce_available(raw_cpu_ptr(&cpu_info)))
		return;
	__mcheck_cpu_init_generic();
	__mcheck_cpu_init_clear_banks();
	__mcheck_cpu_init_timer();
}

/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
	mce_timer_delete_all();
	on_each_cpu(mce_cpu_restart, NULL, 1);
}

/* Toggle features for corrected errors */
static void mce_disable_cmci(void *data)
{
	if (!mce_available(raw_cpu_ptr(&cpu_info)))
		return;
	cmci_clear();
}

static void mce_enable_ce(void *all)
{
	if (!mce_available(raw_cpu_ptr(&cpu_info)))
		return;
	cmci_reenable();
	cmci_recheck();
	if (all)
		__mcheck_cpu_init_timer();
}

static struct bus_type mce_subsys = {
	.name		= "machinecheck",
	.dev_name	= "machinecheck",
};

DEFINE_PER_CPU(struct device *, mce_device);

static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
{
	return container_of(attr, struct mce_bank, attr);
}

static ssize_t show_bank(struct device *s, struct device_attribute *attr,
			 char *buf)
{
	return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
}

static ssize_t set_bank(struct device *s, struct device_attribute *attr,
			const char *buf, size_t size)
{
	u64 new;

	if (kstrtou64(buf, 0, &new) < 0)
		return -EINVAL;

	attr_to_bank(attr)->ctl = new;
	mce_restart();

	return size;
}

static ssize_t set_ignore_ce(struct device *s,
			     struct device_attribute *attr,
			     const char *buf, size_t size)
{
	u64 new;

	if (kstrtou64(buf, 0, &new) < 0)
		return -EINVAL;

	mutex_lock(&mce_sysfs_mutex);
	if (mca_cfg.ignore_ce ^ !!new) {
		if (new) {
			/* disable ce features */
			mce_timer_delete_all();
			on_each_cpu(mce_disable_cmci, NULL, 1);
			mca_cfg.ignore_ce = true;
		} else {
			/* enable ce features */
			mca_cfg.ignore_ce = false;
			on_each_cpu(mce_enable_ce, (void *)1, 1);
		}
	}
	mutex_unlock(&mce_sysfs_mutex);

	return size;
}

static ssize_t set_cmci_disabled(struct device *s,
				 struct device_attribute *attr,
				 const char *buf, size_t size)
{
	u64 new;

	if (kstrtou64(buf, 0, &new) < 0)
		return -EINVAL;

	mutex_lock(&mce_sysfs_mutex);
	if (mca_cfg.cmci_disabled ^ !!new) {
		if (new) {
			/* disable cmci */
			on_each_cpu(mce_disable_cmci, NULL, 1);
			mca_cfg.cmci_disabled = true;
		} else {
			/* enable cmci */
			mca_cfg.cmci_disabled = false;
			on_each_cpu(mce_enable_ce, NULL, 1);
		}
	}
	mutex_unlock(&mce_sysfs_mutex);

	return size;
}

static ssize_t store_int_with_restart(struct device *s,
				      struct device_attribute *attr,
				      const char *buf, size_t size)
{
	unsigned long old_check_interval = check_interval;
	ssize_t ret = device_store_ulong(s, attr, buf, size);

	if (check_interval == old_check_interval)
		return ret;

	mutex_lock(&mce_sysfs_mutex);
	mce_restart();
	mutex_unlock(&mce_sysfs_mutex);

	return ret;
}
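
/*
 * Usage sketch (illustrative, paths per the "machinecheck" subsystem
 * registered below): the attributes defined here appear under
 * /sys/devices/system/machinecheck/machinecheckN/, so e.g. writing "30"
 * to machinecheck0/check_interval re-arms the poll timers via
 * store_int_with_restart(), and writing "0" to a bankN file masks that
 * bank's MCi_CTL on the next mce_restart().
 */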

static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);

static struct dev_ext_attribute dev_attr_check_interval = {
	__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
	&check_interval
};

static struct dev_ext_attribute dev_attr_ignore_ce = {
	__ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
	&mca_cfg.ignore_ce
};

static struct dev_ext_attribute dev_attr_cmci_disabled = {
	__ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
	&mca_cfg.cmci_disabled
};

static struct device_attribute *mce_device_attrs[] = {
	&dev_attr_tolerant.attr,
	&dev_attr_check_interval.attr,
#ifdef CONFIG_X86_MCELOG_LEGACY
	&dev_attr_trigger,
#endif
	&dev_attr_monarch_timeout.attr,
	&dev_attr_dont_log_ce.attr,
	&dev_attr_ignore_ce.attr,
	&dev_attr_cmci_disabled.attr,
	NULL
};

static cpumask_var_t mce_device_initialized;

static void mce_device_release(struct device *dev)
{
	kfree(dev);
}

/* Per cpu device init. All of the cpus still share the same ctrl bank: */
static int mce_device_create(unsigned int cpu)
{
	struct device *dev;
	int err;
	int i, j;

	if (!mce_available(&boot_cpu_data))
		return -EIO;

	dev = per_cpu(mce_device, cpu);
	if (dev)
		return 0;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;
	dev->id = cpu;
	dev->bus = &mce_subsys;
	dev->release = &mce_device_release;

	err = device_register(dev);
	if (err) {
		put_device(dev);
		return err;
	}

	for (i = 0; mce_device_attrs[i]; i++) {
		err = device_create_file(dev, mce_device_attrs[i]);
		if (err)
			goto error;
	}
	for (j = 0; j < mca_cfg.banks; j++) {
		err = device_create_file(dev, &mce_banks[j].attr);
		if (err)
			goto error2;
	}
	cpumask_set_cpu(cpu, mce_device_initialized);
	per_cpu(mce_device, cpu) = dev;

	return 0;
error2:
	while (--j >= 0)
		device_remove_file(dev, &mce_banks[j].attr);
error:
	while (--i >= 0)
		device_remove_file(dev, mce_device_attrs[i]);

	device_unregister(dev);

	return err;
}

static void mce_device_remove(unsigned int cpu)
{
	struct device *dev = per_cpu(mce_device, cpu);
	int i;

	if (!cpumask_test_cpu(cpu, mce_device_initialized))
		return;

	for (i = 0; mce_device_attrs[i]; i++)
		device_remove_file(dev, mce_device_attrs[i]);

	for (i = 0; i < mca_cfg.banks; i++)
		device_remove_file(dev, &mce_banks[i].attr);

	device_unregister(dev);
	cpumask_clear_cpu(cpu, mce_device_initialized);
	per_cpu(mce_device, cpu) = NULL;
}

/* Make sure there are no machine checks on offlined CPUs. */
static void mce_disable_cpu(void)
{
	if (!mce_available(raw_cpu_ptr(&cpu_info)))
		return;

	if (!cpuhp_tasks_frozen)
		cmci_clear();

	vendor_disable_error_reporting();
}

static void mce_reenable_cpu(void)
{
	int i;

	if (!mce_available(raw_cpu_ptr(&cpu_info)))
		return;

	if (!cpuhp_tasks_frozen)
		cmci_reenable();
	for (i = 0; i < mca_cfg.banks; i++) {
		struct mce_bank *b = &mce_banks[i];

		if (b->init)
			wrmsrl(msr_ops.ctl(i), b->ctl);
	}
}

static int mce_cpu_dead(unsigned int cpu)
{
	mce_intel_hcpu_update(cpu);

	/* intentionally ignoring frozen here */
	if (!cpuhp_tasks_frozen)
		cmci_rediscover();
	return 0;
}

static int mce_cpu_online(unsigned int cpu)
{
	struct timer_list *t = this_cpu_ptr(&mce_timer);
	int ret;

	mce_device_create(cpu);

	ret = mce_threshold_create_device(cpu);
	if (ret) {
		mce_device_remove(cpu);
		return ret;
	}
	mce_reenable_cpu();
	mce_start_timer(t);
	return 0;
}

static int mce_cpu_pre_down(unsigned int cpu)
{
	struct timer_list *t = this_cpu_ptr(&mce_timer);

	mce_disable_cpu();
	del_timer_sync(t);
	mce_threshold_remove_device(cpu);
	mce_device_remove(cpu);
	return 0;
}
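
/*
 * Descriptive note on the hotplug pairing above: mce_cpu_online() and
 * mce_cpu_pre_down() both run on the CPU being brought up or torn down
 * (which is why this_cpu_ptr(&mce_timer) is safe), while mce_cpu_dead()
 * runs after the CPU is gone and only triggers CMCI bank rediscovery on
 * the surviving CPUs.
 */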

static __init void mce_init_banks(void)
{
	int i;

	for (i = 0; i < mca_cfg.banks; i++) {
		struct mce_bank *b = &mce_banks[i];
		struct device_attribute *a = &b->attr;

		sysfs_attr_init(&a->attr);
		a->attr.name = b->attrname;
		snprintf(b->attrname, ATTR_LEN, "bank%d", i);

		a->attr.mode = 0644;
		a->show = show_bank;
		a->store = set_bank;
	}
}

static __init int mcheck_init_device(void)
{
	int err;

	/*
	 * Check if we have a spare virtual bit. This will only become
	 * a problem if/when we move beyond 5-level page tables.
	 */
	MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);

	if (!mce_available(&boot_cpu_data)) {
		err = -EIO;
		goto err_out;
	}

	if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
		err = -ENOMEM;
		goto err_out;
	}

	mce_init_banks();

	err = subsys_system_register(&mce_subsys, NULL);
	if (err)
		goto err_out_mem;

	err = cpuhp_setup_state(CPUHP_X86_MCE_DEAD, "x86/mce:dead", NULL,
				mce_cpu_dead);
	if (err)
		goto err_out_mem;

	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
				mce_cpu_online, mce_cpu_pre_down);
	if (err < 0)
		goto err_out_online;

	register_syscore_ops(&mce_syscore_ops);

	return 0;

err_out_online:
	cpuhp_remove_state(CPUHP_X86_MCE_DEAD);

err_out_mem:
	free_cpumask_var(mce_device_initialized);

err_out:
	pr_err("Unable to init MCE device (rc: %d)\n", err);

	return err;
}
device_initcall_sync(mcheck_init_device);

/*
 * Old style boot options parsing. Only for compatibility.
 */
static int __init mcheck_disable(char *str)
{
	mca_cfg.disabled = 1;
	return 1;
}
__setup("nomce", mcheck_disable);

#ifdef CONFIG_DEBUG_FS
struct dentry *mce_get_debugfs_dir(void)
{
	static struct dentry *dmce;

	if (!dmce)
		dmce = debugfs_create_dir("mce", NULL);

	return dmce;
}

static void mce_reset(void)
{
	cpu_missing = 0;
	atomic_set(&mce_fake_panicked, 0);
	atomic_set(&mce_executing, 0);
	atomic_set(&mce_callin, 0);
	atomic_set(&global_nwo, 0);
}

static int fake_panic_get(void *data, u64 *val)
{
	*val = fake_panic;
	return 0;
}

static int fake_panic_set(void *data, u64 val)
{
	mce_reset();
	fake_panic = val;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
			fake_panic_set, "%llu\n");
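
/*
 * Usage sketch (illustrative): with CONFIG_DEBUG_FS and debugfs mounted
 * at /sys/kernel/debug, writing a non-zero value to
 * /sys/kernel/debug/mce/fake_panic makes mce_panic() log the event
 * instead of actually panicking, which is handy when exercising the
 * rendezvous paths with error injection.
 */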

static int __init mcheck_debugfs_init(void)
{
	struct dentry *dmce, *ffake_panic;

	dmce = mce_get_debugfs_dir();
	if (!dmce)
		return -ENOMEM;
	ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
					  &fake_panic_fops);
	if (!ffake_panic)
		return -ENOMEM;

	return 0;
}
#else
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
#endif

DEFINE_STATIC_KEY_FALSE(mcsafe_key);
EXPORT_SYMBOL_GPL(mcsafe_key);
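
/*
 * Descriptive note: mcsafe_key is the static branch consulted by
 * memcpy_mcsafe() to pick the fault-tolerant copy variant. It is off by
 * default and flipped on in mcheck_late_init() below when
 * mca_cfg.recovery is set (e.g. via the "mce=recovery" option above).
 */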

static int __init mcheck_late_init(void)
{
	if (mca_cfg.recovery)
		static_branch_inc(&mcsafe_key);

	mcheck_debugfs_init();
	cec_init();

	/*
	 * Flush out everything that has been logged during early boot, now that
	 * everything has been initialized (workqueues, decoders, ...).
	 */
	mce_schedule_work();

	return 0;
}
late_initcall(mcheck_late_init);