/*
 * Machine check handler.
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */
#include <linux/thread_info.h>
#include <linux/capability.h>
#include <linux/miscdevice.h>
#include <linux/ratelimit.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/kobject.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/sysdev.h>
#include <linux/syscore_ops.h>
#include <linux/delay.h>
#include <linux/ctype.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>

#include <asm/processor.h>
#include <asm/mce.h>
#include <asm/msr.h>

#include "mce-internal.h"

static DEFINE_MUTEX(mce_chrdev_read_mutex);

#define rcu_dereference_check_mce(p) \
	rcu_dereference_index_check((p), \
			      rcu_read_lock_sched_held() || \
			      lockdep_is_held(&mce_chrdev_read_mutex))

#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>

int mce_disabled __read_mostly;

#define MISC_MCELOG_MINOR	227

#define SPINUNIT 100	/* 100ns */
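
/* Number of CPUs currently executing the machine check handler. */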
atomic_t mce_entry;

DEFINE_PER_CPU(unsigned, mce_exception_count);

/*
 * Tolerant levels:
 *   0: always panic on uncorrected errors, log corrected errors
 *   1: panic or SIGBUS on uncorrected errors, log corrected errors
 *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
 *   3: never panic or SIGBUS, log all errors (for testing only)
 */
static int			tolerant		__read_mostly = 1;
static int			banks			__read_mostly;
static int			rip_msr			__read_mostly;
static int			mce_bootlog		__read_mostly = -1;
static int			monarch_timeout		__read_mostly = -1;
static int			mce_panic_timeout	__read_mostly;
static int			mce_dont_log_ce		__read_mostly;
int				mce_cmci_disabled	__read_mostly;
int				mce_ignore_ce		__read_mostly;
int				mce_ser			__read_mostly;

struct mce_bank			*mce_banks		__read_mostly;

/* User mode helper program triggered by machine check event */
static unsigned long		mce_need_notify;
static char			mce_helper[128];
static char			*mce_helper_argv[2] = { mce_helper, NULL };

static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
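
/*
 * Worst machine check seen by each CPU during the current event, graded
 * by the Monarch in mce_reign(). cpu_missing is set when a CPU fails to
 * check in during the rendezvous timeout.
 */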
static DEFINE_PER_CPU(struct mce, mces_seen);
static int			cpu_missing;

/*
 * CPU/chipset specific EDAC code can register a notifier call here to print
 * MCE errors in a human-readable form.
 */
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);

/* MCA banks polled by the periodic polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
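
/* Per-CPU work struct used to run mce_process_work() in process context. */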
static DEFINE_PER_CPU(struct work_struct, mce_work);

/* Do the initial setup of a struct mce */
void mce_setup(struct mce *m)
{
	memset(m, 0, sizeof(struct mce));
	m->cpu = m->extcpu = smp_processor_id();
	rdtscll(m->tsc);
	/* We hope get_seconds stays lockless */
	m->time = get_seconds();
	m->cpuvendor = boot_cpu_data.x86_vendor;
	m->cpuid = cpuid_eax(1);
#ifdef CONFIG_SMP
	m->socketid = cpu_data(m->extcpu).phys_proc_id;
#endif
	m->apicid = cpu_data(m->extcpu).initial_apicid;
	rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
}

DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. It
 * also separates MCEs from kernel messages to avoid bogus bug reports.
 */

static struct mce_log mcelog = {
	.signature	= MCE_LOG_SIGNATURE,
	.len		= MCE_LOG_LEN,
	.recordlen	= sizeof(struct mce),
};

void mce_log(struct mce *mce)
{
	unsigned next, entry;
	int ret = 0;

	/* Emit the trace record: */
	trace_mce_record(mce);

	ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
	if (ret == NOTIFY_STOP)
		return;

	mce->finished = 0;
	wmb();
	for (;;) {
		entry = rcu_dereference_check_mce(mcelog.next);
		for (;;) {

			/*
			 * When the buffer fills up discard new entries.
			 * Assume that the earlier errors are the more
			 * interesting ones:
			 */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW,
					(unsigned long *)&mcelog.flags);
				return;
			}
			/* Old left over entry. Skip: */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	mcelog.entry[entry].finished = 1;
	wmb();

	mce->finished = 1;
	set_bit(0, &mce_need_notify);
}
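
/* Print a single MCE record to the console; registered decoders may add a human-readable description. */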
static void print_mce(struct mce *m)
{
	int ret = 0;

	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
	       m->extcpu, m->mcgstatus, m->bank, m->status);

	if (m->ip) {
		pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
			!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
				m->cs, m->ip);

		if (m->cs == __KERNEL_CS)
			print_symbol("{%s}", m->ip);
		pr_cont("\n");
	}

	pr_emerg(HW_ERR "TSC %llx ", m->tsc);
	if (m->addr)
		pr_cont("ADDR %llx ", m->addr);
	if (m->misc)
		pr_cont("MISC %llx ", m->misc);

	pr_cont("\n");
	/*
	 * Note this output is parsed by external tools and old fields
	 * should not be changed.
	 */
	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
		cpu_data(m->extcpu).microcode);

	/*
	 * Print out human-readable details about the MCE error,
	 * (if the CPU has an implementation for that)
	 */
	ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
	if (ret == NOTIFY_STOP)
		return;

	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}

#define PANIC_TIMEOUT 5 /* 5 seconds */

static atomic_t mce_paniced;

static int fake_panic;
static atomic_t mce_fake_paniced;

/* Panic in progress. Enable interrupts and wait for final IPI */
static void wait_for_panic(void)
{
	long timeout = PANIC_TIMEOUT*USEC_PER_SEC;

	preempt_disable();
	local_irq_enable();
	while (timeout-- > 0)
		udelay(1);
	if (panic_timeout == 0)
		panic_timeout = mce_panic_timeout;
	panic("Panicking machine check CPU died");
}
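
/*
 * Print everything still unlogged in the MCE buffer and then panic. In
 * fake panic mode (used for testing) only print.
 */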
static void mce_panic(char *msg, struct mce *final, char *exp)
{
	int i, apei_err = 0;

	if (!fake_panic) {
		/*
		 * Make sure only one CPU runs in machine check panic
		 */
		if (atomic_inc_return(&mce_paniced) > 1)
			wait_for_panic();
		barrier();

		bust_spinlocks(1);
		console_verbose();
	} else {
		/* Don't log too much for fake panic */
		if (atomic_inc_return(&mce_fake_paniced) > 1)
			return;
	}
	/* First print corrected ones that are still unlogged */
	for (i = 0; i < MCE_LOG_LEN; i++) {
		struct mce *m = &mcelog.entry[i];
		if (!(m->status & MCI_STATUS_VAL))
			continue;
		if (!(m->status & MCI_STATUS_UC)) {
			print_mce(m);
			if (!apei_err)
				apei_err = apei_write_mce(m);
		}
	}
	/* Now print uncorrected but with the final one last */
	for (i = 0; i < MCE_LOG_LEN; i++) {
		struct mce *m = &mcelog.entry[i];
		if (!(m->status & MCI_STATUS_VAL))
			continue;
		if (!(m->status & MCI_STATUS_UC))
			continue;
		if (!final || memcmp(m, final, sizeof(struct mce))) {
			print_mce(m);
			if (!apei_err)
				apei_err = apei_write_mce(m);
		}
	}
	if (final) {
		print_mce(final);
		if (!apei_err)
			apei_err = apei_write_mce(final);
	}
	if (cpu_missing)
		pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
	if (exp)
		pr_emerg(HW_ERR "Machine check: %s\n", exp);
	if (!fake_panic) {
		if (panic_timeout == 0)
			panic_timeout = mce_panic_timeout;
		panic(msg);
	} else
		pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
}

/* Support code for software error injection */

static int msr_to_offset(u32 msr)
{
	unsigned bank = __this_cpu_read(injectm.bank);

	if (msr == rip_msr)
		return offsetof(struct mce, ip);
	if (msr == MSR_IA32_MCx_STATUS(bank))
		return offsetof(struct mce, status);
	if (msr == MSR_IA32_MCx_ADDR(bank))
		return offsetof(struct mce, addr);
	if (msr == MSR_IA32_MCx_MISC(bank))
		return offsetof(struct mce, misc);
	if (msr == MSR_IA32_MCG_STATUS)
		return offsetof(struct mce, mcgstatus);
	return -1;
}

/* MSR access wrappers used for error injection */
static u64 mce_rdmsrl(u32 msr)
{
	u64 v;

	if (__this_cpu_read(injectm.finished)) {
		int offset = msr_to_offset(msr);

		if (offset < 0)
			return 0;
		return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
	}

	if (rdmsrl_safe(msr, &v)) {
		WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
		/*
		 * Return zero in case the access faulted. This should
		 * not happen normally but can happen if the CPU does
		 * something weird, or if the code is buggy.
		 */
		v = 0;
	}

	return v;
}

static void mce_wrmsrl(u32 msr, u64 v)
{
	if (__this_cpu_read(injectm.finished)) {
		int offset = msr_to_offset(msr);

		if (offset >= 0)
			*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
		return;
	}
	wrmsrl(msr, v);
}

/*
 * Collect all global (w.r.t. this processor) status about this machine
 * check into our "mce" struct so that we can use it later to assess
 * the severity of the problem as we read per-bank specific details.
 */
static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
{
	mce_setup(m);

	m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
	if (regs) {
		/*
		 * Get the address of the instruction at the time of
		 * the machine check error.
		 */
		if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
			m->ip = regs->ip;
			m->cs = regs->cs;
		}
		/* Use accurate RIP reporting if available. */
		if (rip_msr)
			m->ip = mce_rdmsrl(rip_msr);
	}
}

/*
 * Simple lockless ring to communicate PFNs from the exception handler to the
 * process-context work function. This is vastly simplified because there's
 * only a single reader and a single writer.
 */
#define MCE_RING_SIZE 16	/* one entry is kept free so full != empty */

struct mce_ring {
	unsigned short start;
	unsigned short end;
	unsigned long ring[MCE_RING_SIZE];
};
static DEFINE_PER_CPU(struct mce_ring, mce_ring);

/* Runs with CPU affinity in workqueue */
static int mce_ring_empty(void)
{
	struct mce_ring *r = &__get_cpu_var(mce_ring);

	return r->start == r->end;
}

static int mce_ring_get(unsigned long *pfn)
{
	struct mce_ring *r;
	int ret = 0;

	*pfn = 0;
	get_cpu();
	r = &__get_cpu_var(mce_ring);
	if (r->start == r->end)
		goto out;
	*pfn = r->ring[r->start];
	r->start = (r->start + 1) % MCE_RING_SIZE;
	ret = 1;
out:
	put_cpu();
	return ret;
}

/* Always runs in MCE context with preempt off */
static int mce_ring_add(unsigned long pfn)
{
	struct mce_ring *r = &__get_cpu_var(mce_ring);
	unsigned next;

	next = (r->end + 1) % MCE_RING_SIZE;
	if (next == r->start)
		return -1;
	r->ring[r->end] = pfn;
	wmb();
	r->end = next;
	return 0;
}

int mce_available(struct cpuinfo_x86 *c)
{
	if (mce_disabled)
		return 0;
	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}
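
/* Schedule process-context work if the action-optional PFN ring is non-empty. */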
static void mce_schedule_work(void)
{
	if (!mce_ring_empty()) {
		struct work_struct *work = &__get_cpu_var(mce_work);
		if (!work_pending(work))
			schedule_work(work);
	}
}
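
/* irq_work lets the NMI-context machine check handler kick off notification safely. */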
DEFINE_PER_CPU(struct irq_work, mce_irq_work);

static void mce_irq_work_cb(struct irq_work *entry)
{
	mce_notify_irq();
	mce_schedule_work();
}

static void mce_report_event(struct pt_regs *regs)
{
	if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
		mce_notify_irq();
		/*
		 * Triggering the work queue here is just an insurance
		 * policy in case the syscall exit notify handler
		 * doesn't run soon enough or ends up running on the
		 * wrong CPU (can happen when audit sleeps)
		 */
		mce_schedule_work();
		return;
	}

	irq_work_queue(&__get_cpu_var(mce_irq_work));
}
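
/* Per-CPU count of timer-driven polling runs over the banks. */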
DEFINE_PER_CPU(unsigned, mce_poll_count);

/*
 * Poll for corrected events or events that happened before reset.
 * Those are just logged through /dev/mcelog.
 *
 * This is executed in standard interrupt context.
 *
 * Note: the spec recommends panicking for fatal unsignalled
 * errors here. However this would be quite problematic --
 * we would need to reimplement the Monarch handling and
 * it would mess up the exclusion between the exception handler
 * and the poll handler -- so we skip this for now.
 * These cases should not happen anyway, or only when the CPU
 * is already totally confused. In this case it's likely it will
 * not fully execute the machine check handler either.
 */
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
	struct mce m;
	int i;

	percpu_inc(mce_poll_count);

	mce_gather_info(&m, NULL);

	for (i = 0; i < banks; i++) {
		if (!mce_banks[i].ctl || !test_bit(i, *b))
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		barrier();
		m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
		if (!(m.status & MCI_STATUS_VAL))
			continue;

		/*
		 * Uncorrected or signalled events are handled by the exception
		 * handler when it is enabled, so don't process those here.
		 *
		 * TBD do the same check for MCI_STATUS_EN here?
		 */
		if (!(flags & MCP_UC) &&
		    (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
			continue;

		if (m.status & MCI_STATUS_MISCV)
			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
		if (m.status & MCI_STATUS_ADDRV)
			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));

		if (!(flags & MCP_TIMESTAMP))
			m.tsc = 0;
		/*
		 * Don't get the IP here because it's unlikely to
		 * have anything to do with the actual error location.
		 */
		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
			mce_log(&m);

		/*
		 * Clear state for this bank.
		 */
		mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
	}

	/*
	 * Don't clear MCG_STATUS here because it's only defined for
	 * exceptions.
	 */

	sync_core();
}
EXPORT_SYMBOL_GPL(machine_check_poll);

/*
 * Do a quick check if any of the events requires a panic.
 * This decides if we keep the events around or clear them.
 */
static int mce_no_way_out(struct mce *m, char **msg)
{
	int i;

	for (i = 0; i < banks; i++) {
		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
			return 1;
	}
	return 0;
}

/*
 * Variable to establish order between CPUs while scanning.
 * Each CPU spins initially until mce_executing equals its number.
 */
static atomic_t mce_executing;

/*
 * Defines order of CPUs on entry. First CPU becomes Monarch.
 */
static atomic_t mce_callin;

/*
 * Check if a timeout waiting for other CPUs happened.
 */
static int mce_timed_out(u64 *t)
{
	/*
	 * The others already did panic for some reason.
	 * Bail out like in a timeout.
	 * rmb() to tell the compiler that system_state
	 * might have been modified by someone else.
	 */
	rmb();
	if (atomic_read(&mce_paniced))
		wait_for_panic();
	if (!monarch_timeout)
		goto out;
	if ((s64)*t < SPINUNIT) {
		/* CHECKME: Make panic default for 1 too? */
		if (tolerant < 1)
			mce_panic("Timeout synchronizing machine check over CPUs",
				  NULL, NULL);
		cpu_missing = 1;
		return 1;
	}
	*t -= SPINUNIT;
out:
	touch_nmi_watchdog();
	return 0;
}

/*
 * The Monarch's reign. The Monarch is the CPU who entered
 * the machine check handler first. It waits for the others to
 * raise the exception too and then grades them. When any
 * error is fatal, panic. Only then let the others continue.
 *
 * The other CPUs entering the MCE handler will be controlled by the
 * Monarch. They are called Subjects.
 *
 * This way we prevent any potential data corruption in an unrecoverable case
 * and also make sure all CPUs' errors are always examined.
 *
 * Also this detects the case of a machine check event coming from outer
 * space (not detected by any CPU). In this case some external agent wants
 * us to shut down, so panic too.
 *
 * The other CPUs might still decide to panic if the handler happens
 * in an unrecoverable place, but in this case the system is in a semi-stable
 * state and won't corrupt anything by itself. It's ok to let the others
 * continue for a bit first.
 *
 * All the spin loops have timeouts; when a timeout happens a CPU
 * typically elects itself to be Monarch.
 */
static void mce_reign(void)
{
	int cpu;
	struct mce *m = NULL;
	int global_worst = 0;
	char *msg = NULL;
	char *nmsg = NULL;

	/*
	 * This CPU is the Monarch and the other CPUs have run
	 * through their handlers.
	 * Grade the severity of the errors of all the CPUs.
	 */
	for_each_possible_cpu(cpu) {
		int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
					    &nmsg);
		if (severity > global_worst) {
			msg = nmsg;
			global_worst = severity;
			m = &per_cpu(mces_seen, cpu);
		}
	}

	/*
	 * Cannot recover? Panic here then.
	 * This dumps all the mces in the log buffer and stops the
	 * other CPUs.
	 */
	if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
		mce_panic("Fatal Machine check", m, msg);

	/*
	 * For UC somewhere we let the CPU who detects it handle it.
	 * We also must let the others continue, otherwise the handling
	 * CPU could deadlock on a lock.
	 */

	/*
	 * No machine check event found. Must be some external
	 * source or one CPU is hung. Panic.
	 */
	if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
		mce_panic("Machine check from unknown source", NULL, NULL);

	/*
	 * Now clear all the mces_seen so that they don't reappear on
	 * the next mce.
	 */
	for_each_possible_cpu(cpu)
		memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
}
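
/* Number of CPUs that saw a "no way out" (must panic) condition on entry. */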
707static atomic_t global_nwo;
708
709/*
710 * Start of Monarch synchronization. This waits until all CPUs have
711 * entered the exception handler and then determines if any of them
712 * saw a fatal event that requires panic. Then it executes them
713 * in the entry order.
714 * TBD double check parallel CPU hotunplug
715 */
Hidetoshi Seto7fb06fc2009-06-15 18:18:43 +0900716static int mce_start(int *no_way_out)
Andi Kleen3c079792009-05-27 21:56:55 +0200717{
Hidetoshi Seto7fb06fc2009-06-15 18:18:43 +0900718 int order;
Andi Kleen3c079792009-05-27 21:56:55 +0200719 int cpus = num_online_cpus();
720 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
721
Hidetoshi Seto7fb06fc2009-06-15 18:18:43 +0900722 if (!timeout)
723 return -1;
Andi Kleen3c079792009-05-27 21:56:55 +0200724
Hidetoshi Seto7fb06fc2009-06-15 18:18:43 +0900725 atomic_add(*no_way_out, &global_nwo);
Huang Ying184e1fd2009-06-15 15:37:07 +0800726 /*
727 * global_nwo should be updated before mce_callin
728 */
729 smp_wmb();
Borislav Petkova95436e2009-06-20 23:28:22 -0700730 order = atomic_inc_return(&mce_callin);
Andi Kleen3c079792009-05-27 21:56:55 +0200731
732 /*
733 * Wait for everyone.
734 */
735 while (atomic_read(&mce_callin) != cpus) {
736 if (mce_timed_out(&timeout)) {
737 atomic_set(&global_nwo, 0);
Hidetoshi Seto7fb06fc2009-06-15 18:18:43 +0900738 return -1;
Andi Kleen3c079792009-05-27 21:56:55 +0200739 }
740 ndelay(SPINUNIT);
741 }
742
743 /*
Huang Ying184e1fd2009-06-15 15:37:07 +0800744 * mce_callin should be read before global_nwo
745 */
746 smp_rmb();
Hidetoshi Seto7fb06fc2009-06-15 18:18:43 +0900747
748 if (order == 1) {
749 /*
750 * Monarch: Starts executing now, the others wait.
751 */
752 atomic_set(&mce_executing, 1);
753 } else {
754 /*
755 * Subject: Now start the scanning loop one by one in
756 * the original callin order.
757 * This way when there are any shared banks it will be
758 * only seen by one CPU before cleared, avoiding duplicates.
759 */
760 while (atomic_read(&mce_executing) < order) {
761 if (mce_timed_out(&timeout)) {
762 atomic_set(&global_nwo, 0);
763 return -1;
764 }
765 ndelay(SPINUNIT);
766 }
767 }
768
Huang Ying184e1fd2009-06-15 15:37:07 +0800769 /*
Andi Kleen3c079792009-05-27 21:56:55 +0200770 * Cache the global no_way_out state.
771 */
Hidetoshi Seto7fb06fc2009-06-15 18:18:43 +0900772 *no_way_out = atomic_read(&global_nwo);
Andi Kleen3c079792009-05-27 21:56:55 +0200773
Hidetoshi Seto7fb06fc2009-06-15 18:18:43 +0900774 return order;
Andi Kleen3c079792009-05-27 21:56:55 +0200775}
776

/*
 * Synchronize between CPUs after main scanning loop.
 * This invokes the bulk of the Monarch processing.
 */
static int mce_end(int order)
{
	int ret = -1;
	u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;

	if (!timeout)
		goto reset;
	if (order < 0)
		goto reset;

	/*
	 * Allow others to run.
	 */
	atomic_inc(&mce_executing);

	if (order == 1) {
		/* CHECKME: Can this race with a parallel hotplug? */
		int cpus = num_online_cpus();

		/*
		 * Monarch: Wait for everyone to go through their scanning
		 * loops.
		 */
		while (atomic_read(&mce_executing) <= cpus) {
			if (mce_timed_out(&timeout))
				goto reset;
			ndelay(SPINUNIT);
		}

		mce_reign();
		barrier();
		ret = 0;
	} else {
		/*
		 * Subject: Wait for Monarch to finish.
		 */
		while (atomic_read(&mce_executing) != 0) {
			if (mce_timed_out(&timeout))
				goto reset;
			ndelay(SPINUNIT);
		}

		/*
		 * Don't reset anything. That's done by the Monarch.
		 */
		return 0;
	}

	/*
	 * Reset all global state.
	 */
reset:
	atomic_set(&global_nwo, 0);
	atomic_set(&mce_callin, 0);
	barrier();

	/*
	 * Let others run again.
	 */
	atomic_set(&mce_executing, 0);
	return ret;
}

/*
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * be somewhat complicated (e.g. segment offset would require an instruction
 * parser). So only support physical addresses up to page granularity for now.
 */
static int mce_usable_address(struct mce *m)
{
	if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
		return 0;
	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
		return 0;
	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
		return 0;
	return 1;
}
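
/* Clear the status registers of all banks we logged in this pass. */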
static void mce_clear_state(unsigned long *toclear)
{
	int i;

	for (i = 0; i < banks; i++) {
		if (test_bit(i, toclear))
			mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
	}
}

/*
 * The actual machine check handler. This only handles real
 * exceptions when something got corrupted coming in through int 18.
 *
 * This is executed in NMI context not subject to normal locking rules. This
 * implies that most kernel services cannot be safely used. Don't even
 * think about putting a printk in there!
 *
 * On Intel systems this is entered on all CPUs in parallel through
 * MCE broadcast. However some CPUs might be broken beyond repair,
 * so be always careful when synchronizing with others.
 */
void do_machine_check(struct pt_regs *regs, long error_code)
{
	struct mce m, *final;
	int i;
	int worst = 0;
	int severity;
	/*
	 * Establish sequential order between the CPUs entering the machine
	 * check handler.
	 */
	int order;
	/*
	 * If no_way_out gets set, there is no safe way to recover from this
	 * MCE. If tolerant is cranked up, we'll try anyway.
	 */
	int no_way_out = 0;
	/*
	 * If kill_it gets set, there might be a way to recover from this
	 * error.
	 */
	int kill_it = 0;
	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
	char *msg = "Unknown";

	atomic_inc(&mce_entry);

	percpu_inc(mce_exception_count);

	if (!banks)
		goto out;

	mce_gather_info(&m, regs);

	final = &__get_cpu_var(mces_seen);
	*final = m;

	no_way_out = mce_no_way_out(&m, &msg);

	barrier();

	/*
	 * When there is no restart IP, we must always kill or panic.
	 */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_it = 1;

	/*
	 * Go through all the banks in exclusion of the other CPUs.
	 * This way we don't report duplicated events on shared banks
	 * because the first one to see it will clear it.
	 */
	order = mce_start(&no_way_out);
	for (i = 0; i < banks; i++) {
		__clear_bit(i, toclear);
		if (!mce_banks[i].ctl)
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;

		m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
		if ((m.status & MCI_STATUS_VAL) == 0)
			continue;

		/*
		 * Errors that are neither uncorrected nor signaled are
		 * handled by machine_check_poll(). Leave them alone,
		 * unless this panics.
		 */
		if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
			!no_way_out)
			continue;

		/*
		 * Set taint even when machine check was not enabled.
		 */
		add_taint(TAINT_MACHINE_CHECK);

		severity = mce_severity(&m, tolerant, NULL);

		/*
		 * When the event is one the corrected-error handler would
		 * own, don't touch it, unless we're panicking.
		 */
		if (severity == MCE_KEEP_SEVERITY && !no_way_out)
			continue;
		__set_bit(i, toclear);
		if (severity == MCE_NO_SEVERITY) {
			/*
			 * Machine check event was not enabled. Clear, but
			 * ignore.
			 */
			continue;
		}

		/*
		 * Kill on action required.
		 */
		if (severity == MCE_AR_SEVERITY)
			kill_it = 1;

		if (m.status & MCI_STATUS_MISCV)
			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
		if (m.status & MCI_STATUS_ADDRV)
			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));

		/*
		 * Action optional error. Queue address for later processing.
		 * When the ring overflows we just ignore the AO error.
		 * RED-PEN add some logging mechanism when
		 * usable_address or mce_add_ring fails.
		 * RED-PEN don't ignore overflow for tolerant == 0
		 */
		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
			mce_ring_add(m.addr >> PAGE_SHIFT);

		mce_log(&m);

		if (severity > worst) {
			*final = m;
			worst = severity;
		}
	}

	if (!no_way_out)
		mce_clear_state(toclear);

	/*
	 * Do most of the synchronization with other CPUs.
	 * When there's any problem use only local no_way_out state.
	 */
	if (mce_end(order) < 0)
		no_way_out = worst >= MCE_PANIC_SEVERITY;

	/*
	 * If we have decided that we just CAN'T continue, and the user
	 * has not set tolerant to an insane level, give up and die.
	 *
	 * This is mainly used in the case when the system doesn't
	 * support MCE broadcasting or it has been disabled.
	 */
	if (no_way_out && tolerant < 3)
		mce_panic("Fatal machine check on current CPU", final, msg);

	/*
	 * If the error seems to be unrecoverable, something should be
	 * done. Try to kill as little as possible. If we can kill just
	 * one task, do that. If the user has set the tolerance very
	 * high, don't try to do anything at all.
	 */

	if (kill_it && tolerant < 3)
		force_sig(SIGBUS, current);

	/* notify userspace ASAP */
	set_thread_flag(TIF_MCE_NOTIFY);

	if (worst > 0)
		mce_report_event(regs);
	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
out:
	atomic_dec(&mce_entry);
	sync_core();
}
EXPORT_SYMBOL_GPL(do_machine_check);

/* Dummy to break the dependency; the actual code is in mm/memory-failure.c */
void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
{
	printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
}

/*
 * Called after mce notification in process context. This code
 * is allowed to sleep. Call the high level VM handler to process
 * any corrupted pages.
 * Assume that the work queue code only calls this one at a time
 * per CPU.
 * Note we don't disable preemption, so this code might run on the wrong
 * CPU. In this case the event is picked up by the scheduled work queue.
 * This is merely a fast path to expedite processing in some common
 * cases.
 */
void mce_notify_process(void)
{
	unsigned long pfn;
	mce_notify_irq();
	while (mce_ring_get(&pfn))
		memory_failure(pfn, MCE_VECTOR);
}

static void mce_process_work(struct work_struct *dummy)
{
	mce_notify_process();
}

#ifdef CONFIG_X86_MCE_INTEL
/**
 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
 * @status: Event status information
 *
 * This function should be called by the thermal interrupt after the
 * event has been processed and the decision was made to log the event
 * further.
 *
 * The status parameter will be saved to the 'status' field of 'struct mce'
 * and historically has been the register value of the
 * MSR_IA32_THERMAL_STATUS (Intel) msr.
 */
void mce_log_therm_throt_event(__u64 status)
{
	struct mce m;

	mce_setup(&m);
	m.bank = MCE_THERMAL_BANK;
	m.status = status;
	mce_log(&m);
}
#endif /* CONFIG_X86_MCE_INTEL */

/*
 * Periodic polling timer for "silent" machine check errors.  If the
 * poller finds an MCE, poll 2x faster.  When the poller finds no more
 * errors, poll 2x slower (up to check_interval seconds).
 */
static int check_interval = 5 * 60; /* 5 minutes */

static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);

static void mce_start_timer(unsigned long data)
{
	struct timer_list *t = &per_cpu(mce_timer, data);
	int *n;

	WARN_ON(smp_processor_id() != data);

	if (mce_available(__this_cpu_ptr(&cpu_info))) {
		machine_check_poll(MCP_TIMESTAMP,
				&__get_cpu_var(mce_poll_banks));
	}

	/*
	 * Alert userspace if needed. If we logged an MCE, reduce the
	 * polling interval, otherwise increase the polling interval.
	 */
	n = &__get_cpu_var(mce_next_interval);
	if (mce_notify_irq())
		*n = max(*n/2, HZ/100);
	else
		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));

	t->expires = jiffies + *n;
	add_timer_on(t, smp_processor_id());
}

/* Must not be called in IRQ context where del_timer_sync() can deadlock */
static void mce_timer_delete_all(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		del_timer_sync(&per_cpu(mce_timer, cpu));
}

static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);

/*
 * Notify the user(s) about new machine check events.
 * Can be called from interrupt context, but not from machine check/NMI
 * context.
 */
int mce_notify_irq(void)
{
	/* Not more than two messages every minute */
	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);

	clear_thread_flag(TIF_MCE_NOTIFY);

	if (test_and_clear_bit(0, &mce_need_notify)) {
		/* wake processes polling /dev/mcelog */
		wake_up_interruptible(&mce_chrdev_wait);

		/*
		 * There is no risk of missing notifications because
		 * work_pending is always cleared before the function is
		 * executed.
		 */
		if (mce_helper[0] && !work_pending(&mce_trigger_work))
			schedule_work(&mce_trigger_work);

		if (__ratelimit(&ratelimit))
			pr_info(HW_ERR "Machine check events logged\n");

		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
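
/* Allocate mce_banks and enable all events on every bank by default. */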
static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
	int i;

	mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
	if (!mce_banks)
		return -ENOMEM;
	for (i = 0; i < banks; i++) {
		struct mce_bank *b = &mce_banks[i];

		b->ctl = -1ULL;
		b->init = 1;
	}
	return 0;
}

/*
 * Initialize Machine Checks for a CPU.
 */
static int __cpuinit __mcheck_cpu_cap_init(void)
{
	unsigned b;
	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);

	b = cap & MCG_BANKCNT_MASK;
	if (!banks)
		printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);

	if (b > MAX_NR_BANKS) {
		printk(KERN_WARNING
		       "MCE: Using only %u machine check banks out of %u\n",
			MAX_NR_BANKS, b);
		b = MAX_NR_BANKS;
	}

	/* Don't support asymmetric configurations today */
	WARN_ON(banks != 0 && b != banks);
	banks = b;
	if (!mce_banks) {
		int err = __mcheck_cpu_mce_banks_init();

		if (err)
			return err;
	}

	/* Use accurate RIP reporting if available. */
	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
		rip_msr = MSR_IA32_MCG_EIP;

	if (cap & MCG_SER_P)
		mce_ser = 1;

	return 0;
}

static void __mcheck_cpu_init_generic(void)
{
	mce_banks_t all_banks;
	u64 cap;
	int i;

	/*
	 * Log the machine checks left over from the previous reset.
	 */
	bitmap_fill(all_banks, MAX_NR_BANKS);
	machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);

	set_in_cr4(X86_CR4_MCE);

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	if (cap & MCG_CTL_P)
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

	for (i = 0; i < banks; i++) {
		struct mce_bank *b = &mce_banks[i];

		if (!b->init)
			continue;
		wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
		wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
	}
}

/* Add per CPU specific workarounds here */
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
		pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
		return -EOPNOTSUPP;
	}

	/* This should be disabled by the BIOS, but isn't always */
	if (c->x86_vendor == X86_VENDOR_AMD) {
		if (c->x86 == 15 && banks > 4) {
			/*
			 * disable GART TBL walk error reporting, which
			 * trips off incorrectly with the IOMMU & 3ware
			 * & Cerberus:
			 */
			clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
		}
		if (c->x86 <= 17 && mce_bootlog < 0) {
			/*
			 * Lots of broken BIOSes around that don't clear them
			 * by default and leave crap in there. Don't log:
			 */
			mce_bootlog = 0;
		}
		/*
		 * Various K7s with broken bank 0 around. Always disable
		 * by default.
		 */
		if (c->x86 == 6 && banks > 0)
			mce_banks[0].ctl = 0;
	}

	if (c->x86_vendor == X86_VENDOR_INTEL) {
		/*
		 * SDM documents that on family 6 bank 0 should not be written
		 * because it aliases to another special BIOS controlled
		 * register.
		 * But it's not aliased anymore on model 0x1a+
		 * Don't ignore bank 0 completely because there could be a
		 * valid event later, merely don't write CTL0.
		 */
		if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
			mce_banks[0].init = 0;

		/*
		 * All newer Intel systems support MCE broadcasting. Enable
		 * synchronization with a one second timeout.
		 */
		if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
			monarch_timeout < 0)
			monarch_timeout = USEC_PER_SEC;

		/*
		 * There are also broken BIOSes on some Pentium M and
		 * earlier systems:
		 */
		if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
			mce_bootlog = 0;
	}
	if (monarch_timeout < 0)
		monarch_timeout = 0;
	if (mce_bootlog != 0)
		mce_panic_timeout = 30;

	return 0;
}
Hidetoshi Seto3a97fc32011-06-08 10:58:35 +09001343static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
Andi Kleen4efc0672009-04-28 19:07:31 +02001344{
1345 if (c->x86 != 5)
Hidetoshi Seto3a97fc32011-06-08 10:58:35 +09001346 return 0;
1347
Andi Kleen4efc0672009-04-28 19:07:31 +02001348 switch (c->x86_vendor) {
1349 case X86_VENDOR_INTEL:
Hidetoshi Setoc6978362009-06-15 17:22:49 +09001350 intel_p5_mcheck_init(c);
Hidetoshi Seto3a97fc32011-06-08 10:58:35 +09001351 return 1;
1353 case X86_VENDOR_CENTAUR:
1354 winchip_mcheck_init(c);
Hidetoshi Seto3a97fc32011-06-08 10:58:35 +09001355 return 1;
1357 }
Hidetoshi Seto3a97fc32011-06-08 10:58:35 +09001358
1359 return 0;
Andi Kleen4efc0672009-04-28 19:07:31 +02001360}
1361
Borislav Petkov5e099542009-10-16 12:31:32 +02001362static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363{
1364 switch (c->x86_vendor) {
1365 case X86_VENDOR_INTEL:
1366 mce_intel_feature_init(c);
1367 break;
Jacob Shin89b831e2005-11-05 17:25:53 +01001368 case X86_VENDOR_AMD:
1369 mce_amd_feature_init(c);
1370 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 default:
1372 break;
1373 }
1374}
1375
Borislav Petkov5e099542009-10-16 12:31:32 +02001376static void __mcheck_cpu_init_timer(void)
Andi Kleen52d168e2009-02-12 13:39:29 +01001377{
1378 struct timer_list *t = &__get_cpu_var(mce_timer);
Tejun Heo245b2e72009-06-24 15:13:48 +09001379 int *n = &__get_cpu_var(mce_next_interval);
Andi Kleen52d168e2009-02-12 13:39:29 +01001380
Jan Beulichbc09eff2009-12-08 11:21:37 +09001381 setup_timer(t, mce_start_timer, smp_processor_id());
1382
Hidetoshi Seto62fdac52009-06-11 16:06:07 +09001383 if (mce_ignore_ce)
1384 return;
1385
Andi Kleen6298c512009-04-09 12:28:22 +02001386 *n = check_interval * HZ;
1387 if (!*n)
Andi Kleen52d168e2009-02-12 13:39:29 +01001388 return;
Andi Kleen6298c512009-04-09 12:28:22 +02001389 t->expires = round_jiffies(jiffies + *n);
Hidetoshi Seto5be60662009-06-24 09:21:10 +09001390 add_timer_on(t, smp_processor_id());
Andi Kleen52d168e2009-02-12 13:39:29 +01001391}
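/*
 * Editor's note: a worked example of the interval math above, assuming the
 * usual 5-minute check_interval default and HZ == 1000; round_jiffies()
 * aligns the expiry to a whole second so idle CPUs can wake up together:
 */
#if 0
	int n = 5 * 60 * HZ;				/* 300000 jiffies */
	t->expires = round_jiffies(jiffies + n);	/* ~5 minutes out */
#endif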
1392
Andi Kleen9eda8cb2009-07-09 00:31:42 +02001393/* Handle unconfigured int18 (should never happen) */
1394static void unexpected_machine_check(struct pt_regs *regs, long error_code)
1395{
1396 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
1397 smp_processor_id());
1398}
1399
1400/* Call the installed machine check handler for this CPU setup. */
1401void (*machine_check_vector)(struct pt_regs *, long error_code) =
1402 unexpected_machine_check;
1403
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001404/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 * Called for each booted CPU to set up machine checks.
Ingo Molnare9eee032009-04-08 12:31:17 +02001406 * Must be called with preempt off:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 */
Borislav Petkov5e099542009-10-16 12:31:32 +02001408void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409{
Andi Kleen4efc0672009-04-28 19:07:31 +02001410 if (mce_disabled)
1411 return;
1412
Hidetoshi Seto3a97fc32011-06-08 10:58:35 +09001413 if (__mcheck_cpu_ancient_init(c))
1414 return;
Andi Kleen4efc0672009-04-28 19:07:31 +02001415
Andi Kleen5b4408f2009-02-12 13:39:30 +01001416 if (!mce_available(c))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 return;
1418
Borislav Petkov5e099542009-10-16 12:31:32 +02001419 if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
Andi Kleen04b2b1a2009-04-28 22:50:19 +02001420 mce_disabled = 1;
Andi Kleen0d7482e32009-02-17 23:07:13 +01001421 return;
1422 }
Andi Kleen0d7482e32009-02-17 23:07:13 +01001423
Andi Kleen5d727922009-04-27 19:25:48 +02001424 machine_check_vector = do_machine_check;
1425
Borislav Petkov5e099542009-10-16 12:31:32 +02001426 __mcheck_cpu_init_generic();
1427 __mcheck_cpu_init_vendor(c);
1428 __mcheck_cpu_init_timer();
Andi Kleen9b1beaf2009-05-27 21:56:59 +02001429 INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
Hidetoshi Setob77e70b2011-06-08 10:56:02 +09001430 init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431}
1432
1433/*
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001434 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 */
1436
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001437static DEFINE_SPINLOCK(mce_chrdev_state_lock);
1438static int mce_chrdev_open_count; /* #times opened */
1439static int mce_chrdev_open_exclu; /* already open exclusive? */
Tim Hockinf528e7b2007-07-21 17:10:35 +02001440
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001441static int mce_chrdev_open(struct inode *inode, struct file *file)
Tim Hockinf528e7b2007-07-21 17:10:35 +02001442{
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001443 spin_lock(&mce_chrdev_state_lock);
Tim Hockinf528e7b2007-07-21 17:10:35 +02001444
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001445 if (mce_chrdev_open_exclu ||
1446 (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
1447 spin_unlock(&mce_chrdev_state_lock);
Ingo Molnare9eee032009-04-08 12:31:17 +02001448
Tim Hockinf528e7b2007-07-21 17:10:35 +02001449 return -EBUSY;
1450 }
1451
1452 if (file->f_flags & O_EXCL)
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001453 mce_chrdev_open_exclu = 1;
1454 mce_chrdev_open_count++;
Tim Hockinf528e7b2007-07-21 17:10:35 +02001455
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001456 spin_unlock(&mce_chrdev_state_lock);
Tim Hockinf528e7b2007-07-21 17:10:35 +02001457
Tim Hockinbd784322007-07-21 17:10:37 +02001458 return nonseekable_open(inode, file);
Tim Hockinf528e7b2007-07-21 17:10:35 +02001459}
1460
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001461static int mce_chrdev_release(struct inode *inode, struct file *file)
Tim Hockinf528e7b2007-07-21 17:10:35 +02001462{
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001463 spin_lock(&mce_chrdev_state_lock);
Tim Hockinf528e7b2007-07-21 17:10:35 +02001464
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001465 mce_chrdev_open_count--;
1466 mce_chrdev_open_exclu = 0;
Tim Hockinf528e7b2007-07-21 17:10:35 +02001467
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001468 spin_unlock(&mce_chrdev_state_lock);
Tim Hockinf528e7b2007-07-21 17:10:35 +02001469
1470 return 0;
1471}
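/*
 * Editor's illustration (hypothetical userspace program): the bookkeeping
 * above makes O_EXCL opens mutually exclusive with any other open, so a
 * second opener sees EBUSY:
 */
#if 0
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	int fd = open("/dev/mcelog", O_RDONLY | O_EXCL);

	if (fd < 0)		/* EBUSY while anyone else holds the device */
		perror("open /dev/mcelog");
	return fd < 0;
}
#endif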
1472
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001473static void collect_tscs(void *data)
1474{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 unsigned long *cpu_tsc = (unsigned long *)data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001477 rdtscll(cpu_tsc[smp_processor_id()]);
1478}
1479
Huang Ying482908b2010-05-18 14:35:22 +08001480static int mce_apei_read_done;
1481
1482/* Collect MCE records of the previous boot from persistent storage, via APEI ERST. */
1483static int __mce_read_apei(char __user **ubuf, size_t usize)
1484{
1485 int rc;
1486 u64 record_id;
1487 struct mce m;
1488
1489 if (usize < sizeof(struct mce))
1490 return -EINVAL;
1491
1492 rc = apei_read_mce(&m, &record_id);
1493	/* Error, or no more MCE records */
1494 if (rc <= 0) {
1495 mce_apei_read_done = 1;
1496 return rc;
1497 }
1498 rc = -EFAULT;
1499 if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
1500 return rc;
1501 /*
1502	 * Ideally the record would be cleared only after /sbin/mcelog
1503	 * has flushed it to disk or sent it over the network, but we
1504	 * have no interface for that yet, so just clear it now to
1505	 * avoid duplicates.
1506 */
1507 rc = apei_clear_mce(record_id);
1508 if (rc) {
1509 mce_apei_read_done = 1;
1510 return rc;
1511 }
1512 *ubuf += sizeof(struct mce);
1513
1514 return 0;
1515}
1516
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001517static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
1518 size_t usize, loff_t *off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 char __user *buf = ubuf;
Ingo Molnare9eee032009-04-08 12:31:17 +02001521 unsigned long *cpu_tsc;
1522 unsigned prev, next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 int i, err;
1524
Mike Travis6bca67f2008-07-18 18:11:27 -07001525 cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
Andi Kleenf0de53b2005-04-16 15:25:10 -07001526 if (!cpu_tsc)
1527 return -ENOMEM;
1528
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001529 mutex_lock(&mce_chrdev_read_mutex);
Huang Ying482908b2010-05-18 14:35:22 +08001530
1531 if (!mce_apei_read_done) {
1532 err = __mce_read_apei(&buf, usize);
1533 if (err || buf != ubuf)
1534 goto out;
1535 }
1536
Paul E. McKenneyf56e8a02010-03-05 15:03:27 -08001537 next = rcu_dereference_check_mce(mcelog.next);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538
1539 /* Only supports full reads right now */
Huang Ying482908b2010-05-18 14:35:22 +08001540 err = -EINVAL;
1541 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
1542 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543
1544 err = 0;
Huang Yingef41df4342009-02-12 13:39:34 +01001545 prev = 0;
1546 do {
1547 for (i = prev; i < next; i++) {
1548 unsigned long start = jiffies;
Hidetoshi Seto559faa62011-06-08 11:00:08 +09001549 struct mce *m = &mcelog.entry[i];
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001550
Hidetoshi Seto559faa62011-06-08 11:00:08 +09001551 while (!m->finished) {
Huang Yingef41df4342009-02-12 13:39:34 +01001552 if (time_after_eq(jiffies, start + 2)) {
Hidetoshi Seto559faa62011-06-08 11:00:08 +09001553 memset(m, 0, sizeof(*m));
Huang Yingef41df4342009-02-12 13:39:34 +01001554 goto timeout;
1555 }
1556 cpu_relax();
Andi Kleen673242c2005-09-12 18:49:24 +02001557 }
Huang Yingef41df4342009-02-12 13:39:34 +01001558 smp_rmb();
Hidetoshi Seto559faa62011-06-08 11:00:08 +09001559 err |= copy_to_user(buf, m, sizeof(*m));
1560 buf += sizeof(*m);
Huang Yingef41df4342009-02-12 13:39:34 +01001561timeout:
1562 ;
Andi Kleen673242c2005-09-12 18:49:24 +02001563 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564
Huang Yingef41df4342009-02-12 13:39:34 +01001565 memset(mcelog.entry + prev, 0,
1566 (next - prev) * sizeof(struct mce));
1567 prev = next;
1568 next = cmpxchg(&mcelog.next, prev, 0);
1569 } while (next != prev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570
Paul E. McKenneyb2b18662005-06-25 14:55:38 -07001571 synchronize_sched();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001573 /*
1574	 * Collect entries that were still being written before the
1575	 * synchronize_sched() above completed.
1576 */
Jens Axboe15c8b6c2008-05-09 09:39:44 +02001577 on_each_cpu(collect_tscs, cpu_tsc, 1);
Ingo Molnare9eee032009-04-08 12:31:17 +02001578
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001579 for (i = next; i < MCE_LOG_LEN; i++) {
Hidetoshi Seto559faa62011-06-08 11:00:08 +09001580 struct mce *m = &mcelog.entry[i];
1581
1582 if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
1583 err |= copy_to_user(buf, m, sizeof(*m));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584 smp_rmb();
Hidetoshi Seto559faa62011-06-08 11:00:08 +09001585 buf += sizeof(*m);
1586 memset(m, 0, sizeof(*m));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 }
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001588 }
Huang Ying482908b2010-05-18 14:35:22 +08001589
1590 if (err)
1591 err = -EFAULT;
1592
1593out:
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001594 mutex_unlock(&mce_chrdev_read_mutex);
Andi Kleenf0de53b2005-04-16 15:25:10 -07001595 kfree(cpu_tsc);
Ingo Molnare9eee032009-04-08 12:31:17 +02001596
Huang Ying482908b2010-05-18 14:35:22 +08001597 return err ? err : buf - ubuf;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598}
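/*
 * Editor's illustration (hypothetical userspace program): the handler above
 * only accepts full reads, i.e. a buffer for all MCE_LOG_LEN records at
 * offset 0; struct mce and MCE_LOG_LEN are assumed to come from <asm/mce.h>:
 */
#if 0
#include <asm/mce.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	struct mce log[MCE_LOG_LEN];
	int fd = open("/dev/mcelog", O_RDONLY);
	ssize_t n = read(fd, log, sizeof(log));	/* partial reads -> EINVAL */

	return n < 0;	/* on success, n / sizeof(struct mce) records */
}
#endif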
1599
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001600static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
Tim Hockine02e68d2007-07-21 17:10:36 +02001601{
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001602 poll_wait(file, &mce_chrdev_wait, wait);
Paul E. McKenneya4dd9922011-04-01 07:15:14 -07001603 if (rcu_access_index(mcelog.next))
Tim Hockine02e68d2007-07-21 17:10:36 +02001604 return POLLIN | POLLRDNORM;
Huang Ying482908b2010-05-18 14:35:22 +08001605 if (!mce_apei_read_done && apei_check_mce())
1606 return POLLIN | POLLRDNORM;
Tim Hockine02e68d2007-07-21 17:10:36 +02001607 return 0;
1608}
1609
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001610static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
1611 unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612{
1613 int __user *p = (int __user *)arg;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001614
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615 if (!capable(CAP_SYS_ADMIN))
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001616 return -EPERM;
Ingo Molnare9eee032009-04-08 12:31:17 +02001617
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 switch (cmd) {
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001619 case MCE_GET_RECORD_LEN:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 return put_user(sizeof(struct mce), p);
1621 case MCE_GET_LOG_LEN:
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001622 return put_user(MCE_LOG_LEN, p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 case MCE_GETCLEAR_FLAGS: {
1624 unsigned flags;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001625
1626 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627 flags = mcelog.flags;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001628 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
Ingo Molnare9eee032009-04-08 12:31:17 +02001629
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001630 return put_user(flags, p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 }
1632 default:
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001633 return -ENOTTY;
1634 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635}
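/*
 * Editor's illustration (userspace fragment; fd is an open /dev/mcelog
 * descriptor and the caller has CAP_SYS_ADMIN): the two length ioctls let
 * /sbin/mcelog size its read buffer without hardcoding the record format:
 */
#if 0
#include <sys/ioctl.h>

	int reclen, loglen;

	ioctl(fd, MCE_GET_RECORD_LEN, &reclen);	/* sizeof(struct mce) */
	ioctl(fd, MCE_GET_LOG_LEN, &loglen);	/* MCE_LOG_LEN */
#endif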
1636
Luck, Tony66f5ddf2011-11-03 11:46:47 -07001637static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
1638 size_t usize, loff_t *off);
1639
1640void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
1641 const char __user *ubuf,
1642 size_t usize, loff_t *off))
1643{
1644 mce_write = fn;
1645}
1646EXPORT_SYMBOL_GPL(register_mce_write_callback);
1647
1648ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
1649 size_t usize, loff_t *off)
1650{
1651 if (mce_write)
1652 return mce_write(filp, ubuf, usize, off);
1653 else
1654 return -EINVAL;
1655}
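/*
 * Editor's sketch: a module that wants to accept writes on /dev/mcelog
 * (mce-inject, for instance) installs a handler like the hypothetical one
 * below; only the most recently registered callback is active:
 */
#if 0
static ssize_t my_inject_write(struct file *filp, const char __user *ubuf,
			       size_t usize, loff_t *off)
{
	/* copy_from_user() a struct mce here and queue it for injection */
	return usize;
}

	/* in the module's init path: */
	register_mce_write_callback(my_inject_write);
#endif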
1656
1657static const struct file_operations mce_chrdev_ops = {
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001658 .open = mce_chrdev_open,
1659 .release = mce_chrdev_release,
1660 .read = mce_chrdev_read,
Luck, Tony66f5ddf2011-11-03 11:46:47 -07001661 .write = mce_chrdev_write,
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001662 .poll = mce_chrdev_poll,
1663 .unlocked_ioctl = mce_chrdev_ioctl,
1664 .llseek = no_llseek,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665};
1666
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09001667static struct miscdevice mce_chrdev_device = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 MISC_MCELOG_MINOR,
1669 "mcelog",
1670 &mce_chrdev_ops,
1671};
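/*
 * Editor's note: this registers minor 227 (MISC_MCELOG_MINOR) under the
 * misc major, 10, so on a system without udev the device node can be
 * created by hand with "mknod /dev/mcelog c 10 227".
 */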
1672
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001673/*
Hidetoshi Seto62fdac52009-06-11 16:06:07 +09001674 * mce=off Disables machine check
1675 * mce=no_cmci Disables CMCI
1676 * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
1677 * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
Andi Kleen3c079792009-05-27 21:56:55 +02001678 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
1679 * monarchtimeout is how long (in microseconds) to wait for
1680 * other CPUs on a machine check, or 0 to not wait
Hidetoshi Seto13503fa2009-03-26 17:39:20 +09001681 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
1682 * mce=nobootlog Don't log MCEs from before booting.
1683 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684static int __init mcheck_enable(char *str)
1685{
Bartlomiej Zolnierkiewicze3346fc2009-07-28 23:55:09 +02001686 if (*str == 0) {
Andi Kleen4efc0672009-04-28 19:07:31 +02001687 enable_p5_mce();
Bartlomiej Zolnierkiewicze3346fc2009-07-28 23:55:09 +02001688 return 1;
1689 }
Andi Kleen4efc0672009-04-28 19:07:31 +02001690 if (*str == '=')
1691 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 if (!strcmp(str, "off"))
Andi Kleen04b2b1a2009-04-28 22:50:19 +02001693 mce_disabled = 1;
Hidetoshi Seto62fdac52009-06-11 16:06:07 +09001694 else if (!strcmp(str, "no_cmci"))
1695 mce_cmci_disabled = 1;
1696 else if (!strcmp(str, "dont_log_ce"))
1697 mce_dont_log_ce = 1;
1698 else if (!strcmp(str, "ignore_ce"))
1699 mce_ignore_ce = 1;
Hidetoshi Seto13503fa2009-03-26 17:39:20 +09001700 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1701 mce_bootlog = (str[0] == 'b');
Andi Kleen3c079792009-05-27 21:56:55 +02001702 else if (isdigit(str[0])) {
Andi Kleen8c566ef2005-09-12 18:49:24 +02001703 get_option(&str, &tolerant);
Andi Kleen3c079792009-05-27 21:56:55 +02001704 if (*str == ',') {
1705 ++str;
1706 get_option(&str, &monarch_timeout);
1707 }
1708 } else {
Andi Kleen4efc0672009-04-28 19:07:31 +02001709 printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
Hidetoshi Seto13503fa2009-03-26 17:39:20 +09001710 str);
1711 return 0;
1712 }
OGAWA Hirofumi9b410462006-03-31 02:30:33 -08001713 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714}
Andi Kleen4efc0672009-04-28 19:07:31 +02001715__setup("mce", mcheck_enable);
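/*
 * Editor's examples of the syntax above: "mce=off" disables machine checks
 * entirely; "mce=2,500000" sets tolerant=2 with a 500 ms monarch timeout
 * (the value is in microseconds); "mce=nobootlog" suppresses logging of
 * left-over pre-boot MCEs on all vendors.
 */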
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716
Yong Wanga2202aa2009-11-10 09:38:24 +08001717int __init mcheck_init(void)
Borislav Petkovb33a6362009-10-16 12:31:33 +02001718{
Yong Wanga2202aa2009-11-10 09:38:24 +08001719 mcheck_intel_therm_init();
1720
Borislav Petkovb33a6362009-10-16 12:31:33 +02001721 return 0;
1722}
Borislav Petkovb33a6362009-10-16 12:31:33 +02001723
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001724/*
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001725 * mce_syscore: PM support
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001726 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727
Andi Kleen973a2dd2009-02-12 13:39:32 +01001728/*
1729 * Disable machine checks on suspend and shutdown. We can't really handle
1730 * them later.
1731 */
Borislav Petkov5e099542009-10-16 12:31:32 +02001732static int mce_disable_error_reporting(void)
Andi Kleen973a2dd2009-02-12 13:39:32 +01001733{
1734 int i;
1735
Andi Kleen06b7a7a2009-04-27 18:37:43 +02001736 for (i = 0; i < banks; i++) {
Andi Kleencebe1822009-07-09 00:31:43 +02001737 struct mce_bank *b = &mce_banks[i];
Ingo Molnar11868a22009-09-23 17:49:55 +02001738
Andi Kleencebe1822009-07-09 00:31:43 +02001739 if (b->init)
Andi Kleena2d32bc2009-07-09 00:31:44 +02001740 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
Andi Kleen06b7a7a2009-04-27 18:37:43 +02001741 }
Andi Kleen973a2dd2009-02-12 13:39:32 +01001742 return 0;
1743}
1744
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001745static int mce_syscore_suspend(void)
Andi Kleen973a2dd2009-02-12 13:39:32 +01001746{
Borislav Petkov5e099542009-10-16 12:31:32 +02001747 return mce_disable_error_reporting();
Andi Kleen973a2dd2009-02-12 13:39:32 +01001748}
1749
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001750static void mce_syscore_shutdown(void)
Andi Kleen973a2dd2009-02-12 13:39:32 +01001751{
Rafael J. Wysockif3c6ea12011-03-23 22:15:54 +01001752 mce_disable_error_reporting();
Andi Kleen973a2dd2009-02-12 13:39:32 +01001753}
1754
Ingo Molnare9eee032009-04-08 12:31:17 +02001755/*
1756 * On resume, clear all MCE state; we don't want to see leftovers from the BIOS.
1757 * Only one CPU is active at this time, the others get re-added later using
1758 * CPU hotplug:
1759 */
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001760static void mce_syscore_resume(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761{
Borislav Petkov5e099542009-10-16 12:31:32 +02001762 __mcheck_cpu_init_generic();
Tejun Heo7b543a52010-12-18 16:30:05 +01001763 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764}
1765
Rafael J. Wysockif3c6ea12011-03-23 22:15:54 +01001766static struct syscore_ops mce_syscore_ops = {
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001767 .suspend = mce_syscore_suspend,
1768 .shutdown = mce_syscore_shutdown,
1769 .resume = mce_syscore_resume,
Rafael J. Wysockif3c6ea12011-03-23 22:15:54 +01001770};
1771
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001772/*
1773 * mce_sysdev: Sysfs support
1774 */
1775
Andi Kleen52d168e2009-02-12 13:39:29 +01001776static void mce_cpu_restart(void *data)
1777{
Tejun Heo7b543a52010-12-18 16:30:05 +01001778 if (!mce_available(__this_cpu_ptr(&cpu_info)))
Hidetoshi Seto33edbf02009-06-15 17:18:45 +09001779 return;
Borislav Petkov5e099542009-10-16 12:31:32 +02001780 __mcheck_cpu_init_generic();
1781 __mcheck_cpu_init_timer();
Andi Kleen52d168e2009-02-12 13:39:29 +01001782}
1783
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784/* Reinit MCEs after user configuration changes */
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001785static void mce_restart(void)
1786{
Hidetoshi Seto9aaef962011-06-17 04:40:36 -04001787 mce_timer_delete_all();
Andi Kleen52d168e2009-02-12 13:39:29 +01001788 on_each_cpu(mce_cpu_restart, NULL, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789}
1790
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001791/* Toggle features for corrected errors */
Hidetoshi Seto9aaef962011-06-17 04:40:36 -04001792static void mce_disable_cmci(void *data)
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001793{
Tejun Heo7b543a52010-12-18 16:30:05 +01001794 if (!mce_available(__this_cpu_ptr(&cpu_info)))
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001795 return;
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001796 cmci_clear();
1797}
1798
1799static void mce_enable_ce(void *all)
1800{
Tejun Heo7b543a52010-12-18 16:30:05 +01001801 if (!mce_available(__this_cpu_ptr(&cpu_info)))
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001802 return;
1803 cmci_reenable();
1804 cmci_recheck();
1805 if (all)
Borislav Petkov5e099542009-10-16 12:31:32 +02001806 __mcheck_cpu_init_timer();
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001807}
1808
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001809static struct sysdev_class mce_sysdev_class = {
Ingo Molnare9eee032009-04-08 12:31:17 +02001810 .name = "machinecheck",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811};
1812
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001813DEFINE_PER_CPU(struct sys_device, mce_sysdev);
Ingo Molnare9eee032009-04-08 12:31:17 +02001814
1815__cpuinitdata
1816void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817
Andi Kleencebe1822009-07-09 00:31:43 +02001818static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
1819{
1820 return container_of(attr, struct mce_bank, attr);
1821}
Andi Kleen0d7482e32009-02-17 23:07:13 +01001822
1823static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
1824 char *buf)
1825{
Andi Kleencebe1822009-07-09 00:31:43 +02001826 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
Andi Kleen0d7482e32009-02-17 23:07:13 +01001827}
1828
1829static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
Hidetoshi Seto9319cec2009-04-14 17:26:30 +09001830 const char *buf, size_t size)
Andi Kleen0d7482e32009-02-17 23:07:13 +01001831{
Hidetoshi Seto9319cec2009-04-14 17:26:30 +09001832 u64 new;
Ingo Molnare9eee032009-04-08 12:31:17 +02001833
Hidetoshi Seto9319cec2009-04-14 17:26:30 +09001834 if (strict_strtoull(buf, 0, &new) < 0)
Andi Kleen0d7482e32009-02-17 23:07:13 +01001835 return -EINVAL;
Ingo Molnare9eee032009-04-08 12:31:17 +02001836
Andi Kleencebe1822009-07-09 00:31:43 +02001837 attr_to_bank(attr)->ctl = new;
Andi Kleen0d7482e32009-02-17 23:07:13 +01001838 mce_restart();
Ingo Molnare9eee032009-04-08 12:31:17 +02001839
Hidetoshi Seto9319cec2009-04-14 17:26:30 +09001840 return size;
Andi Kleen0d7482e32009-02-17 23:07:13 +01001841}
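/*
 * Editor's illustration: each bank control is visible per CPU in sysfs, so
 * (assuming the usual sysdev paths) the GART quirk above could also be
 * applied by hand with
 *
 *	echo 0xfffffffffffffbff > \
 *		/sys/devices/system/machinecheck/machinecheck0/bank4
 *
 * mce_banks[] is shared by all CPUs, so mce_restart() then rewrites
 * MCi_CTL everywhere with the new mask.
 */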
Andi Kleena98f0dd2007-02-13 13:26:23 +01001842
Ingo Molnare9eee032009-04-08 12:31:17 +02001843static ssize_t
1844show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
Andi Kleena98f0dd2007-02-13 13:26:23 +01001845{
Hidetoshi Seto1020bcb2009-06-15 17:20:57 +09001846 strcpy(buf, mce_helper);
Andi Kleena98f0dd2007-02-13 13:26:23 +01001847 strcat(buf, "\n");
Hidetoshi Seto1020bcb2009-06-15 17:20:57 +09001848 return strlen(mce_helper) + 1;
Andi Kleena98f0dd2007-02-13 13:26:23 +01001849}
1850
Andi Kleen4a0b2b42008-07-01 18:48:41 +02001851static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
Ingo Molnare9eee032009-04-08 12:31:17 +02001852 const char *buf, size_t siz)
Andi Kleena98f0dd2007-02-13 13:26:23 +01001853{
1854 char *p;
Ingo Molnare9eee032009-04-08 12:31:17 +02001855
Hidetoshi Seto1020bcb2009-06-15 17:20:57 +09001856 strncpy(mce_helper, buf, sizeof(mce_helper));
1857 mce_helper[sizeof(mce_helper)-1] = 0;
Hidetoshi Seto1020bcb2009-06-15 17:20:57 +09001858 p = strchr(mce_helper, '\n');
Ingo Molnare9eee032009-04-08 12:31:17 +02001859
Jan Beuliche9084ec2009-07-16 09:45:11 +01001860 if (p)
Ingo Molnare9eee032009-04-08 12:31:17 +02001861 *p = 0;
1862
Jan Beuliche9084ec2009-07-16 09:45:11 +01001863 return strlen(mce_helper) + !!p;
Andi Kleena98f0dd2007-02-13 13:26:23 +01001864}
1865
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001866static ssize_t set_ignore_ce(struct sys_device *s,
1867 struct sysdev_attribute *attr,
1868 const char *buf, size_t size)
1869{
1870 u64 new;
1871
1872 if (strict_strtoull(buf, 0, &new) < 0)
1873 return -EINVAL;
1874
1875 if (mce_ignore_ce ^ !!new) {
1876 if (new) {
1877 /* disable ce features */
Hidetoshi Seto9aaef962011-06-17 04:40:36 -04001878 mce_timer_delete_all();
1879 on_each_cpu(mce_disable_cmci, NULL, 1);
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001880 mce_ignore_ce = 1;
1881 } else {
1882 /* enable ce features */
1883 mce_ignore_ce = 0;
1884 on_each_cpu(mce_enable_ce, (void *)1, 1);
1885 }
1886 }
1887 return size;
1888}
1889
1890static ssize_t set_cmci_disabled(struct sys_device *s,
1891 struct sysdev_attribute *attr,
1892 const char *buf, size_t size)
1893{
1894 u64 new;
1895
1896 if (strict_strtoull(buf, 0, &new) < 0)
1897 return -EINVAL;
1898
1899 if (mce_cmci_disabled ^ !!new) {
1900 if (new) {
1901 /* disable cmci */
Hidetoshi Seto9aaef962011-06-17 04:40:36 -04001902 on_each_cpu(mce_disable_cmci, NULL, 1);
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001903 mce_cmci_disabled = 1;
1904 } else {
1905 /* enable cmci */
1906 mce_cmci_disabled = 0;
1907 on_each_cpu(mce_enable_ce, NULL, 1);
1908 }
1909 }
1910 return size;
1911}
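/*
 * Editor's illustration: both toggles above accept 0 or 1, e.g.
 *
 *	echo 1 > /sys/devices/system/machinecheck/machinecheck0/cmci_disabled
 *
 * drops every CPU back to timer-based polling, and writing 0 re-arms CMCI
 * and rechecks the banks for pending events.
 */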
1912
Andi Kleenb56f6422009-05-27 21:56:52 +02001913static ssize_t store_int_with_restart(struct sys_device *s,
1914 struct sysdev_attribute *attr,
1915 const char *buf, size_t size)
1916{
1917 ssize_t ret = sysdev_store_int(s, attr, buf, size);
1918 mce_restart();
1919 return ret;
1920}
1921
Andi Kleena98f0dd2007-02-13 13:26:23 +01001922static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
Andi Kleend95d62c2008-07-01 18:48:43 +02001923static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
Andi Kleen3c079792009-05-27 21:56:55 +02001924static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001925static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
Ingo Molnare9eee032009-04-08 12:31:17 +02001926
Andi Kleenb56f6422009-05-27 21:56:52 +02001927static struct sysdev_ext_attribute attr_check_interval = {
1928 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
1929 store_int_with_restart),
1930 &check_interval
1931};
Ingo Molnare9eee032009-04-08 12:31:17 +02001932
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001933static struct sysdev_ext_attribute attr_ignore_ce = {
1934 _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
1935 &mce_ignore_ce
1936};
1937
1938static struct sysdev_ext_attribute attr_cmci_disabled = {
Yinghai Lu74b602c2009-06-17 14:43:32 -07001939 _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001940 &mce_cmci_disabled
1941};
1942
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001943static struct sysdev_attribute *mce_sysdev_attrs[] = {
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001944 &attr_tolerant.attr,
1945 &attr_check_interval.attr,
1946 &attr_trigger,
Andi Kleen3c079792009-05-27 21:56:55 +02001947 &attr_monarch_timeout.attr,
Hidetoshi Seto9af43b52009-06-15 17:21:36 +09001948 &attr_dont_log_ce.attr,
1949 &attr_ignore_ce.attr,
1950 &attr_cmci_disabled.attr,
Andi Kleena98f0dd2007-02-13 13:26:23 +01001951 NULL
1952};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001954static cpumask_var_t mce_sysdev_initialized;
Andreas Herrmannbae19fe2007-11-14 17:00:44 -08001955
Ingo Molnare9eee032009-04-08 12:31:17 +02001956/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001957static __cpuinit int mce_sysdev_create(unsigned int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958{
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001959 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 int err;
Hidetoshi Setob1f49f92009-06-18 14:53:24 +09001961 int i, j;
Mike Travis92cb7612007-10-19 20:35:04 +02001962
Andreas Herrmann90367552007-11-07 02:12:58 +01001963 if (!mce_available(&boot_cpu_data))
Andi Kleen91c6d402005-07-28 21:15:39 -07001964 return -EIO;
1965
Hidetoshi Setof6783c42011-06-08 10:59:19 +09001966 memset(&sysdev->kobj, 0, sizeof(struct kobject));
1967 sysdev->id = cpu;
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001968 sysdev->cls = &mce_sysdev_class;
Andi Kleen91c6d402005-07-28 21:15:39 -07001969
Hidetoshi Setof6783c42011-06-08 10:59:19 +09001970 err = sysdev_register(sysdev);
Akinobu Mitad435d862007-10-18 03:05:15 -07001971 if (err)
1972 return err;
Andi Kleen91c6d402005-07-28 21:15:39 -07001973
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001974 for (i = 0; mce_sysdev_attrs[i]; i++) {
1975 err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]);
Akinobu Mitad435d862007-10-18 03:05:15 -07001976 if (err)
1977 goto error;
Andi Kleen91c6d402005-07-28 21:15:39 -07001978 }
Hidetoshi Setob1f49f92009-06-18 14:53:24 +09001979 for (j = 0; j < banks; j++) {
Hidetoshi Setof6783c42011-06-08 10:59:19 +09001980 err = sysdev_create_file(sysdev, &mce_banks[j].attr);
Andi Kleen0d7482e32009-02-17 23:07:13 +01001981 if (err)
1982 goto error2;
1983 }
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001984 cpumask_set_cpu(cpu, mce_sysdev_initialized);
Akinobu Mitad435d862007-10-18 03:05:15 -07001985
1986 return 0;
Andi Kleen0d7482e32009-02-17 23:07:13 +01001987error2:
Hidetoshi Setob1f49f92009-06-18 14:53:24 +09001988 while (--j >= 0)
Hidetoshi Setof6783c42011-06-08 10:59:19 +09001989 sysdev_remove_file(sysdev, &mce_banks[j].attr);
Akinobu Mitad435d862007-10-18 03:05:15 -07001990error:
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001991 while (--i >= 0)
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001992 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001993
Hidetoshi Setof6783c42011-06-08 10:59:19 +09001994 sysdev_unregister(sysdev);
Akinobu Mitad435d862007-10-18 03:05:15 -07001995
Andi Kleen91c6d402005-07-28 21:15:39 -07001996 return err;
1997}
1998
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09001999static __cpuinit void mce_sysdev_remove(unsigned int cpu)
Andi Kleen91c6d402005-07-28 21:15:39 -07002000{
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002001 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
Shaohua Li73ca5352006-01-11 22:43:06 +01002002 int i;
2003
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002004 if (!cpumask_test_cpu(cpu, mce_sysdev_initialized))
Andreas Herrmannbae19fe2007-11-14 17:00:44 -08002005 return;
2006
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002007 for (i = 0; mce_sysdev_attrs[i]; i++)
2008 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);
Ingo Molnarcb491fc2009-04-08 12:31:17 +02002009
Andi Kleen0d7482e32009-02-17 23:07:13 +01002010 for (i = 0; i < banks; i++)
Hidetoshi Setof6783c42011-06-08 10:59:19 +09002011 sysdev_remove_file(sysdev, &mce_banks[i].attr);
Ingo Molnarcb491fc2009-04-08 12:31:17 +02002012
Hidetoshi Setof6783c42011-06-08 10:59:19 +09002013 sysdev_unregister(sysdev);
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002014 cpumask_clear_cpu(cpu, mce_sysdev_initialized);
Andi Kleen91c6d402005-07-28 21:15:39 -07002015}
Andi Kleen91c6d402005-07-28 21:15:39 -07002016
Andi Kleend6b75582009-02-12 13:39:31 +01002017/* Make sure there are no machine checks on offlined CPUs. */
Hidetoshi Seto767df1b2009-11-26 17:29:02 +09002018static void __cpuinit mce_disable_cpu(void *h)
Andi Kleend6b75582009-02-12 13:39:31 +01002019{
Andi Kleen88ccbed2009-02-12 13:49:36 +01002020 unsigned long action = *(unsigned long *)h;
Ingo Molnarcb491fc2009-04-08 12:31:17 +02002021 int i;
Andi Kleend6b75582009-02-12 13:39:31 +01002022
Tejun Heo7b543a52010-12-18 16:30:05 +01002023 if (!mce_available(__this_cpu_ptr(&cpu_info)))
Andi Kleend6b75582009-02-12 13:39:31 +01002024 return;
Hidetoshi Seto767df1b2009-11-26 17:29:02 +09002025
Andi Kleen88ccbed2009-02-12 13:49:36 +01002026 if (!(action & CPU_TASKS_FROZEN))
2027 cmci_clear();
Andi Kleen06b7a7a2009-04-27 18:37:43 +02002028 for (i = 0; i < banks; i++) {
Andi Kleencebe1822009-07-09 00:31:43 +02002029 struct mce_bank *b = &mce_banks[i];
Ingo Molnar11868a22009-09-23 17:49:55 +02002030
Andi Kleencebe1822009-07-09 00:31:43 +02002031 if (b->init)
Andi Kleena2d32bc2009-07-09 00:31:44 +02002032 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
Andi Kleen06b7a7a2009-04-27 18:37:43 +02002033 }
Andi Kleend6b75582009-02-12 13:39:31 +01002034}
2035
Hidetoshi Seto767df1b2009-11-26 17:29:02 +09002036static void __cpuinit mce_reenable_cpu(void *h)
Andi Kleend6b75582009-02-12 13:39:31 +01002037{
Andi Kleen88ccbed2009-02-12 13:49:36 +01002038 unsigned long action = *(unsigned long *)h;
Ingo Molnare9eee032009-04-08 12:31:17 +02002039 int i;
Andi Kleend6b75582009-02-12 13:39:31 +01002040
Tejun Heo7b543a52010-12-18 16:30:05 +01002041 if (!mce_available(__this_cpu_ptr(&cpu_info)))
Andi Kleend6b75582009-02-12 13:39:31 +01002042 return;
Ingo Molnare9eee032009-04-08 12:31:17 +02002043
Andi Kleen88ccbed2009-02-12 13:49:36 +01002044 if (!(action & CPU_TASKS_FROZEN))
2045 cmci_reenable();
Andi Kleen06b7a7a2009-04-27 18:37:43 +02002046 for (i = 0; i < banks; i++) {
Andi Kleencebe1822009-07-09 00:31:43 +02002047 struct mce_bank *b = &mce_banks[i];
Ingo Molnar11868a22009-09-23 17:49:55 +02002048
Andi Kleencebe1822009-07-09 00:31:43 +02002049 if (b->init)
Andi Kleena2d32bc2009-07-09 00:31:44 +02002050 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
Andi Kleen06b7a7a2009-04-27 18:37:43 +02002051 }
Andi Kleend6b75582009-02-12 13:39:31 +01002052}
2053
Andi Kleen91c6d402005-07-28 21:15:39 -07002054/* Get notified when a cpu comes on/off. Be hotplug friendly. */
Ingo Molnare9eee032009-04-08 12:31:17 +02002055static int __cpuinit
2056mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
Andi Kleen91c6d402005-07-28 21:15:39 -07002057{
2058 unsigned int cpu = (unsigned long)hcpu;
Andi Kleen52d168e2009-02-12 13:39:29 +01002059 struct timer_list *t = &per_cpu(mce_timer, cpu);
Andi Kleen91c6d402005-07-28 21:15:39 -07002060
2061 switch (action) {
Andreas Herrmannbae19fe2007-11-14 17:00:44 -08002062 case CPU_ONLINE:
2063 case CPU_ONLINE_FROZEN:
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002064 mce_sysdev_create(cpu);
Rafael J. Wysocki87357282008-08-22 22:23:09 +02002065 if (threshold_cpu_callback)
2066 threshold_cpu_callback(action, cpu);
Andi Kleen91c6d402005-07-28 21:15:39 -07002067 break;
Andi Kleen91c6d402005-07-28 21:15:39 -07002068 case CPU_DEAD:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07002069 case CPU_DEAD_FROZEN:
Rafael J. Wysocki87357282008-08-22 22:23:09 +02002070 if (threshold_cpu_callback)
2071 threshold_cpu_callback(action, cpu);
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002072 mce_sysdev_remove(cpu);
Andi Kleen91c6d402005-07-28 21:15:39 -07002073 break;
Andi Kleen52d168e2009-02-12 13:39:29 +01002074 case CPU_DOWN_PREPARE:
2075 case CPU_DOWN_PREPARE_FROZEN:
2076 del_timer_sync(t);
Andi Kleen88ccbed2009-02-12 13:49:36 +01002077 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
Andi Kleen52d168e2009-02-12 13:39:29 +01002078 break;
2079 case CPU_DOWN_FAILED:
2080 case CPU_DOWN_FAILED_FROZEN:
Hidetoshi Setofe5ed912009-12-03 11:33:08 +09002081 if (!mce_ignore_ce && check_interval) {
2082 t->expires = round_jiffies(jiffies +
Tejun Heo245b2e72009-06-24 15:13:48 +09002083 __get_cpu_var(mce_next_interval));
Hidetoshi Setofe5ed912009-12-03 11:33:08 +09002084 add_timer_on(t, cpu);
2085 }
Andi Kleen88ccbed2009-02-12 13:49:36 +01002086 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
2087 break;
2088 case CPU_POST_DEAD:
2089 /* intentionally ignoring frozen here */
2090 cmci_rediscover(cpu);
Andi Kleen52d168e2009-02-12 13:39:29 +01002091 break;
Andi Kleen91c6d402005-07-28 21:15:39 -07002092 }
Andreas Herrmannbae19fe2007-11-14 17:00:44 -08002093 return NOTIFY_OK;
Andi Kleen91c6d402005-07-28 21:15:39 -07002094}
2095
Sam Ravnborg1e356692008-01-30 13:33:36 +01002096static struct notifier_block mce_cpu_notifier __cpuinitdata = {
Andi Kleen91c6d402005-07-28 21:15:39 -07002097 .notifier_call = mce_cpu_callback,
2098};
2099
Andi Kleencebe1822009-07-09 00:31:43 +02002100static __init void mce_init_banks(void)
Andi Kleen0d7482e32009-02-17 23:07:13 +01002101{
2102 int i;
2103
Andi Kleen0d7482e32009-02-17 23:07:13 +01002104 for (i = 0; i < banks; i++) {
Andi Kleencebe1822009-07-09 00:31:43 +02002105 struct mce_bank *b = &mce_banks[i];
2106 struct sysdev_attribute *a = &b->attr;
Ingo Molnare9eee032009-04-08 12:31:17 +02002107
Eric W. Biedermana07e4152010-02-11 15:23:05 -08002108 sysfs_attr_init(&a->attr);
Andi Kleencebe1822009-07-09 00:31:43 +02002109 a->attr.name = b->attrname;
2110 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
Ingo Molnare9eee032009-04-08 12:31:17 +02002111
2112 a->attr.mode = 0644;
2113 a->show = show_bank;
2114 a->store = set_bank;
Andi Kleen0d7482e32009-02-17 23:07:13 +01002115 }
Andi Kleen0d7482e32009-02-17 23:07:13 +01002116}
2117
Borislav Petkov5e099542009-10-16 12:31:32 +02002118static __init int mcheck_init_device(void)
Andi Kleen91c6d402005-07-28 21:15:39 -07002119{
2120 int err;
2121 int i = 0;
2122
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 if (!mce_available(&boot_cpu_data))
2124 return -EIO;
Andi Kleen0d7482e32009-02-17 23:07:13 +01002125
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002126 zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL);
Rusty Russell996867d2009-03-13 14:49:51 +10302127
Andi Kleencebe1822009-07-09 00:31:43 +02002128 mce_init_banks();
Andi Kleen0d7482e32009-02-17 23:07:13 +01002129
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002130 err = sysdev_class_register(&mce_sysdev_class);
Akinobu Mitad435d862007-10-18 03:05:15 -07002131 if (err)
2132 return err;
Andi Kleen91c6d402005-07-28 21:15:39 -07002133
2134 for_each_online_cpu(i) {
Hidetoshi Setoc7cece82011-06-08 11:02:03 +09002135 err = mce_sysdev_create(i);
Akinobu Mitad435d862007-10-18 03:05:15 -07002136 if (err)
2137 return err;
Andi Kleen91c6d402005-07-28 21:15:39 -07002138 }
2139
Rafael J. Wysockif3c6ea12011-03-23 22:15:54 +01002140 register_syscore_ops(&mce_syscore_ops);
Chandra Seetharamanbe6b5a32006-07-30 03:03:37 -07002141 register_hotcpu_notifier(&mce_cpu_notifier);
Hidetoshi Seto93b62c32011-06-08 11:00:45 +09002142
2143 /* register character device /dev/mcelog */
2144 misc_register(&mce_chrdev_device);
Ingo Molnare9eee032009-04-08 12:31:17 +02002145
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147}
Borislav Petkov5e099542009-10-16 12:31:32 +02002148device_initcall(mcheck_init_device);
Ingo Molnara988d332009-04-08 12:31:25 +02002149
Andi Kleend7c3c9a2009-04-28 23:07:25 +02002150/*
2151 * Old style boot options parsing. Only for compatibility.
2152 */
2153static int __init mcheck_disable(char *str)
2154{
2155 mce_disabled = 1;
2156 return 1;
2157}
2158__setup("nomce", mcheck_disable);
Huang Ying5be9ed22009-07-31 09:41:42 +08002159
2160#ifdef CONFIG_DEBUG_FS
2161struct dentry *mce_get_debugfs_dir(void)
2162{
2163 static struct dentry *dmce;
2164
2165 if (!dmce)
2166 dmce = debugfs_create_dir("mce", NULL);
2167
2168 return dmce;
2169}
Huang Yingbf783f92009-07-31 09:41:43 +08002170
2171static void mce_reset(void)
2172{
2173 cpu_missing = 0;
2174 atomic_set(&mce_fake_paniced, 0);
2175 atomic_set(&mce_executing, 0);
2176 atomic_set(&mce_callin, 0);
2177 atomic_set(&global_nwo, 0);
2178}
2179
2180static int fake_panic_get(void *data, u64 *val)
2181{
2182 *val = fake_panic;
2183 return 0;
2184}
2185
2186static int fake_panic_set(void *data, u64 val)
2187{
2188 mce_reset();
2189 fake_panic = val;
2190 return 0;
2191}
2192
2193DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
2194 fake_panic_set, "%llu\n");
2195
Borislav Petkov5e099542009-10-16 12:31:32 +02002196static int __init mcheck_debugfs_init(void)
Huang Yingbf783f92009-07-31 09:41:43 +08002197{
2198 struct dentry *dmce, *ffake_panic;
2199
2200 dmce = mce_get_debugfs_dir();
2201 if (!dmce)
2202 return -ENOMEM;
2203 ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
2204 &fake_panic_fops);
2205 if (!ffake_panic)
2206 return -ENOMEM;
2207
2208 return 0;
2209}
Borislav Petkov5e099542009-10-16 12:31:32 +02002210late_initcall(mcheck_debugfs_init);
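/*
 * Editor's note: with debugfs mounted at /sys/kernel/debug, root can drive
 * the self-test knob above with e.g.
 *
 *	echo 1 > /sys/kernel/debug/mce/fake_panic
 *
 * after which mce_panic() only logs the would-be panic instead of taking
 * the machine down; fake_panic_set() calls mce_reset() first to rearm the
 * rendezvous counters.
 */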
Huang Ying5be9ed22009-07-31 09:41:42 +08002211#endif