blob: 156cdf6d91816c1e8a6f9c2c0bb9cd8d010a9ad9 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Machine check handler.
Ingo Molnare9eee032009-04-08 12:31:17 +02003 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02005 * Rest from unknown author(s).
6 * 2004 Andi Kleen. Rewrote most of it.
Andi Kleenb79109c2009-02-12 13:43:23 +01007 * Copyright 2008 Intel Corporation
8 * Author: Andi Kleen
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 */
Tim Hockine02e68d2007-07-21 17:10:36 +020010#include <linux/thread_info.h>
Ingo Molnare9eee032009-04-08 12:31:17 +020011#include <linux/capability.h>
12#include <linux/miscdevice.h>
Andi Kleen8457c842009-02-12 13:49:33 +010013#include <linux/ratelimit.h>
Ingo Molnare9eee032009-04-08 12:31:17 +020014#include <linux/kallsyms.h>
15#include <linux/rcupdate.h>
16#include <linux/smp_lock.h>
17#include <linux/kobject.h>
18#include <linux/kdebug.h>
19#include <linux/kernel.h>
20#include <linux/percpu.h>
21#include <linux/string.h>
22#include <linux/sysdev.h>
23#include <linux/ctype.h>
24#include <linux/sched.h>
25#include <linux/sysfs.h>
26#include <linux/types.h>
27#include <linux/init.h>
28#include <linux/kmod.h>
29#include <linux/poll.h>
30#include <linux/cpu.h>
31#include <linux/fs.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Ingo Molnare9eee032009-04-08 12:31:17 +020033#include <asm/processor.h>
34#include <asm/uaccess.h>
35#include <asm/idle.h>
36#include <asm/mce.h>
37#include <asm/msr.h>
38#include <asm/smp.h>
39
Ingo Molnar711c2e42009-04-08 12:31:26 +020040#include "mce.h"
41
Andi Kleen5d727922009-04-27 19:25:48 +020042/* Handle unconfigured int18 (should never happen) */
43static void unexpected_machine_check(struct pt_regs *regs, long error_code)
44{
45 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
46 smp_processor_id());
47}
48
49/* Call the installed machine check handler for this CPU setup. */
50void (*machine_check_vector)(struct pt_regs *, long error_code) =
51 unexpected_machine_check;
Andi Kleen04b2b1a2009-04-28 22:50:19 +020052
53int mce_disabled;
54
Andi Kleen4efc0672009-04-28 19:07:31 +020055#ifdef CONFIG_X86_NEW_MCE
Ingo Molnar711c2e42009-04-08 12:31:26 +020056
Ingo Molnare9eee032009-04-08 12:31:17 +020057#define MISC_MCELOG_MINOR 227
Andi Kleen0d7482e32009-02-17 23:07:13 +010058
Andi Kleen553f2652006-04-07 19:49:57 +020059atomic_t mce_entry;
60
Tim Hockinbd784322007-07-21 17:10:37 +020061/*
62 * Tolerant levels:
63 * 0: always panic on uncorrected errors, log corrected errors
64 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
65 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
66 * 3: never panic or SIGBUS, log all errors (for testing only)
67 */
Ingo Molnare9eee032009-04-08 12:31:17 +020068static int tolerant = 1;
69static int banks;
70static u64 *bank;
71static unsigned long notify_user;
72static int rip_msr;
73static int mce_bootlog = -1;
74static atomic_t mce_events;
Andi Kleena98f0dd2007-02-13 13:26:23 +010075
Ingo Molnare9eee032009-04-08 12:31:17 +020076static char trigger[128];
77static char *trigger_argv[2] = { trigger, NULL };
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
Andi Kleen06b7a7a2009-04-27 18:37:43 +020079static unsigned long dont_init_banks;
80
Tim Hockine02e68d2007-07-21 17:10:36 +020081static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
82
Andi Kleenee031c32009-02-12 13:49:34 +010083/* MCA banks polled by the period polling timer for corrected events */
84DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
85 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
86};
87
Andi Kleen06b7a7a2009-04-27 18:37:43 +020088static inline int skip_bank_init(int i)
89{
90 return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
91}
92
Andi Kleenb5f2fa42009-02-12 13:43:22 +010093/* Do initial initialization of a struct mce */
94void mce_setup(struct mce *m)
95{
96 memset(m, 0, sizeof(struct mce));
97 m->cpu = smp_processor_id();
98 rdtscll(m->tsc);
99}
100
Andi Kleenea149b32009-04-29 19:31:00 +0200101DEFINE_PER_CPU(struct mce, injectm);
102EXPORT_PER_CPU_SYMBOL_GPL(injectm);
103
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104/*
105 * Lockless MCE logging infrastructure.
106 * This avoids deadlocks on printk locks without having to break locks. Also
107 * separate MCEs from kernel messages to avoid bogus bug reports.
108 */
109
Adrian Bunk231fd902008-01-30 13:30:30 +0100110static struct mce_log mcelog = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111 MCE_LOG_SIGNATURE,
112 MCE_LOG_LEN,
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200113};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
115void mce_log(struct mce *mce)
116{
117 unsigned next, entry;
Ingo Molnare9eee032009-04-08 12:31:17 +0200118
Andi Kleena98f0dd2007-02-13 13:26:23 +0100119 atomic_inc(&mce_events);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 mce->finished = 0;
Mike Waychison76441432005-09-30 00:01:27 +0200121 wmb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122 for (;;) {
123 entry = rcu_dereference(mcelog.next);
Andi Kleen673242c2005-09-12 18:49:24 +0200124 for (;;) {
Ingo Molnare9eee032009-04-08 12:31:17 +0200125 /*
126 * When the buffer fills up discard new entries.
127 * Assume that the earlier errors are the more
128 * interesting ones:
129 */
Andi Kleen673242c2005-09-12 18:49:24 +0200130 if (entry >= MCE_LOG_LEN) {
Jeremy Fitzhardinge53756d32008-01-30 13:30:55 +0100131 set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
Andi Kleen673242c2005-09-12 18:49:24 +0200132 return;
133 }
Ingo Molnare9eee032009-04-08 12:31:17 +0200134 /* Old left over entry. Skip: */
Andi Kleen673242c2005-09-12 18:49:24 +0200135 if (mcelog.entry[entry].finished) {
136 entry++;
137 continue;
138 }
Mike Waychison76441432005-09-30 00:01:27 +0200139 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 smp_rmb();
142 next = entry + 1;
143 if (cmpxchg(&mcelog.next, entry, next) == entry)
144 break;
145 }
146 memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
Mike Waychison76441432005-09-30 00:01:27 +0200147 wmb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 mcelog.entry[entry].finished = 1;
Mike Waychison76441432005-09-30 00:01:27 +0200149 wmb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
Tim Hockine02e68d2007-07-21 17:10:36 +0200151 set_bit(0, &notify_user);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152}
153
154static void print_mce(struct mce *m)
155{
156 printk(KERN_EMERG "\n"
Andi Kleen48551702006-01-11 22:44:48 +0100157 KERN_EMERG "HARDWARE ERROR\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 KERN_EMERG
159 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
160 m->cpu, m->mcgstatus, m->bank, m->status);
H. Peter Anvin65ea5b02008-01-30 13:30:56 +0100161 if (m->ip) {
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200162 printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
H. Peter Anvin65ea5b02008-01-30 13:30:56 +0100164 m->cs, m->ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 if (m->cs == __KERNEL_CS)
H. Peter Anvin65ea5b02008-01-30 13:30:56 +0100166 print_symbol("{%s}", m->ip);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 printk("\n");
168 }
H. Peter Anvinf6d18262009-02-19 15:44:58 -0800169 printk(KERN_EMERG "TSC %llx ", m->tsc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 if (m->addr)
H. Peter Anvinf6d18262009-02-19 15:44:58 -0800171 printk("ADDR %llx ", m->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 if (m->misc)
H. Peter Anvinf6d18262009-02-19 15:44:58 -0800173 printk("MISC %llx ", m->misc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174 printk("\n");
Andi Kleen48551702006-01-11 22:44:48 +0100175 printk(KERN_EMERG "This is not a software problem!\n");
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200176 printk(KERN_EMERG "Run through mcelog --ascii to decode "
177 "and contact your hardware vendor\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178}
179
Andi Kleen3cde5c82009-04-27 18:01:31 +0200180static void mce_panic(char *msg, struct mce *backup, u64 start)
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200181{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 int i;
Tim Hockine02e68d2007-07-21 17:10:36 +0200183
Andi Kleend896a942009-04-28 14:25:18 +0200184 bust_spinlocks(1);
185 console_verbose();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 for (i = 0; i < MCE_LOG_LEN; i++) {
Andi Kleen3cde5c82009-04-27 18:01:31 +0200187 u64 tsc = mcelog.entry[i].tsc;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200188
Andi Kleen3cde5c82009-04-27 18:01:31 +0200189 if ((s64)(tsc - start) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190 continue;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200191 print_mce(&mcelog.entry[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 if (backup && mcelog.entry[i].tsc == backup->tsc)
193 backup = NULL;
194 }
195 if (backup)
196 print_mce(backup);
Tim Hockine02e68d2007-07-21 17:10:36 +0200197 panic(msg);
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200198}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199
Andi Kleenea149b32009-04-29 19:31:00 +0200200/* Support code for software error injection */
201
202static int msr_to_offset(u32 msr)
203{
204 unsigned bank = __get_cpu_var(injectm.bank);
205 if (msr == rip_msr)
206 return offsetof(struct mce, ip);
207 if (msr == MSR_IA32_MC0_STATUS + bank*4)
208 return offsetof(struct mce, status);
209 if (msr == MSR_IA32_MC0_ADDR + bank*4)
210 return offsetof(struct mce, addr);
211 if (msr == MSR_IA32_MC0_MISC + bank*4)
212 return offsetof(struct mce, misc);
213 if (msr == MSR_IA32_MCG_STATUS)
214 return offsetof(struct mce, mcgstatus);
215 return -1;
216}
217
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200218/* MSR access wrappers used for error injection */
219static u64 mce_rdmsrl(u32 msr)
220{
221 u64 v;
Andi Kleenea149b32009-04-29 19:31:00 +0200222 if (__get_cpu_var(injectm).finished) {
223 int offset = msr_to_offset(msr);
224 if (offset < 0)
225 return 0;
226 return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
227 }
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200228 rdmsrl(msr, v);
229 return v;
230}
231
232static void mce_wrmsrl(u32 msr, u64 v)
233{
Andi Kleenea149b32009-04-29 19:31:00 +0200234 if (__get_cpu_var(injectm).finished) {
235 int offset = msr_to_offset(msr);
236 if (offset >= 0)
237 *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
238 return;
239 }
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200240 wrmsrl(msr, v);
241}
242
Andi Kleen88ccbed2009-02-12 13:49:36 +0100243int mce_available(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244{
Andi Kleen04b2b1a2009-04-28 22:50:19 +0200245 if (mce_disabled)
Andi Kleen5b4408f2009-02-12 13:39:30 +0100246 return 0;
Akinobu Mita3d1712c2006-03-24 03:15:11 -0800247 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248}
249
Andi Kleen94ad8472005-04-16 15:25:09 -0700250static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
251{
252 if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
H. Peter Anvin65ea5b02008-01-30 13:30:56 +0100253 m->ip = regs->ip;
Andi Kleen94ad8472005-04-16 15:25:09 -0700254 m->cs = regs->cs;
255 } else {
H. Peter Anvin65ea5b02008-01-30 13:30:56 +0100256 m->ip = 0;
Andi Kleen94ad8472005-04-16 15:25:09 -0700257 m->cs = 0;
258 }
259 if (rip_msr) {
260 /* Assume the RIP in the MSR is exact. Is this true? */
261 m->mcgstatus |= MCG_STATUS_EIPV;
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200262 m->ip = mce_rdmsrl(rip_msr);
Andi Kleen94ad8472005-04-16 15:25:09 -0700263 m->cs = 0;
264 }
265}
266
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200267/*
Andi Kleenb79109c2009-02-12 13:43:23 +0100268 * Poll for corrected events or events that happened before reset.
269 * Those are just logged through /dev/mcelog.
270 *
271 * This is executed in standard interrupt context.
272 */
Andi Kleenee031c32009-02-12 13:49:34 +0100273void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
Andi Kleenb79109c2009-02-12 13:43:23 +0100274{
275 struct mce m;
276 int i;
277
278 mce_setup(&m);
279
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200280 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
Andi Kleenb79109c2009-02-12 13:43:23 +0100281 for (i = 0; i < banks; i++) {
Andi Kleenee031c32009-02-12 13:49:34 +0100282 if (!bank[i] || !test_bit(i, *b))
Andi Kleenb79109c2009-02-12 13:43:23 +0100283 continue;
284
285 m.misc = 0;
286 m.addr = 0;
287 m.bank = i;
288 m.tsc = 0;
289
290 barrier();
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200291 m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
Andi Kleenb79109c2009-02-12 13:43:23 +0100292 if (!(m.status & MCI_STATUS_VAL))
293 continue;
294
295 /*
296 * Uncorrected events are handled by the exception handler
297 * when it is enabled. But when the exception is disabled log
298 * everything.
299 *
300 * TBD do the same check for MCI_STATUS_EN here?
301 */
302 if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
303 continue;
304
305 if (m.status & MCI_STATUS_MISCV)
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200306 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
Andi Kleenb79109c2009-02-12 13:43:23 +0100307 if (m.status & MCI_STATUS_ADDRV)
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200308 m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
Andi Kleenb79109c2009-02-12 13:43:23 +0100309
310 if (!(flags & MCP_TIMESTAMP))
311 m.tsc = 0;
312 /*
313 * Don't get the IP here because it's unlikely to
314 * have anything to do with the actual error location.
315 */
Andi Kleen5679af42009-04-07 17:06:55 +0200316 if (!(flags & MCP_DONTLOG)) {
317 mce_log(&m);
318 add_taint(TAINT_MACHINE_CHECK);
319 }
Andi Kleenb79109c2009-02-12 13:43:23 +0100320
321 /*
322 * Clear state for this bank.
323 */
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200324 mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
Andi Kleenb79109c2009-02-12 13:43:23 +0100325 }
326
327 /*
328 * Don't clear MCG_STATUS here because it's only defined for
329 * exceptions.
330 */
331}
Andi Kleenea149b32009-04-29 19:31:00 +0200332EXPORT_SYMBOL_GPL(machine_check_poll);
Andi Kleenb79109c2009-02-12 13:43:23 +0100333
334/*
335 * The actual machine check handler. This only handles real
336 * exceptions when something got corrupted coming in through int 18.
337 *
338 * This is executed in NMI context not subject to normal locking rules. This
339 * implies that most kernel services cannot be safely used. Don't even
340 * think about putting a printk in there!
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 */
Ingo Molnare9eee032009-04-08 12:31:17 +0200342void do_machine_check(struct pt_regs *regs, long error_code)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343{
344 struct mce m, panicm;
Ingo Molnare9eee032009-04-08 12:31:17 +0200345 int panicm_found = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346 u64 mcestart = 0;
347 int i;
Tim Hockinbd784322007-07-21 17:10:37 +0200348 /*
349 * If no_way_out gets set, there is no safe way to recover from this
350 * MCE. If tolerant is cranked up, we'll try anyway.
351 */
352 int no_way_out = 0;
353 /*
354 * If kill_it gets set, there might be a way to recover from this
355 * error.
356 */
357 int kill_it = 0;
Andi Kleenb79109c2009-02-12 13:43:23 +0100358 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359
Andi Kleen553f2652006-04-07 19:49:57 +0200360 atomic_inc(&mce_entry);
361
Andi Kleenb79109c2009-02-12 13:43:23 +0100362 if (notify_die(DIE_NMI, "machine check", regs, error_code,
Jan Beulich22f59912008-01-30 13:31:23 +0100363 18, SIGKILL) == NOTIFY_STOP)
Andi Kleenb79109c2009-02-12 13:43:23 +0100364 goto out2;
365 if (!banks)
Andi Kleen553f2652006-04-07 19:49:57 +0200366 goto out2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
Andi Kleenb5f2fa42009-02-12 13:43:22 +0100368 mce_setup(&m);
369
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200370 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
Ingo Molnare9eee032009-04-08 12:31:17 +0200371
Tim Hockinbd784322007-07-21 17:10:37 +0200372 /* if the restart IP is not valid, we're done for */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 if (!(m.mcgstatus & MCG_STATUS_RIPV))
Tim Hockinbd784322007-07-21 17:10:37 +0200374 no_way_out = 1;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200375
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 rdtscll(mcestart);
377 barrier();
378
379 for (i = 0; i < banks; i++) {
Andi Kleenb79109c2009-02-12 13:43:23 +0100380 __clear_bit(i, toclear);
Andi Kleen0d7482e32009-02-17 23:07:13 +0100381 if (!bank[i])
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 continue;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200383
384 m.misc = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 m.addr = 0;
386 m.bank = i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200388 m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 if ((m.status & MCI_STATUS_VAL) == 0)
390 continue;
391
Andi Kleenb79109c2009-02-12 13:43:23 +0100392 /*
393 * Non uncorrected errors are handled by machine_check_poll
394 * Leave them alone.
395 */
396 if ((m.status & MCI_STATUS_UC) == 0)
397 continue;
398
399 /*
400 * Set taint even when machine check was not enabled.
401 */
402 add_taint(TAINT_MACHINE_CHECK);
403
404 __set_bit(i, toclear);
405
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 if (m.status & MCI_STATUS_EN) {
Tim Hockinbd784322007-07-21 17:10:37 +0200407 /* if PCC was set, there's no way out */
408 no_way_out |= !!(m.status & MCI_STATUS_PCC);
409 /*
410 * If this error was uncorrectable and there was
411 * an overflow, we're in trouble. If no overflow,
412 * we might get away with just killing a task.
413 */
414 if (m.status & MCI_STATUS_UC) {
415 if (tolerant < 1 || m.status & MCI_STATUS_OVER)
416 no_way_out = 1;
417 kill_it = 1;
418 }
Andi Kleenb79109c2009-02-12 13:43:23 +0100419 } else {
420 /*
421 * Machine check event was not enabled. Clear, but
422 * ignore.
423 */
424 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 }
426
427 if (m.status & MCI_STATUS_MISCV)
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200428 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 if (m.status & MCI_STATUS_ADDRV)
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200430 m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431
Andi Kleen94ad8472005-04-16 15:25:09 -0700432 mce_get_rip(&m, regs);
Andi Kleenb79109c2009-02-12 13:43:23 +0100433 mce_log(&m);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
Ingo Molnare9eee032009-04-08 12:31:17 +0200435 /*
436 * Did this bank cause the exception?
437 *
438 * Assume that the bank with uncorrectable errors did it,
439 * and that there is only a single one:
440 */
441 if ((m.status & MCI_STATUS_UC) &&
442 (m.status & MCI_STATUS_EN)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 panicm = m;
444 panicm_found = 1;
445 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 }
447
Ingo Molnare9eee032009-04-08 12:31:17 +0200448 /*
449 * If we didn't find an uncorrectable error, pick
450 * the last one (shouldn't happen, just being safe).
451 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 if (!panicm_found)
453 panicm = m;
Tim Hockinbd784322007-07-21 17:10:37 +0200454
455 /*
456 * If we have decided that we just CAN'T continue, and the user
Ingo Molnare9eee032009-04-08 12:31:17 +0200457 * has not set tolerant to an insane level, give up and die.
Tim Hockinbd784322007-07-21 17:10:37 +0200458 */
459 if (no_way_out && tolerant < 3)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 mce_panic("Machine check", &panicm, mcestart);
Tim Hockinbd784322007-07-21 17:10:37 +0200461
462 /*
463 * If the error seems to be unrecoverable, something should be
464 * done. Try to kill as little as possible. If we can kill just
465 * one task, do that. If the user has set the tolerance very
466 * high, don't try to do anything at all.
467 */
468 if (kill_it && tolerant < 3) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469 int user_space = 0;
470
Tim Hockinbd784322007-07-21 17:10:37 +0200471 /*
472 * If the EIPV bit is set, it means the saved IP is the
473 * instruction which caused the MCE.
474 */
475 if (m.mcgstatus & MCG_STATUS_EIPV)
H. Peter Anvin65ea5b02008-01-30 13:30:56 +0100476 user_space = panicm.ip && (panicm.cs & 3);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477
Tim Hockinbd784322007-07-21 17:10:37 +0200478 /*
479 * If we know that the error was in user space, send a
480 * SIGBUS. Otherwise, panic if tolerance is low.
481 *
Andi Kleen380851b2009-02-12 13:39:33 +0100482 * force_sig() takes an awful lot of locks and has a slight
Tim Hockinbd784322007-07-21 17:10:37 +0200483 * risk of deadlocking.
484 */
485 if (user_space) {
Andi Kleen380851b2009-02-12 13:39:33 +0100486 force_sig(SIGBUS, current);
Tim Hockinbd784322007-07-21 17:10:37 +0200487 } else if (panic_on_oops || tolerant < 2) {
488 mce_panic("Uncorrected machine check",
489 &panicm, mcestart);
490 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 }
492
Tim Hockine02e68d2007-07-21 17:10:36 +0200493 /* notify userspace ASAP */
494 set_thread_flag(TIF_MCE_NOTIFY);
495
Tim Hockinbd784322007-07-21 17:10:37 +0200496 /* the last thing we do is clear state */
Andi Kleenb79109c2009-02-12 13:43:23 +0100497 for (i = 0; i < banks; i++) {
498 if (test_bit(i, toclear))
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200499 mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
Andi Kleenb79109c2009-02-12 13:43:23 +0100500 }
Andi Kleen5f8c1a52009-04-29 19:29:12 +0200501 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
Andi Kleen553f2652006-04-07 19:49:57 +0200502 out2:
503 atomic_dec(&mce_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504}
Andi Kleenea149b32009-04-29 19:31:00 +0200505EXPORT_SYMBOL_GPL(do_machine_check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506
Dmitriy Zavin15d5f832006-09-26 10:52:42 +0200507#ifdef CONFIG_X86_MCE_INTEL
508/***
509 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
Simon Arlott676b1852007-10-20 01:25:36 +0200510 * @cpu: The CPU on which the event occurred.
Dmitriy Zavin15d5f832006-09-26 10:52:42 +0200511 * @status: Event status information
512 *
513 * This function should be called by the thermal interrupt after the
514 * event has been processed and the decision was made to log the event
515 * further.
516 *
517 * The status parameter will be saved to the 'status' field of 'struct mce'
518 * and historically has been the register value of the
519 * MSR_IA32_THERMAL_STATUS (Intel) msr.
520 */
Andi Kleenb5f2fa42009-02-12 13:43:22 +0100521void mce_log_therm_throt_event(__u64 status)
Dmitriy Zavin15d5f832006-09-26 10:52:42 +0200522{
523 struct mce m;
524
Andi Kleenb5f2fa42009-02-12 13:43:22 +0100525 mce_setup(&m);
Dmitriy Zavin15d5f832006-09-26 10:52:42 +0200526 m.bank = MCE_THERMAL_BANK;
527 m.status = status;
Dmitriy Zavin15d5f832006-09-26 10:52:42 +0200528 mce_log(&m);
529}
530#endif /* CONFIG_X86_MCE_INTEL */
531
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532/*
Tim Hockin8a336b02007-05-02 19:27:19 +0200533 * Periodic polling timer for "silent" machine check errors. If the
534 * poller finds an MCE, poll 2x faster. When the poller finds no more
535 * errors, poll 2x slower (up to check_interval seconds).
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537static int check_interval = 5 * 60; /* 5 minutes */
Ingo Molnare9eee032009-04-08 12:31:17 +0200538
Andi Kleen6298c512009-04-09 12:28:22 +0200539static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
Andi Kleen52d168e2009-02-12 13:39:29 +0100540static DEFINE_PER_CPU(struct timer_list, mce_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541
Andi Kleen52d168e2009-02-12 13:39:29 +0100542static void mcheck_timer(unsigned long data)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543{
Andi Kleen52d168e2009-02-12 13:39:29 +0100544 struct timer_list *t = &per_cpu(mce_timer, data);
Andi Kleen6298c512009-04-09 12:28:22 +0200545 int *n;
Andi Kleen52d168e2009-02-12 13:39:29 +0100546
547 WARN_ON(smp_processor_id() != data);
548
Ingo Molnare9eee032009-04-08 12:31:17 +0200549 if (mce_available(&current_cpu_data)) {
Andi Kleenee031c32009-02-12 13:49:34 +0100550 machine_check_poll(MCP_TIMESTAMP,
551 &__get_cpu_var(mce_poll_banks));
Ingo Molnare9eee032009-04-08 12:31:17 +0200552 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553
554 /*
Tim Hockine02e68d2007-07-21 17:10:36 +0200555 * Alert userspace if needed. If we logged an MCE, reduce the
556 * polling interval, otherwise increase the polling interval.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 */
Andi Kleen6298c512009-04-09 12:28:22 +0200558 n = &__get_cpu_var(next_interval);
Tim Hockine02e68d2007-07-21 17:10:36 +0200559 if (mce_notify_user()) {
Andi Kleen6298c512009-04-09 12:28:22 +0200560 *n = max(*n/2, HZ/100);
Tim Hockin8a336b02007-05-02 19:27:19 +0200561 } else {
Andi Kleen6298c512009-04-09 12:28:22 +0200562 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 }
Tim Hockin8a336b02007-05-02 19:27:19 +0200564
Andi Kleen6298c512009-04-09 12:28:22 +0200565 t->expires = jiffies + *n;
Andi Kleen52d168e2009-02-12 13:39:29 +0100566 add_timer(t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567}
568
Andi Kleen9bd98402009-02-12 13:39:28 +0100569static void mce_do_trigger(struct work_struct *work)
570{
571 call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
572}
573
574static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
575
Tim Hockine02e68d2007-07-21 17:10:36 +0200576/*
Andi Kleen9bd98402009-02-12 13:39:28 +0100577 * Notify the user(s) about new machine check events.
578 * Can be called from interrupt context, but not from machine check/NMI
579 * context.
Tim Hockine02e68d2007-07-21 17:10:36 +0200580 */
581int mce_notify_user(void)
582{
Andi Kleen8457c842009-02-12 13:49:33 +0100583 /* Not more than two messages every minute */
584 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
585
Tim Hockine02e68d2007-07-21 17:10:36 +0200586 clear_thread_flag(TIF_MCE_NOTIFY);
Ingo Molnare9eee032009-04-08 12:31:17 +0200587
Tim Hockine02e68d2007-07-21 17:10:36 +0200588 if (test_and_clear_bit(0, &notify_user)) {
Tim Hockine02e68d2007-07-21 17:10:36 +0200589 wake_up_interruptible(&mce_wait);
Andi Kleen9bd98402009-02-12 13:39:28 +0100590
591 /*
592 * There is no risk of missing notifications because
593 * work_pending is always cleared before the function is
594 * executed.
595 */
596 if (trigger[0] && !work_pending(&mce_trigger_work))
597 schedule_work(&mce_trigger_work);
Tim Hockine02e68d2007-07-21 17:10:36 +0200598
Andi Kleen8457c842009-02-12 13:49:33 +0100599 if (__ratelimit(&ratelimit))
Tim Hockine02e68d2007-07-21 17:10:36 +0200600 printk(KERN_INFO "Machine check events logged\n");
Tim Hockine02e68d2007-07-21 17:10:36 +0200601
602 return 1;
603 }
604 return 0;
605}
Andi Kleenea149b32009-04-29 19:31:00 +0200606EXPORT_SYMBOL_GPL(mce_notify_user);
Tim Hockine02e68d2007-07-21 17:10:36 +0200607
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200608/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 * Initialize Machine Checks for a CPU.
610 */
Andi Kleen0d7482e32009-02-17 23:07:13 +0100611static int mce_cap_init(void)
612{
Andi Kleen0d7482e32009-02-17 23:07:13 +0100613 unsigned b;
Ingo Molnare9eee032009-04-08 12:31:17 +0200614 u64 cap;
Andi Kleen0d7482e32009-02-17 23:07:13 +0100615
616 rdmsrl(MSR_IA32_MCG_CAP, cap);
Thomas Gleixner01c66802009-04-08 12:31:24 +0200617
618 b = cap & MCG_BANKCNT_MASK;
Ingo Molnarb6592942009-04-08 12:31:27 +0200619 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
620
Andi Kleen0d7482e32009-02-17 23:07:13 +0100621 if (b > MAX_NR_BANKS) {
622 printk(KERN_WARNING
623 "MCE: Using only %u machine check banks out of %u\n",
624 MAX_NR_BANKS, b);
625 b = MAX_NR_BANKS;
626 }
627
628 /* Don't support asymmetric configurations today */
629 WARN_ON(banks != 0 && b != banks);
630 banks = b;
631 if (!bank) {
632 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
633 if (!bank)
634 return -ENOMEM;
635 memset(bank, 0xff, banks * sizeof(u64));
636 }
637
638 /* Use accurate RIP reporting if available. */
Thomas Gleixner01c66802009-04-08 12:31:24 +0200639 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
Andi Kleen0d7482e32009-02-17 23:07:13 +0100640 rip_msr = MSR_IA32_MCG_EIP;
641
642 return 0;
643}
644
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645static void mce_init(void *dummy)
646{
Ingo Molnare9eee032009-04-08 12:31:17 +0200647 mce_banks_t all_banks;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 u64 cap;
649 int i;
650
Andi Kleenb79109c2009-02-12 13:43:23 +0100651 /*
652 * Log the machine checks left over from the previous reset.
653 */
Andi Kleenee031c32009-02-12 13:49:34 +0100654 bitmap_fill(all_banks, MAX_NR_BANKS);
Andi Kleen5679af42009-04-07 17:06:55 +0200655 machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656
657 set_in_cr4(X86_CR4_MCE);
658
Andi Kleen0d7482e32009-02-17 23:07:13 +0100659 rdmsrl(MSR_IA32_MCG_CAP, cap);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 if (cap & MCG_CTL_P)
661 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
662
663 for (i = 0; i < banks; i++) {
Andi Kleen06b7a7a2009-04-27 18:37:43 +0200664 if (skip_bank_init(i))
665 continue;
Andi Kleen0d7482e32009-02-17 23:07:13 +0100666 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200668 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669}
670
671/* Add per CPU specific workarounds here */
H. Peter Anvinec5b3d32009-02-23 14:01:04 -0800672static void mce_cpu_quirks(struct cpuinfo_x86 *c)
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200673{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 /* This should be disabled by the BIOS, but isn't always */
Jan Beulich911f6a72008-04-22 16:22:21 +0100675 if (c->x86_vendor == X86_VENDOR_AMD) {
Ingo Molnare9eee032009-04-08 12:31:17 +0200676 if (c->x86 == 15 && banks > 4) {
677 /*
678 * disable GART TBL walk error reporting, which
679 * trips off incorrectly with the IOMMU & 3ware
680 * & Cerberus:
681 */
Andi Kleen0d7482e32009-02-17 23:07:13 +0100682 clear_bit(10, (unsigned long *)&bank[4]);
Ingo Molnare9eee032009-04-08 12:31:17 +0200683 }
684 if (c->x86 <= 17 && mce_bootlog < 0) {
685 /*
686 * Lots of broken BIOS around that don't clear them
687 * by default and leave crap in there. Don't log:
688 */
Jan Beulich911f6a72008-04-22 16:22:21 +0100689 mce_bootlog = 0;
Ingo Molnare9eee032009-04-08 12:31:17 +0200690 }
Andi Kleen2e6f6942009-04-27 18:42:48 +0200691 /*
692 * Various K7s with broken bank 0 around. Always disable
693 * by default.
694 */
695 if (c->x86 == 6)
696 bank[0] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 }
Andi Kleene5835382005-11-05 17:25:54 +0100698
Andi Kleen06b7a7a2009-04-27 18:37:43 +0200699 if (c->x86_vendor == X86_VENDOR_INTEL) {
700 /*
701 * SDM documents that on family 6 bank 0 should not be written
702 * because it aliases to another special BIOS controlled
703 * register.
704 * But it's not aliased anymore on model 0x1a+
705 * Don't ignore bank 0 completely because there could be a
706 * valid event later, merely don't write CTL0.
707 */
708
709 if (c->x86 == 6 && c->x86_model < 0x1A)
710 __set_bit(0, &dont_init_banks);
711 }
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200712}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713
Andi Kleen4efc0672009-04-28 19:07:31 +0200714static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
715{
716 if (c->x86 != 5)
717 return;
718 switch (c->x86_vendor) {
719 case X86_VENDOR_INTEL:
720 if (mce_p5_enabled())
721 intel_p5_mcheck_init(c);
722 break;
723 case X86_VENDOR_CENTAUR:
724 winchip_mcheck_init(c);
725 break;
726 }
727}
728
H. Peter Anvincc3ca222009-02-20 23:35:51 -0800729static void mce_cpu_features(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730{
731 switch (c->x86_vendor) {
732 case X86_VENDOR_INTEL:
733 mce_intel_feature_init(c);
734 break;
Jacob Shin89b831e2005-11-05 17:25:53 +0100735 case X86_VENDOR_AMD:
736 mce_amd_feature_init(c);
737 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 default:
739 break;
740 }
741}
742
Andi Kleen52d168e2009-02-12 13:39:29 +0100743static void mce_init_timer(void)
744{
745 struct timer_list *t = &__get_cpu_var(mce_timer);
Andi Kleen6298c512009-04-09 12:28:22 +0200746 int *n = &__get_cpu_var(next_interval);
Andi Kleen52d168e2009-02-12 13:39:29 +0100747
Andi Kleen6298c512009-04-09 12:28:22 +0200748 *n = check_interval * HZ;
749 if (!*n)
Andi Kleen52d168e2009-02-12 13:39:29 +0100750 return;
751 setup_timer(t, mcheck_timer, smp_processor_id());
Andi Kleen6298c512009-04-09 12:28:22 +0200752 t->expires = round_jiffies(jiffies + *n);
Andi Kleen52d168e2009-02-12 13:39:29 +0100753 add_timer(t);
754}
755
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200756/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 * Called for each booted CPU to set up machine checks.
Ingo Molnare9eee032009-04-08 12:31:17 +0200758 * Must be called with preempt off:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 */
Ashok Raje6982c62005-06-25 14:54:58 -0700760void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761{
Andi Kleen4efc0672009-04-28 19:07:31 +0200762 if (mce_disabled)
763 return;
764
765 mce_ancient_init(c);
766
Andi Kleen5b4408f2009-02-12 13:39:30 +0100767 if (!mce_available(c))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 return;
769
Andi Kleen0d7482e32009-02-17 23:07:13 +0100770 if (mce_cap_init() < 0) {
Andi Kleen04b2b1a2009-04-28 22:50:19 +0200771 mce_disabled = 1;
Andi Kleen0d7482e32009-02-17 23:07:13 +0100772 return;
773 }
774 mce_cpu_quirks(c);
775
Andi Kleen5d727922009-04-27 19:25:48 +0200776 machine_check_vector = do_machine_check;
777
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 mce_init(NULL);
779 mce_cpu_features(c);
Andi Kleen52d168e2009-02-12 13:39:29 +0100780 mce_init_timer();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781}
782
783/*
784 * Character device to read and clear the MCE log.
785 */
786
Tim Hockinf528e7b2007-07-21 17:10:35 +0200787static DEFINE_SPINLOCK(mce_state_lock);
Ingo Molnare9eee032009-04-08 12:31:17 +0200788static int open_count; /* #times opened */
789static int open_exclu; /* already open exclusive? */
Tim Hockinf528e7b2007-07-21 17:10:35 +0200790
791static int mce_open(struct inode *inode, struct file *file)
792{
Arnd Bergmann38c4c972008-05-20 19:17:02 +0200793 lock_kernel();
Tim Hockinf528e7b2007-07-21 17:10:35 +0200794 spin_lock(&mce_state_lock);
795
796 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
797 spin_unlock(&mce_state_lock);
Arnd Bergmann38c4c972008-05-20 19:17:02 +0200798 unlock_kernel();
Ingo Molnare9eee032009-04-08 12:31:17 +0200799
Tim Hockinf528e7b2007-07-21 17:10:35 +0200800 return -EBUSY;
801 }
802
803 if (file->f_flags & O_EXCL)
804 open_exclu = 1;
805 open_count++;
806
807 spin_unlock(&mce_state_lock);
Arnd Bergmann38c4c972008-05-20 19:17:02 +0200808 unlock_kernel();
Tim Hockinf528e7b2007-07-21 17:10:35 +0200809
Tim Hockinbd784322007-07-21 17:10:37 +0200810 return nonseekable_open(inode, file);
Tim Hockinf528e7b2007-07-21 17:10:35 +0200811}
812
813static int mce_release(struct inode *inode, struct file *file)
814{
815 spin_lock(&mce_state_lock);
816
817 open_count--;
818 open_exclu = 0;
819
820 spin_unlock(&mce_state_lock);
821
822 return 0;
823}
824
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200825static void collect_tscs(void *data)
826{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 unsigned long *cpu_tsc = (unsigned long *)data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200829 rdtscll(cpu_tsc[smp_processor_id()]);
830}
831
Ingo Molnare9eee032009-04-08 12:31:17 +0200832static DEFINE_MUTEX(mce_read_mutex);
833
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200834static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
835 loff_t *off)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 char __user *buf = ubuf;
Ingo Molnare9eee032009-04-08 12:31:17 +0200838 unsigned long *cpu_tsc;
839 unsigned prev, next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 int i, err;
841
Mike Travis6bca67f2008-07-18 18:11:27 -0700842 cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
Andi Kleenf0de53b2005-04-16 15:25:10 -0700843 if (!cpu_tsc)
844 return -ENOMEM;
845
Daniel Walker8c8b8852008-01-30 13:31:17 +0100846 mutex_lock(&mce_read_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 next = rcu_dereference(mcelog.next);
848
849 /* Only supports full reads right now */
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200850 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
Daniel Walker8c8b8852008-01-30 13:31:17 +0100851 mutex_unlock(&mce_read_mutex);
Andi Kleenf0de53b2005-04-16 15:25:10 -0700852 kfree(cpu_tsc);
Ingo Molnare9eee032009-04-08 12:31:17 +0200853
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 return -EINVAL;
855 }
856
857 err = 0;
Huang Yingef41df4342009-02-12 13:39:34 +0100858 prev = 0;
859 do {
860 for (i = prev; i < next; i++) {
861 unsigned long start = jiffies;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200862
Huang Yingef41df4342009-02-12 13:39:34 +0100863 while (!mcelog.entry[i].finished) {
864 if (time_after_eq(jiffies, start + 2)) {
865 memset(mcelog.entry + i, 0,
866 sizeof(struct mce));
867 goto timeout;
868 }
869 cpu_relax();
Andi Kleen673242c2005-09-12 18:49:24 +0200870 }
Huang Yingef41df4342009-02-12 13:39:34 +0100871 smp_rmb();
872 err |= copy_to_user(buf, mcelog.entry + i,
873 sizeof(struct mce));
874 buf += sizeof(struct mce);
875timeout:
876 ;
Andi Kleen673242c2005-09-12 18:49:24 +0200877 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878
Huang Yingef41df4342009-02-12 13:39:34 +0100879 memset(mcelog.entry + prev, 0,
880 (next - prev) * sizeof(struct mce));
881 prev = next;
882 next = cmpxchg(&mcelog.next, prev, 0);
883 } while (next != prev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
Paul E. McKenneyb2b18662005-06-25 14:55:38 -0700885 synchronize_sched();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200887 /*
888 * Collect entries that were still getting written before the
889 * synchronize.
890 */
Jens Axboe15c8b6c2008-05-09 09:39:44 +0200891 on_each_cpu(collect_tscs, cpu_tsc, 1);
Ingo Molnare9eee032009-04-08 12:31:17 +0200892
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200893 for (i = next; i < MCE_LOG_LEN; i++) {
894 if (mcelog.entry[i].finished &&
895 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
896 err |= copy_to_user(buf, mcelog.entry+i,
897 sizeof(struct mce));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 smp_rmb();
899 buf += sizeof(struct mce);
900 memset(&mcelog.entry[i], 0, sizeof(struct mce));
901 }
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200902 }
Daniel Walker8c8b8852008-01-30 13:31:17 +0100903 mutex_unlock(&mce_read_mutex);
Andi Kleenf0de53b2005-04-16 15:25:10 -0700904 kfree(cpu_tsc);
Ingo Molnare9eee032009-04-08 12:31:17 +0200905
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200906 return err ? -EFAULT : buf - ubuf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907}
908
Tim Hockine02e68d2007-07-21 17:10:36 +0200909static unsigned int mce_poll(struct file *file, poll_table *wait)
910{
911 poll_wait(file, &mce_wait, wait);
912 if (rcu_dereference(mcelog.next))
913 return POLLIN | POLLRDNORM;
914 return 0;
915}
916
Nikanth Karthikesanc68461b2008-01-30 13:32:59 +0100917static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918{
919 int __user *p = (int __user *)arg;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200920
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 if (!capable(CAP_SYS_ADMIN))
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200922 return -EPERM;
Ingo Molnare9eee032009-04-08 12:31:17 +0200923
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 switch (cmd) {
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200925 case MCE_GET_RECORD_LEN:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 return put_user(sizeof(struct mce), p);
927 case MCE_GET_LOG_LEN:
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200928 return put_user(MCE_LOG_LEN, p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 case MCE_GETCLEAR_FLAGS: {
930 unsigned flags;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200931
932 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 flags = mcelog.flags;
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200934 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
Ingo Molnare9eee032009-04-08 12:31:17 +0200935
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200936 return put_user(flags, p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 }
938 default:
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200939 return -ENOTTY;
940 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941}
942
H. Peter Anvina1ff41b2009-05-25 22:16:14 -0700943/* Modified in mce-inject.c, so not static or const */
Andi Kleenea149b32009-04-29 19:31:00 +0200944struct file_operations mce_chrdev_ops = {
Ingo Molnare9eee032009-04-08 12:31:17 +0200945 .open = mce_open,
946 .release = mce_release,
947 .read = mce_read,
948 .poll = mce_poll,
949 .unlocked_ioctl = mce_ioctl,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950};
Andi Kleenea149b32009-04-29 19:31:00 +0200951EXPORT_SYMBOL_GPL(mce_chrdev_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952
953static struct miscdevice mce_log_device = {
954 MISC_MCELOG_MINOR,
955 "mcelog",
956 &mce_chrdev_ops,
957};
958
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200959/*
Hidetoshi Seto13503fa2009-03-26 17:39:20 +0900960 * mce=off disables machine check
961 * mce=TOLERANCELEVEL (number, see above)
962 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
963 * mce=nobootlog Don't log MCEs from before booting.
964 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965static int __init mcheck_enable(char *str)
966{
Andi Kleen4efc0672009-04-28 19:07:31 +0200967 if (*str == 0)
968 enable_p5_mce();
969 if (*str == '=')
970 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 if (!strcmp(str, "off"))
Andi Kleen04b2b1a2009-04-28 22:50:19 +0200972 mce_disabled = 1;
Hidetoshi Seto13503fa2009-03-26 17:39:20 +0900973 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
974 mce_bootlog = (str[0] == 'b');
Andi Kleen8c566ef2005-09-12 18:49:24 +0200975 else if (isdigit(str[0]))
976 get_option(&str, &tolerant);
Hidetoshi Seto13503fa2009-03-26 17:39:20 +0900977 else {
Andi Kleen4efc0672009-04-28 19:07:31 +0200978 printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
Hidetoshi Seto13503fa2009-03-26 17:39:20 +0900979 str);
980 return 0;
981 }
OGAWA Hirofumi9b410462006-03-31 02:30:33 -0800982 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983}
Andi Kleen4efc0672009-04-28 19:07:31 +0200984__setup("mce", mcheck_enable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200986/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 * Sysfs support
Thomas Gleixnerd88203d2007-10-23 22:37:23 +0200988 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989
Andi Kleen973a2dd2009-02-12 13:39:32 +0100990/*
991 * Disable machine checks on suspend and shutdown. We can't really handle
992 * them later.
993 */
994static int mce_disable(void)
995{
996 int i;
997
Andi Kleen06b7a7a2009-04-27 18:37:43 +0200998 for (i = 0; i < banks; i++) {
999 if (!skip_bank_init(i))
1000 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
1001 }
Andi Kleen973a2dd2009-02-12 13:39:32 +01001002 return 0;
1003}
1004
/* Sysdev suspend hook: quiesce all MCE banks before sleeping. */
static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
	return mce_disable();
}
1009
/* Sysdev shutdown hook: quiesce all MCE banks before power-off/reboot. */
static int mce_shutdown(struct sys_device *dev)
{
	return mce_disable();
}
1014
Ingo Molnare9eee032009-04-08 12:31:17 +02001015/*
1016 * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
1017 * Only one CPU is active at this time, the others get re-added later using
1018 * CPU hotplug:
1019 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020static int mce_resume(struct sys_device *dev)
1021{
Andi Kleen413588c2005-09-12 18:49:24 +02001022 mce_init(NULL);
Andi Kleen6ec68bf2009-02-12 13:39:26 +01001023 mce_cpu_features(&current_cpu_data);
Ingo Molnare9eee032009-04-08 12:31:17 +02001024
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 return 0;
1026}
1027
Andi Kleen52d168e2009-02-12 13:39:29 +01001028static void mce_cpu_restart(void *data)
1029{
1030 del_timer_sync(&__get_cpu_var(mce_timer));
1031 if (mce_available(&current_cpu_data))
1032 mce_init(NULL);
1033 mce_init_timer();
1034}
1035
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036/* Reinit MCEs after user configuration changes */
Thomas Gleixnerd88203d2007-10-23 22:37:23 +02001037static void mce_restart(void)
1038{
Andi Kleen52d168e2009-02-12 13:39:29 +01001039 on_each_cpu(mce_cpu_restart, NULL, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040}
1041
/* sysfs class for /sys/devices/system/machinecheck with PM hooks */
static struct sysdev_class mce_sysclass = {
	.suspend	= mce_suspend,
	.shutdown	= mce_shutdown,
	.resume		= mce_resume,
	.name		= "machinecheck",
};

/* One sysdev per CPU, registered/unregistered on hotplug */
DEFINE_PER_CPU(struct sys_device, mce_dev);

/* Hook filled in by the AMD threshold code (mce_amd_64.c) */
__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053
/* Why are there no generic functions for this? */
/*
 * Generate a show_<name>/set_<name> sysdev attribute pair for a u64
 * variable <var>, running <start> after every successful store (used
 * below to trigger mce_restart()). The store parses with base
 * autodetection and rejects input with no leading number.
 */
#define ACCESSOR(name, var, start) \
	static ssize_t show_ ## name(struct sys_device *s,		\
				     struct sysdev_attribute *attr,	\
				     char *buf) {			\
		return sprintf(buf, "%Lx\n", (u64)var);			\
	}								\
	static ssize_t set_ ## name(struct sys_device *s,		\
				    struct sysdev_attribute *attr,	\
				    const char *buf, size_t siz) {	\
		char *end;						\
		u64 new = simple_strtoull(buf, &end, 0);		\
		\
		if (end == buf)						\
			return -EINVAL;					\
		var = new;						\
		start;							\
		\
		return end-buf;						\
	}								\
	static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
1075
Andi Kleen0d7482e32009-02-17 23:07:13 +01001076static struct sysdev_attribute *bank_attrs;
1077
1078static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
1079 char *buf)
1080{
1081 u64 b = bank[attr - bank_attrs];
Ingo Molnare9eee032009-04-08 12:31:17 +02001082
H. Peter Anvinf6d18262009-02-19 15:44:58 -08001083 return sprintf(buf, "%llx\n", b);
Andi Kleen0d7482e32009-02-17 23:07:13 +01001084}
1085
1086static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1087 const char *buf, size_t siz)
1088{
1089 char *end;
1090 u64 new = simple_strtoull(buf, &end, 0);
Ingo Molnare9eee032009-04-08 12:31:17 +02001091
Andi Kleen0d7482e32009-02-17 23:07:13 +01001092 if (end == buf)
1093 return -EINVAL;
Ingo Molnare9eee032009-04-08 12:31:17 +02001094
Andi Kleen0d7482e32009-02-17 23:07:13 +01001095 bank[attr - bank_attrs] = new;
1096 mce_restart();
Ingo Molnare9eee032009-04-08 12:31:17 +02001097
Andi Kleen0d7482e32009-02-17 23:07:13 +01001098 return end-buf;
1099}
Andi Kleena98f0dd2007-02-13 13:26:23 +01001100
Ingo Molnare9eee032009-04-08 12:31:17 +02001101static ssize_t
1102show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
Andi Kleena98f0dd2007-02-13 13:26:23 +01001103{
1104 strcpy(buf, trigger);
1105 strcat(buf, "\n");
1106 return strlen(trigger) + 1;
1107}
1108
Andi Kleen4a0b2b42008-07-01 18:48:41 +02001109static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
Ingo Molnare9eee032009-04-08 12:31:17 +02001110 const char *buf, size_t siz)
Andi Kleena98f0dd2007-02-13 13:26:23 +01001111{
1112 char *p;
1113 int len;
Ingo Molnare9eee032009-04-08 12:31:17 +02001114
Andi Kleena98f0dd2007-02-13 13:26:23 +01001115 strncpy(trigger, buf, sizeof(trigger));
1116 trigger[sizeof(trigger)-1] = 0;
1117 len = strlen(trigger);
1118 p = strchr(trigger, '\n');
Ingo Molnare9eee032009-04-08 12:31:17 +02001119
1120 if (*p)
1121 *p = 0;
1122
Andi Kleena98f0dd2007-02-13 13:26:23 +01001123 return len;
1124}
1125
static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);

/* check_interval writes restart MCE on all CPUs to pick up the new period */
ACCESSOR(check_interval, check_interval, mce_restart())

/* Per-CPU attributes common to all banks; bankN files are added separately */
static struct sysdev_attribute *mce_attrs[] = {
	&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
	NULL
};

/* CPUs whose sysdev has been fully created (guards teardown) */
static cpumask_var_t mce_dev_initialized;
Andreas Herrmannbae19fe2007-11-14 17:00:44 -08001137
Ingo Molnare9eee032009-04-08 12:31:17 +02001138/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
Andi Kleen91c6d402005-07-28 21:15:39 -07001139static __cpuinit int mce_create_device(unsigned int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140{
1141 int err;
Shaohua Li73ca5352006-01-11 22:43:06 +01001142 int i;
Mike Travis92cb7612007-10-19 20:35:04 +02001143
Andreas Herrmann90367552007-11-07 02:12:58 +01001144 if (!mce_available(&boot_cpu_data))
Andi Kleen91c6d402005-07-28 21:15:39 -07001145 return -EIO;
1146
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001147 memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
1148 per_cpu(mce_dev, cpu).id = cpu;
1149 per_cpu(mce_dev, cpu).cls = &mce_sysclass;
Andi Kleen91c6d402005-07-28 21:15:39 -07001150
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001151 err = sysdev_register(&per_cpu(mce_dev, cpu));
Akinobu Mitad435d862007-10-18 03:05:15 -07001152 if (err)
1153 return err;
Andi Kleen91c6d402005-07-28 21:15:39 -07001154
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001155 for (i = 0; mce_attrs[i]; i++) {
1156 err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
Akinobu Mitad435d862007-10-18 03:05:15 -07001157 if (err)
1158 goto error;
Andi Kleen91c6d402005-07-28 21:15:39 -07001159 }
Andi Kleen0d7482e32009-02-17 23:07:13 +01001160 for (i = 0; i < banks; i++) {
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001161 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
Andi Kleen0d7482e32009-02-17 23:07:13 +01001162 &bank_attrs[i]);
1163 if (err)
1164 goto error2;
1165 }
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001166 cpumask_set_cpu(cpu, mce_dev_initialized);
Akinobu Mitad435d862007-10-18 03:05:15 -07001167
1168 return 0;
Andi Kleen0d7482e32009-02-17 23:07:13 +01001169error2:
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001170 while (--i >= 0)
1171 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
Akinobu Mitad435d862007-10-18 03:05:15 -07001172error:
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001173 while (--i >= 0)
1174 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1175
1176 sysdev_unregister(&per_cpu(mce_dev, cpu));
Akinobu Mitad435d862007-10-18 03:05:15 -07001177
Andi Kleen91c6d402005-07-28 21:15:39 -07001178 return err;
1179}
1180
Jan Beulich2d9cd6c2008-08-29 13:15:04 +01001181static __cpuinit void mce_remove_device(unsigned int cpu)
Andi Kleen91c6d402005-07-28 21:15:39 -07001182{
Shaohua Li73ca5352006-01-11 22:43:06 +01001183 int i;
1184
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001185 if (!cpumask_test_cpu(cpu, mce_dev_initialized))
Andreas Herrmannbae19fe2007-11-14 17:00:44 -08001186 return;
1187
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001188 for (i = 0; mce_attrs[i]; i++)
1189 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1190
Andi Kleen0d7482e32009-02-17 23:07:13 +01001191 for (i = 0; i < banks; i++)
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001192 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
1193
1194 sysdev_unregister(&per_cpu(mce_dev, cpu));
1195 cpumask_clear_cpu(cpu, mce_dev_initialized);
Andi Kleen91c6d402005-07-28 21:15:39 -07001196}
Andi Kleen91c6d402005-07-28 21:15:39 -07001197
Andi Kleend6b75582009-02-12 13:39:31 +01001198/* Make sure there are no machine checks on offlined CPUs. */
H. Peter Anvinec5b3d32009-02-23 14:01:04 -08001199static void mce_disable_cpu(void *h)
Andi Kleend6b75582009-02-12 13:39:31 +01001200{
Andi Kleen88ccbed2009-02-12 13:49:36 +01001201 unsigned long action = *(unsigned long *)h;
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001202 int i;
Andi Kleend6b75582009-02-12 13:39:31 +01001203
1204 if (!mce_available(&current_cpu_data))
1205 return;
Andi Kleen88ccbed2009-02-12 13:49:36 +01001206 if (!(action & CPU_TASKS_FROZEN))
1207 cmci_clear();
Andi Kleen06b7a7a2009-04-27 18:37:43 +02001208 for (i = 0; i < banks; i++) {
1209 if (!skip_bank_init(i))
1210 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
1211 }
Andi Kleend6b75582009-02-12 13:39:31 +01001212}
1213
H. Peter Anvinec5b3d32009-02-23 14:01:04 -08001214static void mce_reenable_cpu(void *h)
Andi Kleend6b75582009-02-12 13:39:31 +01001215{
Andi Kleen88ccbed2009-02-12 13:49:36 +01001216 unsigned long action = *(unsigned long *)h;
Ingo Molnare9eee032009-04-08 12:31:17 +02001217 int i;
Andi Kleend6b75582009-02-12 13:39:31 +01001218
1219 if (!mce_available(&current_cpu_data))
1220 return;
Ingo Molnare9eee032009-04-08 12:31:17 +02001221
Andi Kleen88ccbed2009-02-12 13:49:36 +01001222 if (!(action & CPU_TASKS_FROZEN))
1223 cmci_reenable();
Andi Kleen06b7a7a2009-04-27 18:37:43 +02001224 for (i = 0; i < banks; i++) {
1225 if (!skip_bank_init(i))
1226 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
1227 }
Andi Kleend6b75582009-02-12 13:39:31 +01001228}
1229
Andi Kleen91c6d402005-07-28 21:15:39 -07001230/* Get notified when a cpu comes on/off. Be hotplug friendly. */
Ingo Molnare9eee032009-04-08 12:31:17 +02001231static int __cpuinit
1232mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
Andi Kleen91c6d402005-07-28 21:15:39 -07001233{
1234 unsigned int cpu = (unsigned long)hcpu;
Andi Kleen52d168e2009-02-12 13:39:29 +01001235 struct timer_list *t = &per_cpu(mce_timer, cpu);
Andi Kleen91c6d402005-07-28 21:15:39 -07001236
1237 switch (action) {
Andreas Herrmannbae19fe2007-11-14 17:00:44 -08001238 case CPU_ONLINE:
1239 case CPU_ONLINE_FROZEN:
1240 mce_create_device(cpu);
Rafael J. Wysocki87357282008-08-22 22:23:09 +02001241 if (threshold_cpu_callback)
1242 threshold_cpu_callback(action, cpu);
Andi Kleen91c6d402005-07-28 21:15:39 -07001243 break;
Andi Kleen91c6d402005-07-28 21:15:39 -07001244 case CPU_DEAD:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07001245 case CPU_DEAD_FROZEN:
Rafael J. Wysocki87357282008-08-22 22:23:09 +02001246 if (threshold_cpu_callback)
1247 threshold_cpu_callback(action, cpu);
Andi Kleen91c6d402005-07-28 21:15:39 -07001248 mce_remove_device(cpu);
1249 break;
Andi Kleen52d168e2009-02-12 13:39:29 +01001250 case CPU_DOWN_PREPARE:
1251 case CPU_DOWN_PREPARE_FROZEN:
1252 del_timer_sync(t);
Andi Kleen88ccbed2009-02-12 13:49:36 +01001253 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
Andi Kleen52d168e2009-02-12 13:39:29 +01001254 break;
1255 case CPU_DOWN_FAILED:
1256 case CPU_DOWN_FAILED_FROZEN:
Andi Kleen6298c512009-04-09 12:28:22 +02001257 t->expires = round_jiffies(jiffies +
1258 __get_cpu_var(next_interval));
Andi Kleen52d168e2009-02-12 13:39:29 +01001259 add_timer_on(t, cpu);
Andi Kleen88ccbed2009-02-12 13:49:36 +01001260 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1261 break;
1262 case CPU_POST_DEAD:
1263 /* intentionally ignoring frozen here */
1264 cmci_rediscover(cpu);
Andi Kleen52d168e2009-02-12 13:39:29 +01001265 break;
Andi Kleen91c6d402005-07-28 21:15:39 -07001266 }
Andreas Herrmannbae19fe2007-11-14 17:00:44 -08001267 return NOTIFY_OK;
Andi Kleen91c6d402005-07-28 21:15:39 -07001268}
1269
Sam Ravnborg1e356692008-01-30 13:33:36 +01001270static struct notifier_block mce_cpu_notifier __cpuinitdata = {
Andi Kleen91c6d402005-07-28 21:15:39 -07001271 .notifier_call = mce_cpu_callback,
1272};
1273
Andi Kleen0d7482e32009-02-17 23:07:13 +01001274static __init int mce_init_banks(void)
1275{
1276 int i;
1277
1278 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1279 GFP_KERNEL);
1280 if (!bank_attrs)
1281 return -ENOMEM;
1282
1283 for (i = 0; i < banks; i++) {
1284 struct sysdev_attribute *a = &bank_attrs[i];
Ingo Molnare9eee032009-04-08 12:31:17 +02001285
1286 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
Andi Kleen0d7482e32009-02-17 23:07:13 +01001287 if (!a->attr.name)
1288 goto nomem;
Ingo Molnare9eee032009-04-08 12:31:17 +02001289
1290 a->attr.mode = 0644;
1291 a->show = show_bank;
1292 a->store = set_bank;
Andi Kleen0d7482e32009-02-17 23:07:13 +01001293 }
1294 return 0;
1295
1296nomem:
1297 while (--i >= 0)
1298 kfree(bank_attrs[i].attr.name);
1299 kfree(bank_attrs);
1300 bank_attrs = NULL;
Ingo Molnare9eee032009-04-08 12:31:17 +02001301
Andi Kleen0d7482e32009-02-17 23:07:13 +01001302 return -ENOMEM;
1303}
1304
Andi Kleen91c6d402005-07-28 21:15:39 -07001305static __init int mce_init_device(void)
1306{
1307 int err;
1308 int i = 0;
1309
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 if (!mce_available(&boot_cpu_data))
1311 return -EIO;
Andi Kleen0d7482e32009-02-17 23:07:13 +01001312
Ingo Molnarcb491fc2009-04-08 12:31:17 +02001313 alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
Rusty Russell996867d2009-03-13 14:49:51 +10301314
Andi Kleen0d7482e32009-02-17 23:07:13 +01001315 err = mce_init_banks();
1316 if (err)
1317 return err;
1318
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 err = sysdev_class_register(&mce_sysclass);
Akinobu Mitad435d862007-10-18 03:05:15 -07001320 if (err)
1321 return err;
Andi Kleen91c6d402005-07-28 21:15:39 -07001322
1323 for_each_online_cpu(i) {
Akinobu Mitad435d862007-10-18 03:05:15 -07001324 err = mce_create_device(i);
1325 if (err)
1326 return err;
Andi Kleen91c6d402005-07-28 21:15:39 -07001327 }
1328
Chandra Seetharamanbe6b5a32006-07-30 03:03:37 -07001329 register_hotcpu_notifier(&mce_cpu_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330 misc_register(&mce_log_device);
Ingo Molnare9eee032009-04-08 12:31:17 +02001331
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333}
Andi Kleen91c6d402005-07-28 21:15:39 -07001334
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335device_initcall(mce_init_device);
Ingo Molnara988d332009-04-08 12:31:25 +02001336
Andi Kleen4efc0672009-04-28 19:07:31 +02001337#else /* CONFIG_X86_OLD_MCE: */
Ingo Molnara988d332009-04-08 12:31:25 +02001338
Ingo Molnara988d332009-04-08 12:31:25 +02001339int nr_mce_banks;
1340EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
1341
Ingo Molnara988d332009-04-08 12:31:25 +02001342/* This has to be run for each processor */
1343void mcheck_init(struct cpuinfo_x86 *c)
1344{
1345 if (mce_disabled == 1)
1346 return;
1347
1348 switch (c->x86_vendor) {
1349 case X86_VENDOR_AMD:
1350 amd_mcheck_init(c);
1351 break;
1352
1353 case X86_VENDOR_INTEL:
1354 if (c->x86 == 5)
1355 intel_p5_mcheck_init(c);
1356 if (c->x86 == 6)
1357 intel_p6_mcheck_init(c);
1358 if (c->x86 == 15)
1359 intel_p4_mcheck_init(c);
1360 break;
1361
1362 case X86_VENDOR_CENTAUR:
1363 if (c->x86 == 5)
1364 winchip_mcheck_init(c);
1365 break;
1366
1367 default:
1368 break;
1369 }
Ingo Molnarb6592942009-04-08 12:31:27 +02001370 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
Ingo Molnara988d332009-04-08 12:31:25 +02001371}
1372
Ingo Molnara988d332009-04-08 12:31:25 +02001373static int __init mcheck_enable(char *str)
1374{
1375 mce_disabled = -1;
1376 return 1;
1377}
1378
Ingo Molnara988d332009-04-08 12:31:25 +02001379__setup("mce", mcheck_enable);
1380
Andi Kleend7c3c9a2009-04-28 23:07:25 +02001381#endif /* CONFIG_X86_OLD_MCE */
1382
1383/*
1384 * Old style boot options parsing. Only for compatibility.
1385 */
1386static int __init mcheck_disable(char *str)
1387{
1388 mce_disabled = 1;
1389 return 1;
1390}
1391__setup("nomce", mcheck_disable);