blob: acd5816b1a6f214d2dfc5253d674ed9d81492fe5 [file] [log] [blame]
/*
 * Machine check handler.
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 */
7
8#include <linux/init.h>
9#include <linux/types.h>
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/string.h>
13#include <linux/rcupdate.h>
14#include <linux/kallsyms.h>
15#include <linux/sysdev.h>
16#include <linux/miscdevice.h>
17#include <linux/fs.h>
Randy Dunlapa9415642006-01-11 12:17:48 -080018#include <linux/capability.h>
Andi Kleen91c6d402005-07-28 21:15:39 -070019#include <linux/cpu.h>
20#include <linux/percpu.h>
Andi Kleen8c566ef2005-09-12 18:49:24 +020021#include <linux/ctype.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <asm/processor.h>
23#include <asm/msr.h>
24#include <asm/mce.h>
25#include <asm/kdebug.h>
26#include <asm/uaccess.h>
Andi Kleen0a9c3ee2006-01-11 22:46:54 +010027#include <asm/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
29#define MISC_MCELOG_MINOR 227
Shaohua Li73ca5352006-01-11 22:43:06 +010030#define NR_BANKS 6
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
Andi Kleen553f2652006-04-07 19:49:57 +020032atomic_t mce_entry;
33
Linus Torvalds1da177e2005-04-16 15:20:36 -070034static int mce_dont_init;
35
36/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
37 3: never panic or exit (for testing only) */
38static int tolerant = 1;
39static int banks;
40static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
41static unsigned long console_logged;
42static int notify_user;
Andi Kleen94ad8472005-04-16 15:25:09 -070043static int rip_msr;
Andi Kleene5835382005-11-05 17:25:54 +010044static int mce_bootlog = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
46/*
47 * Lockless MCE logging infrastructure.
48 * This avoids deadlocks on printk locks without having to break locks. Also
49 * separate MCEs from kernel messages to avoid bogus bug reports.
50 */
51
52struct mce_log mcelog = {
53 MCE_LOG_SIGNATURE,
54 MCE_LOG_LEN,
55};
56
57void mce_log(struct mce *mce)
58{
59 unsigned next, entry;
60 mce->finished = 0;
Mike Waychison76441432005-09-30 00:01:27 +020061 wmb();
Linus Torvalds1da177e2005-04-16 15:20:36 -070062 for (;;) {
63 entry = rcu_dereference(mcelog.next);
Mike Waychison76441432005-09-30 00:01:27 +020064 /* The rmb forces the compiler to reload next in each
65 iteration */
66 rmb();
Andi Kleen673242c2005-09-12 18:49:24 +020067 for (;;) {
68 /* When the buffer fills up discard new entries. Assume
69 that the earlier errors are the more interesting. */
70 if (entry >= MCE_LOG_LEN) {
71 set_bit(MCE_OVERFLOW, &mcelog.flags);
72 return;
73 }
74 /* Old left over entry. Skip. */
75 if (mcelog.entry[entry].finished) {
76 entry++;
77 continue;
78 }
Mike Waychison76441432005-09-30 00:01:27 +020079 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 }
Linus Torvalds1da177e2005-04-16 15:20:36 -070081 smp_rmb();
82 next = entry + 1;
83 if (cmpxchg(&mcelog.next, entry, next) == entry)
84 break;
85 }
86 memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
Mike Waychison76441432005-09-30 00:01:27 +020087 wmb();
Linus Torvalds1da177e2005-04-16 15:20:36 -070088 mcelog.entry[entry].finished = 1;
Mike Waychison76441432005-09-30 00:01:27 +020089 wmb();
Linus Torvalds1da177e2005-04-16 15:20:36 -070090
91 if (!test_and_set_bit(0, &console_logged))
92 notify_user = 1;
93}
94
95static void print_mce(struct mce *m)
96{
97 printk(KERN_EMERG "\n"
Andi Kleen48551702006-01-11 22:44:48 +010098 KERN_EMERG "HARDWARE ERROR\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -070099 KERN_EMERG
100 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
101 m->cpu, m->mcgstatus, m->bank, m->status);
102 if (m->rip) {
103 printk(KERN_EMERG
104 "RIP%s %02x:<%016Lx> ",
105 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
106 m->cs, m->rip);
107 if (m->cs == __KERNEL_CS)
108 print_symbol("{%s}", m->rip);
109 printk("\n");
110 }
111 printk(KERN_EMERG "TSC %Lx ", m->tsc);
112 if (m->addr)
113 printk("ADDR %Lx ", m->addr);
114 if (m->misc)
115 printk("MISC %Lx ", m->misc);
116 printk("\n");
Andi Kleen48551702006-01-11 22:44:48 +0100117 printk(KERN_EMERG "This is not a software problem!\n");
118 printk(KERN_EMERG
119 "Run through mcelog --ascii to decode and contact your hardware vendor\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120}
121
122static void mce_panic(char *msg, struct mce *backup, unsigned long start)
123{
124 int i;
125 oops_begin();
126 for (i = 0; i < MCE_LOG_LEN; i++) {
127 unsigned long tsc = mcelog.entry[i].tsc;
128 if (time_before(tsc, start))
129 continue;
130 print_mce(&mcelog.entry[i]);
131 if (backup && mcelog.entry[i].tsc == backup->tsc)
132 backup = NULL;
133 }
134 if (backup)
135 print_mce(backup);
136 if (tolerant >= 3)
137 printk("Fake panic: %s\n", msg);
138 else
139 panic(msg);
140}
141
142static int mce_available(struct cpuinfo_x86 *c)
143{
Akinobu Mita3d1712c2006-03-24 03:15:11 -0800144 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145}
146
Andi Kleen94ad8472005-04-16 15:25:09 -0700147static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
148{
149 if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
150 m->rip = regs->rip;
151 m->cs = regs->cs;
152 } else {
153 m->rip = 0;
154 m->cs = 0;
155 }
156 if (rip_msr) {
157 /* Assume the RIP in the MSR is exact. Is this true? */
158 m->mcgstatus |= MCG_STATUS_EIPV;
159 rdmsrl(rip_msr, m->rip);
160 m->cs = 0;
161 }
162}
163
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164/*
165 * The actual machine check handler
166 */
167
168void do_machine_check(struct pt_regs * regs, long error_code)
169{
170 struct mce m, panicm;
171 int nowayout = (tolerant < 1);
172 int kill_it = 0;
173 u64 mcestart = 0;
174 int i;
175 int panicm_found = 0;
176
Andi Kleen553f2652006-04-07 19:49:57 +0200177 atomic_inc(&mce_entry);
178
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179 if (regs)
Jan Beulich6e3f3612006-01-11 22:42:14 +0100180 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 if (!banks)
Andi Kleen553f2652006-04-07 19:49:57 +0200182 goto out2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
184 memset(&m, 0, sizeof(struct mce));
Andi Kleen0a9c3ee2006-01-11 22:46:54 +0100185 m.cpu = safe_smp_processor_id();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
187 if (!(m.mcgstatus & MCG_STATUS_RIPV))
188 kill_it = 1;
189
190 rdtscll(mcestart);
191 barrier();
192
193 for (i = 0; i < banks; i++) {
194 if (!bank[i])
195 continue;
196
197 m.misc = 0;
198 m.addr = 0;
199 m.bank = i;
200 m.tsc = 0;
201
202 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
203 if ((m.status & MCI_STATUS_VAL) == 0)
204 continue;
205
206 if (m.status & MCI_STATUS_EN) {
207 /* In theory _OVER could be a nowayout too, but
208 assume any overflowed errors were no fatal. */
209 nowayout |= !!(m.status & MCI_STATUS_PCC);
210 kill_it |= !!(m.status & MCI_STATUS_UC);
211 }
212
213 if (m.status & MCI_STATUS_MISCV)
214 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
215 if (m.status & MCI_STATUS_ADDRV)
216 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
217
Andi Kleen94ad8472005-04-16 15:25:09 -0700218 mce_get_rip(&m, regs);
Andi Kleend5172f22005-08-07 09:42:07 -0700219 if (error_code >= 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 rdtscll(m.tsc);
221 wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
Andi Kleend5172f22005-08-07 09:42:07 -0700222 if (error_code != -2)
223 mce_log(&m);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224
225 /* Did this bank cause the exception? */
226 /* Assume that the bank with uncorrectable errors did it,
227 and that there is only a single one. */
228 if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
229 panicm = m;
230 panicm_found = 1;
231 }
232
Randy Dunlap9f158332005-09-13 01:25:16 -0700233 add_taint(TAINT_MACHINE_CHECK);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 }
235
236 /* Never do anything final in the polling timer */
237 if (!regs)
238 goto out;
239
240 /* If we didn't find an uncorrectable error, pick
241 the last one (shouldn't happen, just being safe). */
242 if (!panicm_found)
243 panicm = m;
244 if (nowayout)
245 mce_panic("Machine check", &panicm, mcestart);
246 if (kill_it) {
247 int user_space = 0;
248
249 if (m.mcgstatus & MCG_STATUS_RIPV)
250 user_space = panicm.rip && (panicm.cs & 3);
251
252 /* When the machine was in user space and the CPU didn't get
253 confused it's normally not necessary to panic, unless you
254 are paranoid (tolerant == 0)
255
256 RED-PEN could be more tolerant for MCEs in idle,
257 but most likely they occur at boot anyways, where
258 it is best to just halt the machine. */
259 if ((!user_space && (panic_on_oops || tolerant < 2)) ||
260 (unsigned)current->pid <= 1)
261 mce_panic("Uncorrected machine check", &panicm, mcestart);
262
263 /* do_exit takes an awful lot of locks and has as
264 slight risk of deadlocking. If you don't want that
265 don't set tolerant >= 2 */
266 if (tolerant < 3)
267 do_exit(SIGBUS);
268 }
269
270 out:
271 /* Last thing done in the machine check exception to clear state. */
272 wrmsrl(MSR_IA32_MCG_STATUS, 0);
Andi Kleen553f2652006-04-07 19:49:57 +0200273 out2:
274 atomic_dec(&mce_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275}
276
/*
 * Periodic polling timer for "silent" machine check errors.
 */

/* Polling period in seconds; 0 means no polling is scheduled. */
static int check_interval = 5 * 60; /* 5 minutes */
static void mcheck_timer(void *data);
/* Work item that runs mcheck_timer(). */
static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
284
285static void mcheck_check_cpu(void *info)
286{
287 if (mce_available(&current_cpu_data))
288 do_machine_check(NULL, 0);
289}
290
291static void mcheck_timer(void *data)
292{
293 on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
294 schedule_delayed_work(&mcheck_work, check_interval * HZ);
295
296 /*
297 * It's ok to read stale data here for notify_user and
298 * console_logged as we'll simply get the updated versions
299 * on the next mcheck_timer execution and atomic operations
300 * on console_logged act as synchronization for notify_user
301 * writes.
302 */
303 if (notify_user && console_logged) {
304 notify_user = 0;
305 clear_bit(0, &console_logged);
306 printk(KERN_INFO "Machine check events logged\n");
307 }
308}
309
310
311static __init int periodic_mcheck_init(void)
312{
313 if (check_interval)
314 schedule_delayed_work(&mcheck_work, check_interval*HZ);
315 return 0;
316}
317__initcall(periodic_mcheck_init);
318
319
320/*
321 * Initialize Machine Checks for a CPU.
322 */
323static void mce_init(void *dummy)
324{
325 u64 cap;
326 int i;
327
328 rdmsrl(MSR_IA32_MCG_CAP, cap);
329 banks = cap & 0xff;
330 if (banks > NR_BANKS) {
331 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
332 banks = NR_BANKS;
333 }
Andi Kleen94ad8472005-04-16 15:25:09 -0700334 /* Use accurate RIP reporting if available. */
335 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
336 rip_msr = MSR_IA32_MCG_EIP;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337
338 /* Log the machine checks left over from the previous reset.
339 This also clears all registers */
Andi Kleend5172f22005-08-07 09:42:07 -0700340 do_machine_check(NULL, mce_bootlog ? -1 : -2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341
342 set_in_cr4(X86_CR4_MCE);
343
344 if (cap & MCG_CTL_P)
345 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
346
347 for (i = 0; i < banks; i++) {
348 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
349 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
350 }
351}
352
353/* Add per CPU specific workarounds here */
Ashok Raje6982c62005-06-25 14:54:58 -0700354static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355{
356 /* This should be disabled by the BIOS, but isn't always */
357 if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
358 /* disable GART TBL walk error reporting, which trips off
359 incorrectly with the IOMMU & 3ware & Cerberus. */
360 clear_bit(10, &bank[4]);
Andi Kleene5835382005-11-05 17:25:54 +0100361 /* Lots of broken BIOS around that don't clear them
362 by default and leave crap in there. Don't log. */
363 mce_bootlog = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 }
Andi Kleene5835382005-11-05 17:25:54 +0100365
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366}
367
Ashok Raje6982c62005-06-25 14:54:58 -0700368static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369{
370 switch (c->x86_vendor) {
371 case X86_VENDOR_INTEL:
372 mce_intel_feature_init(c);
373 break;
Jacob Shin89b831e2005-11-05 17:25:53 +0100374 case X86_VENDOR_AMD:
375 mce_amd_feature_init(c);
376 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 default:
378 break;
379 }
380}
381
382/*
383 * Called for each booted CPU to set up machine checks.
384 * Must be called with preempt off.
385 */
Ashok Raje6982c62005-06-25 14:54:58 -0700386void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387{
Ashok Raj7ded5682006-02-03 21:51:23 +0100388 static cpumask_t mce_cpus = CPU_MASK_NONE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389
390 mce_cpu_quirks(c);
391
392 if (mce_dont_init ||
393 cpu_test_and_set(smp_processor_id(), mce_cpus) ||
394 !mce_available(c))
395 return;
396
397 mce_init(NULL);
398 mce_cpu_features(c);
399}
400
/*
 * Character device to read and clear the MCE log.
 */

/* Store the calling CPU's TSC into its slot of the array passed in @data. */
static void collect_tscs(void *data)
{
	unsigned long *tsc_by_cpu = data;

	rdtscll(tsc_by_cpu[smp_processor_id()]);
}
410
411static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
412{
Andi Kleenf0de53b2005-04-16 15:25:10 -0700413 unsigned long *cpu_tsc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 static DECLARE_MUTEX(mce_read_sem);
415 unsigned next;
416 char __user *buf = ubuf;
417 int i, err;
418
Andi Kleenf0de53b2005-04-16 15:25:10 -0700419 cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
420 if (!cpu_tsc)
421 return -ENOMEM;
422
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 down(&mce_read_sem);
424 next = rcu_dereference(mcelog.next);
425
426 /* Only supports full reads right now */
427 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
428 up(&mce_read_sem);
Andi Kleenf0de53b2005-04-16 15:25:10 -0700429 kfree(cpu_tsc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 return -EINVAL;
431 }
432
433 err = 0;
Andi Kleen673242c2005-09-12 18:49:24 +0200434 for (i = 0; i < next; i++) {
435 unsigned long start = jiffies;
436 while (!mcelog.entry[i].finished) {
437 if (!time_before(jiffies, start + 2)) {
438 memset(mcelog.entry + i,0, sizeof(struct mce));
439 continue;
440 }
441 cpu_relax();
442 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 smp_rmb();
444 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
445 buf += sizeof(struct mce);
446 }
447
448 memset(mcelog.entry, 0, next * sizeof(struct mce));
449 mcelog.next = 0;
450
Paul E. McKenneyb2b18662005-06-25 14:55:38 -0700451 synchronize_sched();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452
453 /* Collect entries that were still getting written before the synchronize. */
454
455 on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
456 for (i = next; i < MCE_LOG_LEN; i++) {
457 if (mcelog.entry[i].finished &&
458 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
459 err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
460 smp_rmb();
461 buf += sizeof(struct mce);
462 memset(&mcelog.entry[i], 0, sizeof(struct mce));
463 }
464 }
465 up(&mce_read_sem);
Andi Kleenf0de53b2005-04-16 15:25:10 -0700466 kfree(cpu_tsc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 return err ? -EFAULT : buf - ubuf;
468}
469
470static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
471{
472 int __user *p = (int __user *)arg;
473 if (!capable(CAP_SYS_ADMIN))
474 return -EPERM;
475 switch (cmd) {
476 case MCE_GET_RECORD_LEN:
477 return put_user(sizeof(struct mce), p);
478 case MCE_GET_LOG_LEN:
479 return put_user(MCE_LOG_LEN, p);
480 case MCE_GETCLEAR_FLAGS: {
481 unsigned flags;
482 do {
483 flags = mcelog.flags;
484 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
485 return put_user(flags, p);
486 }
487 default:
488 return -ENOTTY;
489 }
490}
491
492static struct file_operations mce_chrdev_ops = {
493 .read = mce_read,
494 .ioctl = mce_ioctl,
495};
496
497static struct miscdevice mce_log_device = {
498 MISC_MCELOG_MINOR,
499 "mcelog",
500 &mce_chrdev_ops,
501};
502
503/*
504 * Old style boot options parsing. Only for compatibility.
505 */
506
507static int __init mcheck_disable(char *str)
508{
509 mce_dont_init = 1;
OGAWA Hirofumi9b410462006-03-31 02:30:33 -0800510 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511}
512
513/* mce=off disables machine check. Note you can reenable it later
Andi Kleend5172f22005-08-07 09:42:07 -0700514 using sysfs.
Andi Kleen8c566ef2005-09-12 18:49:24 +0200515 mce=TOLERANCELEVEL (number, see above)
Andi Kleene5835382005-11-05 17:25:54 +0100516 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
517 mce=nobootlog Don't log MCEs from before booting. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518static int __init mcheck_enable(char *str)
519{
Andi Kleend5172f22005-08-07 09:42:07 -0700520 if (*str == '=')
521 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 if (!strcmp(str, "off"))
523 mce_dont_init = 1;
Andi Kleene5835382005-11-05 17:25:54 +0100524 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
525 mce_bootlog = str[0] == 'b';
Andi Kleen8c566ef2005-09-12 18:49:24 +0200526 else if (isdigit(str[0]))
527 get_option(&str, &tolerant);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 else
529 printk("mce= argument %s ignored. Please use /sys", str);
OGAWA Hirofumi9b410462006-03-31 02:30:33 -0800530 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531}
532
533__setup("nomce", mcheck_disable);
534__setup("mce", mcheck_enable);
535
536/*
537 * Sysfs support
538 */
539
Andi Kleen413588c2005-09-12 18:49:24 +0200540/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
541 Only one CPU is active at this time, the others get readded later using
542 CPU hotplug. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543static int mce_resume(struct sys_device *dev)
544{
Andi Kleen413588c2005-09-12 18:49:24 +0200545 mce_init(NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 return 0;
547}
548
549/* Reinit MCEs after user configuration changes */
550static void mce_restart(void)
551{
552 if (check_interval)
553 cancel_delayed_work(&mcheck_work);
554 /* Timer race is harmless here */
555 on_each_cpu(mce_init, NULL, 1, 1);
556 if (check_interval)
557 schedule_delayed_work(&mcheck_work, check_interval*HZ);
558}
559
560static struct sysdev_class mce_sysclass = {
561 .resume = mce_resume,
562 set_kset_name("machinecheck"),
563};
564
Jacob Shinfff2e892006-06-26 13:58:50 +0200565DEFINE_PER_CPU(struct sys_device, device_mce);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566
567/* Why are there no generic functions for this? */
568#define ACCESSOR(name, var, start) \
569 static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
570 return sprintf(buf, "%lx\n", (unsigned long)var); \
571 } \
572 static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
573 char *end; \
574 unsigned long new = simple_strtoul(buf, &end, 0); \
575 if (end == buf) return -EINVAL; \
576 var = new; \
577 start; \
578 return end-buf; \
579 } \
580 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
581
582ACCESSOR(bank0ctl,bank[0],mce_restart())
583ACCESSOR(bank1ctl,bank[1],mce_restart())
584ACCESSOR(bank2ctl,bank[2],mce_restart())
585ACCESSOR(bank3ctl,bank[3],mce_restart())
586ACCESSOR(bank4ctl,bank[4],mce_restart())
Shaohua Li73ca5352006-01-11 22:43:06 +0100587ACCESSOR(bank5ctl,bank[5],mce_restart())
588static struct sysdev_attribute * bank_attributes[NR_BANKS] = {
589 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
590 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591ACCESSOR(tolerant,tolerant,)
592ACCESSOR(check_interval,check_interval,mce_restart())
593
Andi Kleen91c6d402005-07-28 21:15:39 -0700594/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
595static __cpuinit int mce_create_device(unsigned int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596{
597 int err;
Shaohua Li73ca5352006-01-11 22:43:06 +0100598 int i;
Andi Kleen91c6d402005-07-28 21:15:39 -0700599 if (!mce_available(&cpu_data[cpu]))
600 return -EIO;
601
602 per_cpu(device_mce,cpu).id = cpu;
603 per_cpu(device_mce,cpu).cls = &mce_sysclass;
604
605 err = sysdev_register(&per_cpu(device_mce,cpu));
606
607 if (!err) {
Shaohua Li73ca5352006-01-11 22:43:06 +0100608 for (i = 0; i < banks; i++)
609 sysdev_create_file(&per_cpu(device_mce,cpu),
610 bank_attributes[i]);
Andi Kleen91c6d402005-07-28 21:15:39 -0700611 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
612 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
613 }
614 return err;
615}
616
#ifdef CONFIG_HOTPLUG_CPU
/* Undo mce_create_device(): remove the attribute files, then the sysdev. */
static __cpuinit void mce_remove_device(unsigned int cpu)
{
	struct sys_device *dev = &per_cpu(device_mce, cpu);
	int b;

	for (b = 0; b < banks; b++)
		sysdev_remove_file(dev, bank_attributes[b]);
	sysdev_remove_file(dev, &attr_tolerant);
	sysdev_remove_file(dev, &attr_check_interval);
	sysdev_unregister(dev);
}
#endif
630
631/* Get notified when a cpu comes on/off. Be hotplug friendly. */
Chandra Seetharaman83d722f2006-04-24 19:35:21 -0700632static int
Andi Kleen91c6d402005-07-28 21:15:39 -0700633mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
634{
635 unsigned int cpu = (unsigned long)hcpu;
636
637 switch (action) {
638 case CPU_ONLINE:
639 mce_create_device(cpu);
640 break;
641#ifdef CONFIG_HOTPLUG_CPU
642 case CPU_DEAD:
643 mce_remove_device(cpu);
644 break;
645#endif
646 }
647 return NOTIFY_OK;
648}
649
650static struct notifier_block mce_cpu_notifier = {
651 .notifier_call = mce_cpu_callback,
652};
653
654static __init int mce_init_device(void)
655{
656 int err;
657 int i = 0;
658
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 if (!mce_available(&boot_cpu_data))
660 return -EIO;
661 err = sysdev_class_register(&mce_sysclass);
Andi Kleen91c6d402005-07-28 21:15:39 -0700662
663 for_each_online_cpu(i) {
664 mce_create_device(i);
665 }
666
667 register_cpu_notifier(&mce_cpu_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 misc_register(&mce_log_device);
669 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670}
Andi Kleen91c6d402005-07-28 21:15:39 -0700671
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672device_initcall(mce_init_device);