[PATCH] s390: improved machine check handling
Improved machine check handling. Kernel is now able to receive machine checks
while in kernel mode (system call, interrupt and program check handling).
Also register validation is now performed.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index ffa996c..5bb255e 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -31,14 +31,14 @@
extern struct workqueue_struct *slow_path_wq;
extern struct work_struct slow_path_work;
-static void
+static NORET_TYPE void
s390_handle_damage(char *msg)
{
- printk(KERN_EMERG "%s\n", msg);
#ifdef CONFIG_SMP
smp_send_stop();
#endif
disabled_wait((unsigned long) __builtin_return_address(0));
+ for(;;);
}
/*
@@ -122,40 +122,39 @@
return 0;
}
+struct mcck_struct {
+ int kill_task;
+ int channel_report;
+ int warning;
+ unsigned long long mcck_code;
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
/*
- * machine check handler.
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
*/
void
-s390_do_machine_check(void)
+s390_handle_mcck(void)
{
- struct mci *mci;
+ unsigned long flags;
+ struct mcck_struct mcck;
- mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+ /*
+ * Disable machine checks and get the current state of accumulated
+ * machine checks. Afterwards delete the old state and enable machine
+ * checks again.
+ */
+ local_irq_save(flags);
+ local_mcck_disable();
+ mcck = __get_cpu_var(cpu_mcck);
+ memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
+ clear_thread_flag(TIF_MCCK_PENDING);
+ local_mcck_enable();
+ local_irq_restore(flags);
- if (mci->sd) /* system damage */
- s390_handle_damage("received system damage machine check\n");
-
- if (mci->pd) /* instruction processing damage */
- s390_handle_damage("received instruction processing "
- "damage machine check\n");
-
- if (mci->se) /* storage error uncorrected */
- s390_handle_damage("received storage error uncorrected "
- "machine check\n");
-
- if (mci->sc) /* storage error corrected */
- printk(KERN_WARNING
- "received storage error corrected machine check\n");
-
- if (mci->ke) /* storage key-error uncorrected */
- s390_handle_damage("received storage key-error uncorrected "
- "machine check\n");
-
- if (mci->ds && mci->fa) /* storage degradation */
- s390_handle_damage("received storage degradation machine "
- "check\n");
-
- if (mci->cp) /* channel report word pending */
+ if (mcck.channel_report)
up(&m_sem);
#ifdef CONFIG_MACHCHK_WARNING
@@ -168,7 +167,7 @@
* On VM we only get one interrupt per virtally presented machinecheck.
* Though one suffices, we may get one interrupt per (virtual) processor.
*/
- if (mci->w) { /* WARNING pending ? */
+ if (mcck.warning) { /* WARNING pending ? */
static int mchchk_wng_posted = 0;
/*
* Use single machine clear, as we cannot handle smp right now
@@ -178,6 +177,261 @@
kill_proc(1, SIGPWR, 1);
}
#endif
+
+ if (mcck.kill_task) {
+ local_irq_enable();
+ printk(KERN_EMERG "mcck: Terminating task because of machine "
+ "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+ printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+ current->comm, current->pid);
+ do_exit(SIGSEGV);
+ }
+}
+
+/*
+ * returns 0 if all registers could be validated
+ * returns 1 otherwise
+ */
+static int
+s390_revalidate_registers(struct mci *mci)
+{
+ int kill_task;
+ u64 tmpclock;
+ u64 zero;
+ void *fpt_save_area, *fpt_creg_save_area;
+
+ kill_task = 0;
+ zero = 0;
+ /* General purpose registers */
+ if (!mci->gr)
+ /*
+ * General purpose registers couldn't be restored and have
+ * unknown contents. Process needs to be terminated.
+ */
+ kill_task = 1;
+
+ /* Revalidate floating point registers */
+ if (!mci->fp)
+ /*
+ * Floating point registers can't be restored and
+ * therefore the process needs to be terminated.
+ */
+ kill_task = 1;
+
+#ifndef __s390x__
+ asm volatile("ld 0,0(%0)\n"
+ "ld 2,8(%0)\n"
+ "ld 4,16(%0)\n"
+ "ld 6,24(%0)"
+ : : "a" (&S390_lowcore.floating_pt_save_area));
+#endif
+
+ if (MACHINE_HAS_IEEE) {
+#ifdef __s390x__
+ fpt_save_area = &S390_lowcore.floating_pt_save_area;
+ fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+#else
+ fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
+ fpt_creg_save_area = fpt_save_area+128;
+#endif
+ /* Floating point control register */
+ if (!mci->fc) {
+ /*
+ * Floating point control register can't be restored.
+ * Task will be terminated.
+ */
+ asm volatile ("lfpc 0(%0)" : : "a" (&zero));
+ kill_task = 1;
+
+ }
+ else
+ asm volatile (
+ "lfpc 0(%0)"
+ : : "a" (fpt_creg_save_area));
+
+ asm volatile("ld 0,0(%0)\n"
+ "ld 1,8(%0)\n"
+ "ld 2,16(%0)\n"
+ "ld 3,24(%0)\n"
+ "ld 4,32(%0)\n"
+ "ld 5,40(%0)\n"
+ "ld 6,48(%0)\n"
+ "ld 7,56(%0)\n"
+ "ld 8,64(%0)\n"
+ "ld 9,72(%0)\n"
+ "ld 10,80(%0)\n"
+ "ld 11,88(%0)\n"
+ "ld 12,96(%0)\n"
+ "ld 13,104(%0)\n"
+ "ld 14,112(%0)\n"
+ "ld 15,120(%0)\n"
+ : : "a" (fpt_save_area));
+ }
+
+ /* Revalidate access registers */
+ asm volatile("lam 0,15,0(%0)"
+ : : "a" (&S390_lowcore.access_regs_save_area));
+ if (!mci->ar)
+ /*
+ * Access registers have unknown contents.
+ * Terminating task.
+ */
+ kill_task = 1;
+
+ /* Revalidate control registers */
+ if (!mci->cr)
+ /*
+ * Control registers have unknown contents.
+ * Can't recover and therefore stopping machine.
+ */
+ s390_handle_damage("invalid control registers.");
+ else
+#ifdef __s390x__
+ asm volatile("lctlg 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+#else
+ asm volatile("lctl 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+#endif
+
+ /*
+ * We don't even try to revalidate the TOD register, since we simply
+ * can't write something sensible into that register.
+ */
+
+#ifdef __s390x__
+ /*
+ * See if we can revalidate the TOD programmable register with its
+ * old contents (should be zero) otherwise set it to zero.
+ */
+ if (!mci->pr)
+ asm volatile("sr 0,0\n"
+ "sckpf"
+ : : : "0", "cc");
+ else
+ asm volatile(
+ "l 0,0(%0)\n"
+ "sckpf"
+ : : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc");
+#endif
+
+ /* Revalidate clock comparator register */
+ asm volatile ("stck 0(%1)\n"
+ "sckc 0(%1)"
+ : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
+
+ /* Check if old PSW is valid */
+ if (!mci->wp)
+ /*
+ * Can't tell if we come from user or kernel mode
+ * -> stopping machine.
+ */
+ s390_handle_damage("old psw invalid.");
+
+ if (!mci->ms || !mci->pm || !mci->ia)
+ kill_task = 1;
+
+ return kill_task;
+}
+
+/*
+ * machine check handler.
+ */
+void
+s390_do_machine_check(struct pt_regs *regs)
+{
+ struct mci *mci;
+ struct mcck_struct *mcck;
+ int umode;
+
+ mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+ mcck = &__get_cpu_var(cpu_mcck);
+ umode = user_mode(regs);
+
+ if (mci->sd)
+ /* System damage -> stopping machine */
+ s390_handle_damage("received system damage machine check.");
+
+ if (mci->pd) {
+ if (mci->b) {
+ /* Processing backup -> verify if we can survive this */
+ u64 z_mcic, o_mcic, t_mcic;
+#ifdef __s390x__
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+ 1ULL<<16);
+#else
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
+ 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
+#endif
+ t_mcic = *(u64 *)mci;
+
+ if (((t_mcic & z_mcic) != 0) ||
+ ((t_mcic & o_mcic) != o_mcic)) {
+ s390_handle_damage("processing backup machine "
+ "check with damage.");
+ }
+ if (!umode)
+ s390_handle_damage("processing backup machine "
+ "check in kernel mode.");
+ mcck->kill_task = 1;
+ mcck->mcck_code = *(unsigned long long *) mci;
+ }
+ else {
+ /* Processing damage -> stopping machine */
+ s390_handle_damage("received instruction processing "
+ "damage machine check.");
+ }
+ }
+ if (s390_revalidate_registers(mci)) {
+ if (umode) {
+ /*
+ * Couldn't restore all register contents while in
+ * user mode -> mark task for termination.
+ */
+ mcck->kill_task = 1;
+ mcck->mcck_code = *(unsigned long long *) mci;
+ set_thread_flag(TIF_MCCK_PENDING);
+ }
+ else
+ /*
+ * Couldn't restore all register contents while in
+ * kernel mode -> stopping machine.
+ */
+ s390_handle_damage("unable to revalidate registers.");
+ }
+
+ if (mci->se)
+ /* Storage error uncorrected */
+ s390_handle_damage("received storage error uncorrected "
+ "machine check.");
+
+ if (mci->ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage("received storage key-error uncorrected "
+ "machine check.");
+
+ if (mci->ds && mci->fa)
+ /* Storage degradation */
+ s390_handle_damage("received storage degradation machine "
+ "check.");
+
+ if (mci->cp) {
+ /* Channel report word pending */
+ mcck->channel_report = 1;
+ set_thread_flag(TIF_MCCK_PENDING);
+ }
+
+ if (mci->w) {
+ /* Warning pending */
+ mcck->warning = 1;
+ set_thread_flag(TIF_MCCK_PENDING);
+ }
}
/*
@@ -189,9 +443,8 @@
machine_check_init(void)
{
init_MUTEX_LOCKED(&m_sem);
- ctl_clear_bit(14, 25); /* disable damage MCH */
- ctl_set_bit(14, 26); /* enable degradation MCH */
- ctl_set_bit(14, 27); /* enable system recovery MCH */
+ ctl_clear_bit(14, 25); /* disable external damage MCH */
+ ctl_set_bit(14, 27); /* enable system recovery MCH */
#ifdef CONFIG_MACHCHK_WARNING
ctl_set_bit(14, 24); /* enable warning MCH */
#endif
diff --git a/drivers/s390/s390mach.h b/drivers/s390/s390mach.h
index 7e26f0f..4eaa701 100644
--- a/drivers/s390/s390mach.h
+++ b/drivers/s390/s390mach.h
@@ -16,20 +16,45 @@
__u32 sd : 1; /* 00 system damage */
__u32 pd : 1; /* 01 instruction-processing damage */
__u32 sr : 1; /* 02 system recovery */
- __u32 to_be_defined_1 : 4; /* 03-06 */
+ __u32 to_be_defined_1 : 1; /* 03 */
+ __u32 cd : 1; /* 04 timing-facility damage */
+ __u32 ed : 1; /* 05 external damage */
+ __u32 to_be_defined_2 : 1; /* 06 */
__u32 dg : 1; /* 07 degradation */
__u32 w : 1; /* 08 warning pending */
__u32 cp : 1; /* 09 channel-report pending */
- __u32 to_be_defined_2 : 6; /* 10-15 */
+ __u32 sp : 1; /* 10 service-processor damage */
+ __u32 ck : 1; /* 11 channel-subsystem damage */
+ __u32 to_be_defined_3 : 2; /* 12-13 */
+ __u32 b : 1; /* 14 backed up */
+ __u32 to_be_defined_4 : 1; /* 15 */
__u32 se : 1; /* 16 storage error uncorrected */
__u32 sc : 1; /* 17 storage error corrected */
__u32 ke : 1; /* 18 storage-key error uncorrected */
__u32 ds : 1; /* 19 storage degradation */
- __u32 to_be_defined_3 : 4; /* 20-23 */
+ __u32 wp : 1; /* 20 psw mwp validity */
+ __u32 ms : 1; /* 21 psw mask and key validity */
+ __u32 pm : 1; /* 22 psw program mask and cc validity */
+ __u32 ia : 1; /* 23 psw instruction address validity */
__u32 fa : 1; /* 24 failing storage address validity */
- __u32 to_be_defined_4 : 7; /* 25-31 */
+ __u32 to_be_defined_5 : 1; /* 25 */
+ __u32 ec : 1; /* 26 external damage code validity */
+ __u32 fp : 1; /* 27 floating point register validity */
+ __u32 gr : 1; /* 28 general register validity */
+ __u32 cr : 1; /* 29 control register validity */
+ __u32 to_be_defined_6 : 1; /* 30 */
+ __u32 st : 1; /* 31 storage logical validity */
__u32 ie : 1; /* 32 indirect storage error */
- __u32 to_be_defined_5 : 31; /* 33-63 */
+ __u32 ar : 1; /* 33 access register validity */
+ __u32 da : 1; /* 34 delayed access exception */
+ __u32 to_be_defined_7 : 7; /* 35-41 */
+ __u32 pr : 1; /* 42 tod programmable register validity */
+ __u32 fc : 1; /* 43 fp control register validity */
+ __u32 ap : 1; /* 44 ancillary report */
+ __u32 to_be_defined_8 : 1; /* 45 */
+ __u32 ct : 1; /* 46 cpu timer validity */
+ __u32 cc : 1; /* 47 clock comparator validity */
+ __u32 to_be_defined_9 : 16; /* 47-63 */
};
/*