[PATCH] s390: improved machine check handling

Improve machine check handling.  The kernel is now able to receive machine
checks while in kernel mode (system call, interrupt and program check
handling).  Register validation is now performed as well.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index ffa996c..5bb255e 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -31,14 +31,14 @@
 extern struct workqueue_struct *slow_path_wq;
 extern struct work_struct slow_path_work;
 
-static void
+static NORET_TYPE void
 s390_handle_damage(char *msg)
 {
-	printk(KERN_EMERG "%s\n", msg);
 #ifdef CONFIG_SMP
 	smp_send_stop();
 #endif
 	disabled_wait((unsigned long) __builtin_return_address(0));
+	for(;;);	/* never fall through: function is declared NORET_TYPE */
 }
 
 /*
@@ -122,40 +122,39 @@
 	return 0;
 }
 
+struct mcck_struct {
+	int kill_task;		/* current task has to be terminated */
+	int channel_report;	/* channel report word pending */
+	int warning;		/* warning condition pending */
+	unsigned long long mcck_code;	/* interruption code stored with kill_task */
+};
+/* Filled in by s390_do_machine_check(), read and reset by s390_handle_mcck(). */
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
 /*
- * machine check handler.
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
  */
 void
-s390_do_machine_check(void)
+s390_handle_mcck(void)
 {
-	struct mci *mci;
+	unsigned long flags;
+	struct mcck_struct mcck;
 
-	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+	/*
+	 * Disable machine checks and get the current state of accumulated
+	 * machine checks. Afterwards delete the old state and enable machine
+	 * checks again.
+	 */
+	local_irq_save(flags);
+	local_mcck_disable();
+	mcck = __get_cpu_var(cpu_mcck);
+	memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
+	clear_thread_flag(TIF_MCCK_PENDING);
+	local_mcck_enable();
+	local_irq_restore(flags);
 
-	if (mci->sd)		/* system damage */
-		s390_handle_damage("received system damage machine check\n");
-
-	if (mci->pd)		/* instruction processing damage */
-		s390_handle_damage("received instruction processing "
-				   "damage machine check\n");
-
-	if (mci->se)		/* storage error uncorrected */
-		s390_handle_damage("received storage error uncorrected "
-				   "machine check\n");
-
-	if (mci->sc)		/* storage error corrected */
-		printk(KERN_WARNING
-		       "received storage error corrected machine check\n");
-
-	if (mci->ke)		/* storage key-error uncorrected */
-		s390_handle_damage("received storage key-error uncorrected "
-				   "machine check\n");
-
-	if (mci->ds && mci->fa)	/* storage degradation */
-		s390_handle_damage("received storage degradation machine "
-				   "check\n");
-
-	if (mci->cp)		/* channel report word pending */
+	if (mcck.channel_report)
 		up(&m_sem);
 
 #ifdef CONFIG_MACHCHK_WARNING
@@ -168,7 +167,7 @@
  * On VM we only get one interrupt per virtally presented machinecheck.
  * Though one suffices, we may get one interrupt per (virtual) processor.
  */
-	if (mci->w) {	/* WARNING pending ? */
+	if (mcck.warning) {	/* WARNING pending ? */
 		static int mchchk_wng_posted = 0;
 		/*
 		 * Use single machine clear, as we cannot handle smp right now
@@ -178,6 +177,261 @@
 			kill_proc(1, SIGPWR, 1);
 	}
 #endif
+
+	if (mcck.kill_task) {
+		local_irq_enable();
+		printk(KERN_EMERG "mcck: Terminating task because of machine "
+		       "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+		printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+		       current->comm, current->pid);
+		do_exit(SIGSEGV);
+	}
+}
+
+/*
+ * Reload registers from the lowcore save areas. Returns 0 if all registers
+ * could be validated, 1 if some have unknown contents (task must be killed).
+ */
+static int
+s390_revalidate_registers(struct mci *mci)
+{
+	int kill_task;
+	u64 tmpclock;
+	u64 zero;
+	void *fpt_save_area, *fpt_creg_save_area;
+
+	kill_task = 0;
+	zero = 0;
+	/* General purpose registers */
+	if (!mci->gr)
+		/*
+		 * General purpose registers couldn't be restored and have
+		 * unknown contents. Process needs to be terminated.
+		 */
+		kill_task = 1;
+
+	/* Revalidate floating point registers */
+	if (!mci->fp)
+		/*
+		 * Floating point registers can't be restored and
+		 * therefore the process needs to be terminated.
+		 */
+		kill_task = 1;
+
+#ifndef __s390x__
+	asm volatile("ld 0,0(%0)\n"
+		     "ld 2,8(%0)\n"
+		     "ld 4,16(%0)\n"
+		     "ld 6,24(%0)"
+		     : : "a" (&S390_lowcore.floating_pt_save_area));
+#endif
+
+	if (MACHINE_HAS_IEEE) {
+#ifdef __s390x__
+		fpt_save_area = &S390_lowcore.floating_pt_save_area;
+		fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+#else
+		fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
+		fpt_creg_save_area = fpt_save_area+128;
+#endif
+		/* Floating point control register */
+		if (!mci->fc) {
+			/*
+			 * Floating point control register can't be restored:
+			 * load zero ("m" makes gcc store it first), kill task.
+			 */
+			asm volatile ("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
+			kill_task = 1;
+
+		}
+		else
+			asm volatile (
+				"lfpc 0(%0)"
+				: : "a" (fpt_creg_save_area));
+
+		asm volatile("ld  0,0(%0)\n"
+			     "ld  1,8(%0)\n"
+			     "ld  2,16(%0)\n"
+			     "ld  3,24(%0)\n"
+			     "ld  4,32(%0)\n"
+			     "ld  5,40(%0)\n"
+			     "ld  6,48(%0)\n"
+			     "ld  7,56(%0)\n"
+			     "ld  8,64(%0)\n"
+			     "ld  9,72(%0)\n"
+			     "ld 10,80(%0)\n"
+			     "ld 11,88(%0)\n"
+			     "ld 12,96(%0)\n"
+			     "ld 13,104(%0)\n"
+			     "ld 14,112(%0)\n"
+			     "ld 15,120(%0)\n"
+			     : : "a" (fpt_save_area));
+	}
+
+	/* Revalidate access registers */
+	asm volatile("lam 0,15,0(%0)"
+		     : : "a" (&S390_lowcore.access_regs_save_area));
+	if (!mci->ar)
+		/*
+		 * Access registers have unknown contents.
+		 * Terminating task.
+		 */
+		kill_task = 1;
+
+	/* Revalidate control registers */
+	if (!mci->cr)
+		/*
+		 * Control registers have unknown contents.
+		 * Can't recover and therefore stopping machine.
+		 */
+		s390_handle_damage("invalid control registers.");
+	else
+#ifdef __s390x__
+		asm volatile("lctlg 0,15,0(%0)"
+			     : : "a" (&S390_lowcore.cregs_save_area));
+#else
+		asm volatile("lctl 0,15,0(%0)"
+			     : : "a" (&S390_lowcore.cregs_save_area));
+#endif
+
+	/*
+	 * We don't even try to revalidate the TOD register, since we simply
+	 * can't write something sensible into that register.
+	 */
+
+#ifdef __s390x__
+	/*
+	 * See if we can revalidate the TOD programmable register with its
+	 * old contents (should be zero) otherwise set it to zero.
+	 */
+	if (!mci->pr)
+		asm volatile("sr 0,0\n"
+			     "sckpf"
+			     : : : "0", "cc");
+	else
+		asm volatile(
+			"l 0,0(%0)\n"
+			"sckpf"
+			: : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc");
+#endif
+
+	/* Revalidate clock comparator register */
+	asm volatile ("stck 0(%1)\n"
+		      "sckc 0(%1)"
+		      : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
+
+	/* Check if old PSW is valid */
+	if (!mci->wp)
+		/*
+		 * Can't tell if we come from user or kernel mode
+		 * -> stopping machine.
+		 */
+		s390_handle_damage("old psw invalid.");
+
+	if (!mci->ms || !mci->pm || !mci->ia)
+		kill_task = 1;
+
+	return kill_task;
+}
+
+/*
+ * machine check handler: stops on fatal damage, else records work in cpu_mcck.
+ */
+void
+s390_do_machine_check(struct pt_regs *regs)
+{
+	struct mci *mci;
+	struct mcck_struct *mcck;
+	int umode;
+
+	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+	mcck = &__get_cpu_var(cpu_mcck);
+	umode = user_mode(regs);
+
+	if (mci->sd)
+		/* System damage -> stopping machine */
+		s390_handle_damage("received system damage machine check.");
+
+	if (mci->pd) {
+		if (mci->b) {
+			/* Processing backup -> verify if we can survive this */
+			u64 z_mcic, o_mcic, t_mcic;
+#ifdef __s390x__
+			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+				  1ULL<<16);
+#else
+			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
+				  1ULL<<29);
+			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+				  1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
+#endif
+			t_mcic = *(u64 *)mci;
+
+			if (((t_mcic & z_mcic) != 0) ||
+			    ((t_mcic & o_mcic) != o_mcic)) {
+				s390_handle_damage("processing backup machine "
+						   "check with damage.");
+			}
+			if (!umode)
+				s390_handle_damage("processing backup machine "
+						   "check in kernel mode.");
+			mcck->kill_task = 1;
+			mcck->mcck_code = *(unsigned long long *) mci;
+			set_thread_flag(TIF_MCCK_PENDING);
+		} else {
+			/* Processing damage -> stopping machine */
+			s390_handle_damage("received instruction processing "
+					   "damage machine check.");
+		}
+	}
+	if (s390_revalidate_registers(mci)) {
+		if (umode) {
+			/*
+			 * Couldn't restore all register contents while in
+			 * user mode -> mark task for termination.
+			 */
+			mcck->kill_task = 1;
+			mcck->mcck_code = *(unsigned long long *) mci;
+			set_thread_flag(TIF_MCCK_PENDING);
+		}
+		else
+			/*
+			 * Couldn't restore all register contents while in
+			 * kernel mode -> stopping machine.
+			 */
+			s390_handle_damage("unable to revalidate registers.");
+	}
+
+	if (mci->se)
+		/* Storage error uncorrected */
+		s390_handle_damage("received storage error uncorrected "
+				   "machine check.");
+
+	if (mci->ke)
+		/* Storage key-error uncorrected */
+		s390_handle_damage("received storage key-error uncorrected "
+				   "machine check.");
+
+	if (mci->ds && mci->fa)
+		/* Storage degradation */
+		s390_handle_damage("received storage degradation machine "
+				   "check.");
+
+	if (mci->cp) {
+		/* Channel report word pending */
+		mcck->channel_report = 1;
+		set_thread_flag(TIF_MCCK_PENDING);
+	}
+
+	if (mci->w) {
+		/* Warning pending */
+		mcck->warning = 1;
+		set_thread_flag(TIF_MCCK_PENDING);
+	}
 }
 
 /*
@@ -189,9 +443,8 @@
 machine_check_init(void)
 {
 	init_MUTEX_LOCKED(&m_sem);
-	ctl_clear_bit(14, 25);	/* disable damage MCH */
-	ctl_set_bit(14, 26);	/* enable degradation MCH */
-	ctl_set_bit(14, 27);	/* enable system recovery MCH */
+	ctl_clear_bit(14, 25);	/* disable external damage MCH */
+	ctl_set_bit(14, 27);    /* enable system recovery MCH */
 #ifdef CONFIG_MACHCHK_WARNING
 	ctl_set_bit(14, 24);	/* enable warning MCH */
 #endif