[PATCH] s390: improved machine check handling

Improved machine check handling.  The kernel is now able to receive machine
checks while in kernel mode (system call, interrupt and program check
handling).  Register validation is now performed as well.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index ffa996c..5bb255e 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -31,14 +31,14 @@
 extern struct workqueue_struct *slow_path_wq;
 extern struct work_struct slow_path_work;
 
-static void
+static NORET_TYPE void
 s390_handle_damage(char *msg)
 {
-	printk(KERN_EMERG "%s\n", msg);
 #ifdef CONFIG_SMP
 	smp_send_stop();
 #endif
 	disabled_wait((unsigned long) __builtin_return_address(0));
+	for(;;);	/* declared NORET_TYPE: must never return */
 }
 
 /*
@@ -122,40 +122,39 @@
 	return 0;
 }
 
+struct mcck_struct {
+	int kill_task;		/* s390_handle_mcck() has to do_exit() the task */
+	int channel_report;	/* channel report word pending -> up(&m_sem) */
+	int warning;		/* warning machine check pending */
+	unsigned long long mcck_code;	/* interruption code, printed on kill */
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
 /*
- * machine check handler.
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
  */
 void
-s390_do_machine_check(void)
+s390_handle_mcck(void)
 {
-	struct mci *mci;
+	unsigned long flags;
+	struct mcck_struct mcck;
 
-	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+	/*
+	 * Disable machine checks and get the current state of accumulated
+	 * machine checks. Afterwards delete the old state and enable machine
+	 * checks again.
+	 */
+	local_irq_save(flags);
+	local_mcck_disable();
+	mcck = __get_cpu_var(cpu_mcck);
+	memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
+	clear_thread_flag(TIF_MCCK_PENDING);
+	local_mcck_enable();
+	local_irq_restore(flags);
 
-	if (mci->sd)		/* system damage */
-		s390_handle_damage("received system damage machine check\n");
-
-	if (mci->pd)		/* instruction processing damage */
-		s390_handle_damage("received instruction processing "
-				   "damage machine check\n");
-
-	if (mci->se)		/* storage error uncorrected */
-		s390_handle_damage("received storage error uncorrected "
-				   "machine check\n");
-
-	if (mci->sc)		/* storage error corrected */
-		printk(KERN_WARNING
-		       "received storage error corrected machine check\n");
-
-	if (mci->ke)		/* storage key-error uncorrected */
-		s390_handle_damage("received storage key-error uncorrected "
-				   "machine check\n");
-
-	if (mci->ds && mci->fa)	/* storage degradation */
-		s390_handle_damage("received storage degradation machine "
-				   "check\n");
-
-	if (mci->cp)		/* channel report word pending */
+	if (mcck.channel_report)
 		up(&m_sem);
 
 #ifdef CONFIG_MACHCHK_WARNING
@@ -168,7 +167,7 @@
  * On VM we only get one interrupt per virtally presented machinecheck.
  * Though one suffices, we may get one interrupt per (virtual) processor.
  */
-	if (mci->w) {	/* WARNING pending ? */
+	if (mcck.warning) {	/* WARNING pending ? */
 		static int mchchk_wng_posted = 0;
 		/*
 		 * Use single machine clear, as we cannot handle smp right now
@@ -178,6 +177,261 @@
 			kill_proc(1, SIGPWR, 1);
 	}
 #endif
+
+	if (mcck.kill_task) {
+		local_irq_enable();
+		printk(KERN_EMERG "mcck: Terminating task because of machine "
+		       "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+		printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+		       current->comm, current->pid);
+		do_exit(SIGSEGV);
+	}
+}
+
+/*
+ * Reload registers from the lowcore save areas, guided by the validity bits in
+ * *mci. Returns 1 if state was lost and the task must be killed, 0 otherwise.
+ */
+static int
+s390_revalidate_registers(struct mci *mci)
+{
+	int kill_task;
+	u64 tmpclock;
+	u64 zero;
+	void *fpt_save_area, *fpt_creg_save_area;
+
+	kill_task = 0;
+	zero = 0;	/* loaded into the fp control reg if it is invalid */
+	/* General purpose registers */
+	if (!mci->gr)
+		/*
+		 * General purpose registers couldn't be restored and have
+		 * unknown contents. Process needs to be terminated.
+		 */
+		kill_task = 1;
+
+	/* Revalidate floating point registers */
+	if (!mci->fp)
+		/*
+		 * Floating point registers can't be restored and
+		 * therefore the process needs to be terminated.
+		 */
+		kill_task = 1;
+
+#ifndef __s390x__
+	asm volatile("ld 0,0(%0)\n"	/* fp regs 0, 2, 4 and 6 */
+		     "ld 2,8(%0)\n"
+		     "ld 4,16(%0)\n"
+		     "ld 6,24(%0)"
+		     : : "a" (&S390_lowcore.floating_pt_save_area));
+#endif
+
+	if (MACHINE_HAS_IEEE) {
+#ifdef __s390x__
+		fpt_save_area = &S390_lowcore.floating_pt_save_area;
+		fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+#else
+		fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
+		fpt_creg_save_area = fpt_save_area+128;
+#endif
+		/* Floating point control register */
+		if (!mci->fc) {
+			/*
+			 * Floating point control register can't be restored.
+			 * Task will be terminated.
+			 */
+			asm volatile ("lfpc 0(%0)" : : "a" (&zero));
+			kill_task = 1;
+
+		}
+		else
+			asm volatile (
+				"lfpc 0(%0)"
+				: : "a" (fpt_creg_save_area));
+
+		asm volatile("ld  0,0(%0)\n"	/* fp regs 0 to 15 */
+			     "ld  1,8(%0)\n"
+			     "ld  2,16(%0)\n"
+			     "ld  3,24(%0)\n"
+			     "ld  4,32(%0)\n"
+			     "ld  5,40(%0)\n"
+			     "ld  6,48(%0)\n"
+			     "ld  7,56(%0)\n"
+			     "ld  8,64(%0)\n"
+			     "ld  9,72(%0)\n"
+			     "ld 10,80(%0)\n"
+			     "ld 11,88(%0)\n"
+			     "ld 12,96(%0)\n"
+			     "ld 13,104(%0)\n"
+			     "ld 14,112(%0)\n"
+			     "ld 15,120(%0)\n"
+			     : : "a" (fpt_save_area));
+	}
+
+	/* Revalidate access registers */
+	asm volatile("lam 0,15,0(%0)"
+		     : : "a" (&S390_lowcore.access_regs_save_area));
+	if (!mci->ar)
+		/*
+		 * Access registers have unknown contents.
+		 * Terminating task.
+		 */
+		kill_task = 1;
+
+	/* Revalidate control registers */
+	if (!mci->cr)
+		/*
+		 * Control registers have unknown contents.
+		 * Can't recover and therefore stopping machine.
+		 */
+		s390_handle_damage("invalid control registers.");
+	else
+#ifdef __s390x__
+		asm volatile("lctlg 0,15,0(%0)"
+			     : : "a" (&S390_lowcore.cregs_save_area));
+#else
+		asm volatile("lctl 0,15,0(%0)"
+			     : : "a" (&S390_lowcore.cregs_save_area));
+#endif
+
+	/*
+	 * We don't even try to revalidate the TOD register, since we simply
+	 * can't write something sensible into that register.
+	 */
+
+#ifdef __s390x__
+	/*
+	 * See if we can revalidate the TOD programmable register with its
+	 * old contents (should be zero) otherwise set it to zero.
+	 */
+	if (!mci->pr)
+		asm volatile("sr 0,0\n"
+			     "sckpf"
+			     : : : "0", "cc");
+	else
+		asm volatile(
+			"l 0,0(%0)\n"
+			"sckpf"
+			: : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc");
+#endif
+
+	/* Set clock comparator to the current TOD clock to revalidate it */
+	asm volatile ("stck 0(%1)\n"
+		      "sckc 0(%1)"
+		      : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
+
+	/* Check if old PSW is valid */
+	if (!mci->wp)
+		/*
+		 * Can't tell if we come from user or kernel mode
+		 * -> stopping machine.
+		 */
+		s390_handle_damage("old psw invalid.");
+
+	if (!mci->ms || !mci->pm || !mci->ia)	/* rest of old psw invalid */
+		kill_task = 1;
+
+	return kill_task;
+}
+
+/*
+ * machine check handler. Unrecoverable conditions stop the machine; anything
+ * else is recorded in cpu_mcck and flagged with TIF_MCCK_PENDING.
+ */
+void
+s390_do_machine_check(struct pt_regs *regs)
+{
+	struct mci *mci;
+	struct mcck_struct *mcck;
+	int umode;
+
+	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+	mcck = &__get_cpu_var(cpu_mcck);
+	umode = user_mode(regs);
+
+	if (mci->sd)
+		/* System damage -> stopping machine */
+		s390_handle_damage("received system damage machine check.");
+
+	if (mci->pd) {
+		if (mci->b) {
+			/* Processing backup -> verify if we can survive this */
+			u64 z_mcic, o_mcic, t_mcic = *(u64 *)mci;
+#ifdef __s390x__
+			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+				  1ULL<<16);
+#else
+			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
+				  1ULL<<29);
+			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+				  1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
+#endif
+
+			if (((t_mcic & z_mcic) != 0) ||
+			    ((t_mcic & o_mcic) != o_mcic))
+				s390_handle_damage("processing backup machine "
+						   "check with damage.");
+			if (!umode)
+				s390_handle_damage("processing backup machine "
+						   "check in kernel mode.");
+			mcck->kill_task = 1;
+			mcck->mcck_code = *(unsigned long long *) mci;
+			set_thread_flag(TIF_MCCK_PENDING);
+		}
+		else {
+			/* Processing damage -> stopping machine */
+			s390_handle_damage("received instruction processing "
+					   "damage machine check.");
+		}
+	}
+	if (s390_revalidate_registers(mci)) {
+		if (umode) {
+			/*
+			 * Couldn't restore all register contents while in
+			 * user mode -> mark task for termination.
+			 */
+			mcck->kill_task = 1;
+			mcck->mcck_code = *(unsigned long long *) mci;
+			set_thread_flag(TIF_MCCK_PENDING);
+		}
+		else
+			/*
+			 * Couldn't restore all register contents while in
+			 * kernel mode -> stopping machine.
+			 */
+			s390_handle_damage("unable to revalidate registers.");
+	}
+
+	if (mci->se)
+		/* Storage error uncorrected */
+		s390_handle_damage("received storage error uncorrected "
+				   "machine check.");
+
+	if (mci->ke)
+		/* Storage key-error uncorrected */
+		s390_handle_damage("received storage key-error uncorrected "
+				   "machine check.");
+
+	if (mci->ds && mci->fa)
+		/* Storage degradation */
+		s390_handle_damage("received storage degradation machine "
+				   "check.");
+
+	if (mci->cp) {
+		/* Channel report word pending */
+		mcck->channel_report = 1;
+		set_thread_flag(TIF_MCCK_PENDING);
+	}
+
+	if (mci->w) {
+		/* Warning pending */
+		mcck->warning = 1;
+		set_thread_flag(TIF_MCCK_PENDING);
+	}
+}
 
 /*
@@ -189,9 +443,8 @@
 machine_check_init(void)
 {
 	init_MUTEX_LOCKED(&m_sem);
-	ctl_clear_bit(14, 25);	/* disable damage MCH */
-	ctl_set_bit(14, 26);	/* enable degradation MCH */
-	ctl_set_bit(14, 27);	/* enable system recovery MCH */
+	ctl_clear_bit(14, 25);	/* disable external damage MCH */
+	ctl_set_bit(14, 27);    /* enable system recovery MCH */
 #ifdef CONFIG_MACHCHK_WARNING
 	ctl_set_bit(14, 24);	/* enable warning MCH */
 #endif
diff --git a/drivers/s390/s390mach.h b/drivers/s390/s390mach.h
index 7e26f0f..4eaa701 100644
--- a/drivers/s390/s390mach.h
+++ b/drivers/s390/s390mach.h
@@ -16,20 +16,45 @@
 	__u32   sd              :  1; /* 00 system damage */
 	__u32   pd              :  1; /* 01 instruction-processing damage */
 	__u32   sr              :  1; /* 02 system recovery */
-	__u32   to_be_defined_1 :  4; /* 03-06 */
+	__u32   to_be_defined_1 :  1; /* 03 */
+	__u32   cd              :  1; /* 04 timing-facility damage */
+	__u32   ed              :  1; /* 05 external damage */
+	__u32   to_be_defined_2 :  1; /* 06 */
 	__u32   dg              :  1; /* 07 degradation */
 	__u32   w               :  1; /* 08 warning pending */
 	__u32   cp              :  1; /* 09 channel-report pending */
-	__u32   to_be_defined_2 :  6; /* 10-15 */
+	__u32   sp              :  1; /* 10 service-processor damage */
+	__u32   ck              :  1; /* 11 channel-subsystem damage */
+	__u32   to_be_defined_3 :  2; /* 12-13 */
+	__u32   b               :  1; /* 14 backed up */
+	__u32   to_be_defined_4 :  1; /* 15 */
 	__u32   se              :  1; /* 16 storage error uncorrected */
 	__u32   sc              :  1; /* 17 storage error corrected */
 	__u32   ke              :  1; /* 18 storage-key error uncorrected */
 	__u32   ds              :  1; /* 19 storage degradation */
-	__u32	to_be_defined_3 :  4; /* 20-23 */
+	__u32   wp              :  1; /* 20 psw mwp validity */
+	__u32   ms              :  1; /* 21 psw mask and key validity */
+	__u32   pm              :  1; /* 22 psw program mask and cc validity */
+	__u32   ia              :  1; /* 23 psw instruction address validity */
 	__u32   fa              :  1; /* 24 failing storage address validity */
-	__u32   to_be_defined_4 :  7; /* 25-31 */
+	__u32   to_be_defined_5 :  1; /* 25 */
+	__u32   ec              :  1; /* 26 external damage code validity */
+	__u32   fp              :  1; /* 27 floating point register validity */
+	__u32   gr              :  1; /* 28 general register validity */
+	__u32   cr              :  1; /* 29 control register validity */
+	__u32   to_be_defined_6 :  1; /* 30 */
+	__u32   st              :  1; /* 31 storage logical validity */
 	__u32   ie              :  1; /* 32 indirect storage error */
-	__u32	to_be_defined_5 : 31; /* 33-63 */
+	__u32   ar              :  1; /* 33 access register validity */
+	__u32   da              :  1; /* 34 delayed access exception */
+	__u32   to_be_defined_7 :  7; /* 35-41 */
+	__u32   pr              :  1; /* 42 tod programmable register validity */
+	__u32   fc              :  1; /* 43 fp control register validity */
+	__u32   ap              :  1; /* 44 ancillary report */
+	__u32   to_be_defined_8 :  1; /* 45 */
+	__u32   ct              :  1; /* 46 cpu timer validity */
+	__u32   cc              :  1; /* 47 clock comparator validity */
+	__u32	to_be_defined_9 : 16; /* 48-63 */
 };
 
 /*