[PATCH] kexec: s390 support

Add kexec support for s390 architecture.

From: Milton Miller <miltonm@bga.com>

- Fix passing of first argument to relocate_kernel assembly.
- Fix Kconfig description.
- Remove wrong comment and comments that describe obvious things.
- Allow only KEXEC_TYPE_DEFAULT as image type -> dump not supported.

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 32696c1..6600ee8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -455,6 +455,14 @@
 	  The HZ timer is switched off in idle by default. That means the
 	  HZ timer is already disabled at boot time.
 
+config KEXEC
+	bool "kexec system call (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  kexec is a system call that implements the ability to shut down your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but is independent of hardware/microcode support.
+
 endmenu
 
 config PCMCIA
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index b41e0e1..ab1e49d2 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -25,6 +25,16 @@
 
 obj-$(CONFIG_VIRT_TIMER)	+= vtime.o
 
+# Kexec part: common objects plus relocate_kernel64.o or relocate_kernel.o
+S390_KEXEC_OBJS := machine_kexec.o crash.o
+ifeq ($(CONFIG_ARCH_S390X),y)
+S390_KEXEC_OBJS += relocate_kernel64.o
+else
+S390_KEXEC_OBJS += relocate_kernel.o
+endif
+obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS)
+
+
 #
 # This is just to get the dependencies...
 #
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 7a607b1..bf52973 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1441,3 +1441,11 @@
 	lgfr	%r5,%r5			# int
 	llgtr	%r6,%r6			# struct rusage_emu31 *
 	jg	compat_sys_waitid
+
+	.globl	compat_sys_kexec_load_wrapper
+compat_sys_kexec_load_wrapper:		# widen %r2-%r5, then jump to the C handler
+	llgfr	%r2,%r2			# unsigned long (zero-extend 32 bits)
+	llgfr	%r3,%r3			# unsigned long (zero-extend 32 bits)
+	llgtr	%r4,%r4			# struct kexec_segment * (31 bit pointer)
+	llgfr	%r5,%r5			# unsigned long (zero-extend 32 bits)
+	jg	compat_sys_kexec_load	# plain jump, no return to this wrapper
diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c
new file mode 100644
index 0000000..db38283
--- /dev/null
+++ b/arch/s390/kernel/crash.c
@@ -0,0 +1,17 @@
+/*
+ * arch/s390/kernel/crash.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ */
+
+#include <linux/threads.h>
+#include <linux/kexec.h>
+
+note_buf_t crash_notes[NR_CPUS];
+/* Empty hook: machine_kexec_prepare() accepts only KEXEC_TYPE_DEFAULT images. */
+void machine_crash_shutdown(void)
+{
+}
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
new file mode 100644
index 0000000..7a94db7
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec.c
@@ -0,0 +1,98 @@
+/*
+ * arch/s390/kernel/machine_kexec.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *
+ */
+
+/*
+ * machine_kexec.c - handle the transition of Linux booting another kernel
+ * on the S390 architecture.
+ */
+
+#include <asm/cio.h>
+#include <asm/setup.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/system.h>
+
+static void kexec_halt_all_cpus(void *kernel_image);
+/* Entry point of the relocation stub once copied into the control page. */
+typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long);
+/* Stub code and its length in bytes, both defined in relocate_kernel*.S. */
+extern const unsigned char relocate_kernel[];
+extern const unsigned long long relocate_kernel_len;
+
+/* Validate @image and copy the relocation stub into its control code page. */
+int machine_kexec_prepare(struct kimage *image)
+{
+	void *stub_dest;
+
+	/* We don't support anything but the default image type for now. */
+	if (image->type != KEXEC_TYPE_DEFAULT)
+		return -EINVAL;
+
+	/* Same address that kexec_halt_all_cpus() later branches to. */
+	stub_dest = (void *)
+		(page_to_pfn(image->control_code_page) << PAGE_SHIFT);
+
+	/* Install the assembler stub. */
+	memcpy(stub_dest, relocate_kernel, relocate_kernel_len);
+	return 0;
+}
+
+/* Nothing to undo: machine_kexec_prepare() only fills the control page. */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/* Shutdown hook; the s390 version only logs that it was called. */
+void machine_shutdown(void)
+{
+	printk(KERN_INFO "kexec: machine_shutdown called\n");
+}
+
+/* Switch to the kernel in @image; control never comes back to the caller. */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+	clear_all_subchannels();
+
+	/* Disable lowcore protection */
+	ctl_clear_bit(0, 28);
+	on_each_cpu(kexec_halt_all_cpus, image, 0, 0);
+	for (;;)
+		;
+}
+
+/*
+ * Runs on every cpu via on_each_cpu().  A cpu for which the compare and swap
+ * on 'cpuid' returns non-zero sends itself sigp_stop.  Execution that gets past
+ * that point waits for all other cpus and then calls the relocation stub.
+ */
+static void kexec_halt_all_cpus(void *kernel_image)
+{
+	static atomic_t cpuid = ATOMIC_INIT(-1);
+	struct kimage *image = kernel_image;
+	relocate_kernel_t data_mover;
+	int this_cpu = smp_processor_id();
+	int cpu;
+
+	if (atomic_compare_and_swap(-1, this_cpu, &cpuid))
+		signal_processor(this_cpu, sigp_stop);
+
+	/* Wait for all other cpus to enter stopped state */
+	for_each_online_cpu(cpu) {
+		if (cpu != this_cpu)
+			while (!smp_cpu_not_running(cpu))
+				cpu_relax();
+	}
+	data_mover = (relocate_kernel_t)
+		(page_to_pfn(image->control_code_page) << PAGE_SHIFT);
+	(*data_mover) (&image->head, image->start);
+}
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
new file mode 100644
index 0000000..d5e4a62
--- /dev/null
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -0,0 +1,81 @@
+/*
+ * arch/s390/kernel/relocate_kernel.S
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *
+ */
+
+/*
+ * Moves the new kernel to its destination and then starts it.
+ * %r2 = pointer to first kimage_entry_t
+ * %r3 = start address - where to jump to after the job is done
+ *
+ * %r5 holds the kimage entry currently being decoded
+ * %r6 holds the destination address (mvcle first operand)
+ * %r7 = PAGE_SIZE (destination length)
+ * %r8 holds the source address (mvcle second operand)
+ * %r9 = PAGE_SIZE (source length)
+ * %r10 is a page mask (0xfffff000)
+ */
+
+	.text
+	.globl		relocate_kernel
+	relocate_kernel:
+		basr	%r13,0		#base address
+	.base:
+		spx	zero64-.base(%r13)	#set prefix to zero: absolute addressing mode
+		stnsm	sys_msk-.base(%r13),0xf8	#save system mask, then disable DAT, I/O and external irqs
+		lhi	%r10,-1		#preparing the mask
+		sll	%r10,12		#shift it such that it becomes 0xfffff000
+	.top:
+		lhi	%r7,4096	#load PAGE_SIZE in r7 (reloaded for every entry)
+		lhi	%r9,4096	#load PAGE_SIZE in r9 (reloaded for every entry)
+		l	%r5,0(%r2)	#read the next entry of the indirection page
+		ahi	%r2,4		#increment pointer by one 4 byte entry
+		tml	%r5,0x1		#is it a destination page?
+		je	.indir_check	#NO, goto "indir_check"
+		lr	%r6,%r5		#r6 = r5
+		nr	%r6,%r10	#strip the flag bits and...
+		j	.top		#...next iteration
+	.indir_check:
+		tml	%r5,0x2		#is it an indirection page?
+		je	.done_test	#NO, goto "done_test"
+		nr	%r5,%r10	#YES, strip the flag bits,
+		lr	%r2,%r5		#continue reading entries from that page,
+		j	.top		#and read next...
+	.done_test:
+		tml	%r5,0x4		#is it the done indicator?
+		je	.source_test	#NO! Well, then it should be the source indicator...
+		j	.done		#ok, lets finish it here...
+	.source_test:
+		tml	%r5,0x8		#it should be a source indicator...
+		je	.top		#NO, ignore it...
+		lr	%r8,%r5		#r8 = r5
+		nr	%r8,%r10	#strip the flag bits
+	0:	mvcle	%r6,%r8,0x0	#copy PAGE_SIZE bytes from r8 to r6 - pad with 0
+		jo	0b		#copy not finished yet (cc 3), resume it
+		j	.top		#fetch the next entry
+	.done:
+		sr	%r0,%r0		#clear register r0
+		la	%r4,load_psw-.base(%r13)	#load psw-address into the register
+		o	%r3,4(%r4)	#or second psw word into the start address
+		st	%r3,4(%r4)	#store the result back into load_psw
+		mvc	0(8,%r0),0(%r4)	#copy psw to absolute address 0
+		sr	%r1,%r1		#clear %r1
+		sr	%r2,%r2		#clear %r2
+		sigp	%r1,%r2,0x12	#set cpuid to zero; NOTE(review): 0x12 looks like the set-architecture order - confirm
+		lpsw	0		#hopefully start new kernel...
+
+		.align	8
+	zero64:
+		.quad	0		#operand for spx above
+	load_psw:
+		.long	0x00080000,0x80000000	#psw template, completed at .done
+	sys_msk:
+		.quad	0		#stnsm stores the old system mask here
+	relocate_kernel_end:
+	.globl	relocate_kernel_len
+	relocate_kernel_len:
+		.quad	relocate_kernel_end - relocate_kernel
diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S
new file mode 100644
index 0000000..96290cc
--- /dev/null
+++ b/arch/s390/kernel/relocate_kernel64.S
@@ -0,0 +1,82 @@
+/*
+ * arch/s390/kernel/relocate_kernel64.S
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *
+ */
+
+/*
+ * Moves the new kernel to its destination and then starts it.
+ * %r2 = pointer to first kimage_entry_t
+ * %r3 = start address - where to jump to after the job is done
+ *
+ * %r5 holds the kimage entry currently being decoded
+ * %r6 holds the destination address (mvcle first operand)
+ * %r7 = PAGE_SIZE (destination length)
+ * %r8 holds the source address (mvcle second operand)
+ * %r9 = PAGE_SIZE (source length)
+ *
+ * 0xf000 is the page mask used with nill (clears the low 12 address bits)
+ */
+
+	.text
+	.globl		relocate_kernel
+	relocate_kernel:
+		basr	%r13,0		#base address
+	.base:
+		spx	zero64-.base(%r13)	#set prefix to zero: absolute addressing mode
+		stnsm	sys_msk-.base(%r13),0xf8	#save system mask, then disable DAT, I/O and external irqs
+	.top:
+		lghi	%r7,4096	#load PAGE_SIZE in r7 (reloaded for every entry)
+		lghi	%r9,4096	#load PAGE_SIZE in r9 (reloaded for every entry)
+		lg	%r5,0(%r2)	#read the next entry of the indirection page
+		aghi	%r2,8		#increment pointer by one 8 byte entry
+		tml	%r5,0x1		#is it a destination page?
+		je	.indir_check	#NO, goto "indir_check"
+		lgr	%r6,%r5		#r6 = r5
+		nill	%r6,0xf000	#strip the flag bits and...
+		j	.top		#...next iteration
+	.indir_check:
+		tml     %r5,0x2		#is it an indirection page?
+		je      .done_test	#NO, goto "done_test"
+		nill    %r5,0xf000	#YES, strip the flag bits,
+		lgr     %r2,%r5		#continue reading entries from that page,
+		j       .top		#and read next...
+	.done_test:
+		tml     %r5,0x4		#is it the done indicator?
+		je      .source_test	#NO! Well, then it should be the source indicator...
+		j       .done		#ok, lets finish it here...
+	.source_test:
+		tml     %r5,0x8		#it should be a source indicator...
+		je      .top		#NO, ignore it...
+		lgr     %r8,%r5		#r8 = r5
+		nill    %r8,0xf000	#strip the flag bits
+	0:	mvcle   %r6,%r8,0x0	#copy PAGE_SIZE bytes from r8 to r6 - pad with 0
+		jo	0b		#copy not finished yet (cc 3), resume it
+		j       .top		#fetch the next entry
+	.done:
+		sgr     %r0,%r0		#clear register r0
+		la      %r4,load_psw-.base(%r13)	#load psw-address into the register
+		o	%r3,4(%r4)	#or second psw word into the start address (32 bit operation)
+		st	%r3,4(%r4)	#store the result back into load_psw
+		mvc     0(8,%r0),0(%r4)	#copy psw to absolute address 0
+		sam31			#31 bit mode
+		sr      %r1,%r1		#erase register r1
+		sr      %r2,%r2		#erase register r2
+		sigp    %r1,%r2,0x12	#set cpuid to zero; NOTE(review): 0x12 looks like the set-architecture order - confirm
+		lpsw    0		#hopefully start new kernel...
+
+	        .align	8
+	zero64:
+		.quad	0		#operand for spx above
+	load_psw:
+		.long	0x00080000,0x80000000	#psw template, completed at .done
+	sys_msk:
+		.quad	0		#stnsm stores the old system mask here
+	relocate_kernel_end:
+	.globl	relocate_kernel_len
+	relocate_kernel_len:
+		.quad	relocate_kernel_end - relocate_kernel
+
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 5159386..a8668af 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -285,7 +285,7 @@
 SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper)
 SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */
 SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper)
-NI_SYSCALL							/* reserved for kexec */
+SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper)
 SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper)
 SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper)
 SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl)		/* 280 */