[S390] smp: delay idle task creation

Delay idle task creation until a cpu gets set online instead of
creating idle tasks for all possible cpus at system startup.
On a one-cpu system this should save more than 1 MB.
On my debug system with lots of debug stuff enabled this saves 2 MB.

Same as on x86.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a9702df..8e84b5a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -23,6 +23,7 @@
 #define KMSG_COMPONENT "cpu"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/workqueue.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/mm.h>
@@ -477,18 +478,20 @@
 	return 0;
 }
 
-static void __init smp_create_idle(unsigned int cpu)
-{
-	struct task_struct *p;
+struct create_idle {
+	struct work_struct work;
+	struct task_struct *idle;
+	struct completion done;
+	int cpu;
+};
 
-	/*
-	 *  don't care about the psw and regs settings since we'll never
-	 *  reschedule the forked task.
-	 */
-	p = fork_idle(cpu);
-	if (IS_ERR(p))
-		panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
-	current_set[cpu] = p;
+static void __cpuinit smp_fork_idle(struct work_struct *work)
+{
+	struct create_idle *c_idle;
+
+	c_idle = container_of(work, struct create_idle, work);
+	c_idle->idle = fork_idle(c_idle->cpu);
+	complete(&c_idle->done);
 }
 
 static int __cpuinit smp_alloc_lowcore(int cpu)
@@ -552,6 +555,7 @@
 int __cpuinit __cpu_up(unsigned int cpu)
 {
 	struct _lowcore *cpu_lowcore;
+	struct create_idle c_idle;
 	struct task_struct *idle;
 	struct stack_frame *sf;
 	u32 lowcore;
@@ -559,6 +563,18 @@
 
 	if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED)
 		return -EIO;
+	idle = current_set[cpu];
+	if (!idle) {
+		c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done);
+		INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle);
+		c_idle.cpu = cpu;
+		schedule_work(&c_idle.work);
+		wait_for_completion(&c_idle.done);
+		if (IS_ERR(c_idle.idle))
+			return PTR_ERR(c_idle.idle);
+		idle = c_idle.idle;
+		current_set[cpu] = c_idle.idle;
+	}
 	if (smp_alloc_lowcore(cpu))
 		return -ENOMEM;
 	do {
@@ -573,7 +589,6 @@
 	while (sigp_p(lowcore, cpu, sigp_set_prefix) == sigp_busy)
 		udelay(10);
 
-	idle = current_set[cpu];
 	cpu_lowcore = lowcore_ptr[cpu];
 	cpu_lowcore->kernel_stack = (unsigned long)
 		task_stack_page(idle) + THREAD_SIZE;
@@ -685,7 +700,6 @@
 #endif
 	unsigned long async_stack, panic_stack;
 	struct _lowcore *lowcore;
-	unsigned int cpu;
 
 	smp_detect_cpus();
 
@@ -720,9 +734,6 @@
 	if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore))
 		BUG();
 #endif
-	for_each_possible_cpu(cpu)
-		if (cpu != smp_processor_id())
-			smp_create_idle(cpu);
 }
 
 void __init smp_prepare_boot_cpu(void)