sparc32: switch to generic kernel_thread()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e47eb32..edc4ede 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -40,6 +40,7 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
 
 config SPARC32
 	def_bool !64BIT
@@ -74,7 +75,6 @@
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_C_RECORDMCOUNT
 	select NO_BOOTMEM
-	select GENERIC_KERNEL_THREAD
 	select GENERIC_KERNEL_EXECVE
 
 config ARCH_DEFCONFIG
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index f74ac9e..c1e0191 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -106,7 +106,6 @@
 
 /* Free all resources held by a thread. */
 #define release_thread(tsk)		do { } while(0)
-extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 extern unsigned long get_wchan(struct task_struct *);
 
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index dc08970..6114672 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -983,6 +983,16 @@
 	b	ret_sys_call
 	 ld	[%sp + STACKFRAME_SZ + PT_I0], %o0
 
+	.globl	ret_from_kernel_thread
+ret_from_kernel_thread:
+	call	schedule_tail
+	 ld	[%g3 + TI_TASK], %o0
+	ld	[%sp + STACKFRAME_SZ + PT_G1], %l0
+	call	%l0
+	 ld	[%sp + STACKFRAME_SZ + PT_G2], %o0
+	call	do_exit	/* won't return */
+	 clr	%o0
+
 	/* Linux native system calls enter here... */
 	.align	4
 	.globl	linux_sparc_syscall
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 487bffb..7276435 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -316,9 +316,10 @@
  * XXX See comment above sys_vfork in sparc64. todo.
  */
 extern void ret_from_fork(void);
+extern void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
+		unsigned long arg,
 		struct task_struct *p, struct pt_regs *regs)
 {
 	struct thread_info *ti = task_thread_info(p);
@@ -336,16 +337,13 @@
 	}
 
 	/*
-	 *  p->thread_info         new_stack   childregs
-	 *  !                      !           !             {if(PSR_PS) }
-	 *  V                      V (stk.fr.) V  (pt_regs)  { (stk.fr.) }
-	 *  +----- - - - - - ------+===========+============={+==========}+
+	 *  p->thread_info         new_stack   childregs stack bottom
+	 *  !                      !           !             !
+	 *  V                      V (stk.fr.) V  (pt_regs)  V
+	 *  +----- - - - - - ------+===========+=============+
 	 */
 	new_stack = task_stack_page(p) + THREAD_SIZE;
-	if (regs->psr & PSR_PS)
-		new_stack -= STACKFRAME_SZ;
 	new_stack -= STACKFRAME_SZ + TRACEREG_SZ;
-	memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
 	childregs = (struct pt_regs *) (new_stack + STACKFRAME_SZ);
 
 	/*
@@ -356,55 +354,58 @@
 	 * Thus, kpsr|=PSR_PIL.
 	 */
 	ti->ksp = (unsigned long) new_stack;
+	p->thread.kregs = childregs;
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		extern int nwindows;
+		unsigned long psr;
+		memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
+		p->thread.flags |= SPARC_FLAG_KTHREAD;
+		p->thread.current_ds = KERNEL_DS;
+		ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8);
+		childregs->u_regs[UREG_G1] = sp; /* function */
+		childregs->u_regs[UREG_G2] = arg;
+		psr = childregs->psr = get_psr();
+		ti->kpsr = psr | PSR_PIL;
+		ti->kwim = 1 << (((psr & PSR_CWP) + 1) % nwindows);
+		return 0;
+	}
+	memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
+	childregs->u_regs[UREG_FP] = sp;
+	p->thread.flags &= ~SPARC_FLAG_KTHREAD;
+	p->thread.current_ds = USER_DS;
 	ti->kpc = (((unsigned long) ret_from_fork) - 0x8);
 	ti->kpsr = current->thread.fork_kpsr | PSR_PIL;
 	ti->kwim = current->thread.fork_kwim;
 
-	if(regs->psr & PSR_PS) {
-		extern struct pt_regs fake_swapper_regs;
+	if (sp != regs->u_regs[UREG_FP]) {
+		struct sparc_stackf __user *childstack;
+		struct sparc_stackf __user *parentstack;
 
-		p->thread.kregs = &fake_swapper_regs;
-		new_stack += STACKFRAME_SZ + TRACEREG_SZ;
-		childregs->u_regs[UREG_FP] = (unsigned long) new_stack;
-		p->thread.flags |= SPARC_FLAG_KTHREAD;
-		p->thread.current_ds = KERNEL_DS;
-		memcpy(new_stack, (void *)regs->u_regs[UREG_FP], STACKFRAME_SZ);
-		childregs->u_regs[UREG_G6] = (unsigned long) ti;
-	} else {
-		p->thread.kregs = childregs;
-		childregs->u_regs[UREG_FP] = sp;
-		p->thread.flags &= ~SPARC_FLAG_KTHREAD;
-		p->thread.current_ds = USER_DS;
-
-		if (sp != regs->u_regs[UREG_FP]) {
-			struct sparc_stackf __user *childstack;
-			struct sparc_stackf __user *parentstack;
-
-			/*
-			 * This is a clone() call with supplied user stack.
-			 * Set some valid stack frames to give to the child.
-			 */
-			childstack = (struct sparc_stackf __user *)
-				(sp & ~0xfUL);
-			parentstack = (struct sparc_stackf __user *)
-				regs->u_regs[UREG_FP];
+		/*
+		 * This is a clone() call with supplied user stack.
+		 * Set some valid stack frames to give to the child.
+		 */
+		childstack = (struct sparc_stackf __user *)
+			(sp & ~0xfUL);
+		parentstack = (struct sparc_stackf __user *)
+			regs->u_regs[UREG_FP];
 
 #if 0
-			printk("clone: parent stack:\n");
-			show_stackframe(parentstack);
+		printk("clone: parent stack:\n");
+		show_stackframe(parentstack);
 #endif
 
-			childstack = clone_stackframe(childstack, parentstack);
-			if (!childstack)
-				return -EFAULT;
+		childstack = clone_stackframe(childstack, parentstack);
+		if (!childstack)
+			return -EFAULT;
 
 #if 0
-			printk("clone: child stack:\n");
-			show_stackframe(childstack);
+		printk("clone: child stack:\n");
+		show_stackframe(childstack);
 #endif
 
-			childregs->u_regs[UREG_FP] = (unsigned long)childstack;
-		}
+		childregs->u_regs[UREG_FP] = (unsigned long)childstack;
 	}
 
 #ifdef CONFIG_SMP
@@ -503,41 +504,6 @@
 	return error;
 }
 
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process(ie the swapper or direct descendants
- * who haven't done an "execve()") should use this: it will work within
- * a system call from a "real" process, but the process memory space will
- * not be freed until both the parent and the child have exited.
- */
-pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	long retval;
-
-	__asm__ __volatile__("mov %4, %%g2\n\t"    /* Set aside fn ptr... */
-			     "mov %5, %%g3\n\t"    /* and arg. */
-			     "mov %1, %%g1\n\t"
-			     "mov %2, %%o0\n\t"    /* Clone flags. */
-			     "mov 0, %%o1\n\t"     /* usp arg == 0 */
-			     "t 0x10\n\t"          /* Linux/Sparc clone(). */
-			     "cmp %%o1, 0\n\t"
-			     "be 1f\n\t"           /* The parent, just return. */
-			     " nop\n\t"            /* Delay slot. */
-			     "jmpl %%g2, %%o7\n\t" /* Call the function. */
-			     " mov %%g3, %%o0\n\t" /* Get back the arg in delay. */
-			     "mov %3, %%g1\n\t"
-			     "t 0x10\n\t"          /* Linux/Sparc exit(). */
-			     /* Notreached by child. */
-			     "1: mov %%o0, %0\n\t" :
-			     "=r" (retval) :
-			     "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
-			     "i" (__NR_exit),  "r" (fn), "r" (arg) :
-			     "g1", "g2", "g3", "o0", "o1", "memory", "cc");
-	return retval;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;