tile: support GENERIC_KERNEL_THREAD and GENERIC_KERNEL_EXECVE

Also provide an optimized current_pt_regs() while we're at it.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 875d008..ea7f61e 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -21,6 +21,8 @@
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h
index 1d48c5f..b8f888c 100644
--- a/arch/tile/include/asm/switch_to.h
+++ b/arch/tile/include/asm/switch_to.h
@@ -68,7 +68,10 @@
 /* Support function for forking a new task. */
 void ret_from_fork(void);
 
-/* Called from ret_from_fork() when a new process starts up. */
+/* Support function for forking a new kernel thread. */
+void ret_from_kernel_thread(void *fn, void *arg);
+
+/* Called from ret_from_xxx() when a new process starts up. */
 struct task_struct *sim_notify_fork(struct task_struct *prev);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index c31637b..f116cb0 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -28,17 +28,6 @@
 	STD_ENDPROC(current_text_addr)
 
 /*
- * Implement execve().  The i386 code has a note that forking from kernel
- * space results in no copy on write until the execve, so we should be
- * careful not to write to the stack here.
- */
-STD_ENTRY(kernel_execve)
-	moveli TREG_SYSCALL_NR_NAME, __NR_execve
-	swint1
-	jrp lr
-	STD_ENDPROC(kernel_execve)
-
-/*
  * We don't run this function directly, but instead copy it to a page
  * we map into every user process.  See vdso_setup().
  *
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 6943515..58aad519 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -1291,6 +1291,21 @@
 	}
 	STD_ENDPROC(ret_from_fork)
 
+STD_ENTRY(ret_from_kernel_thread)
+	jal     sim_notify_fork
+	jal     schedule_tail
+	FEEDBACK_REENTER(ret_from_kernel_thread)
+	{
+	 move   r0, r31              /* r31 = arg, set by copy_thread() */
+	 jalr   r30                  /* r30 = fn, set by copy_thread() */
+	}
+	FEEDBACK_REENTER(ret_from_kernel_thread)
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
+	STD_ENDPROC(ret_from_kernel_thread)
+
 	/*
 	 * Code for ill interrupt.
 	 */
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 73f6c0a..f66bd5f 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -1150,6 +1150,21 @@
 	}
 	STD_ENDPROC(ret_from_fork)
 
+STD_ENTRY(ret_from_kernel_thread)
+	jal     sim_notify_fork
+	jal     schedule_tail
+	FEEDBACK_REENTER(ret_from_kernel_thread)
+	{
+	 move   r0, r31              /* r31 = arg, set by copy_thread() */
+	 jalr   r30                  /* r30 = fn, set by copy_thread() */
+	}
+	FEEDBACK_REENTER(ret_from_kernel_thread)
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
+	STD_ENDPROC(ret_from_kernel_thread)
+
 /* Various stub interrupt handlers and syscall handlers */
 
 STD_ENTRY_LOCAL(_kernel_double_fault)
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 9dc1391..da6e4d7 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -157,24 +157,44 @@
 static void save_arch_state(struct thread_struct *t);
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long stack_size,
+		unsigned long arg,
 		struct task_struct *p, struct pt_regs *regs)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs = task_pt_regs(p);
 	unsigned long ksp;
+	unsigned long *callee_regs;
 
 	/*
-	 * When creating a new kernel thread we pass sp as zero.
-	 * Assign it to a reasonable value now that we have the stack.
+	 * Set up the stack and stack pointer appropriately for the
+	 * new child to find itself woken up in __switch_to().
+	 * The callee-saved registers must be on the stack to be read;
+	 * the new task will then jump to assembly support to handle
+	 * calling schedule_tail(), etc., and (for userspace tasks)
+	 * returning to the context set up in the pt_regs.
 	 */
-	if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0))
-		sp = KSTK_TOP(p);
+	ksp = (unsigned long) childregs;
+	ksp -= C_ABI_SAVE_AREA_SIZE;   /* interrupt-entry save area */
+	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+	ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
+	callee_regs = (unsigned long *)ksp;
+	ksp -= C_ABI_SAVE_AREA_SIZE;   /* __switch_to() save area */
+	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+	p->thread.ksp = ksp;
 
-	/*
-	 * Do not clone step state from the parent; each thread
-	 * must make its own lazily.
-	 */
-	task_thread_info(p)->step_state = NULL;
+	/* Record the pid of the task that created this one. */
+	p->thread.creator_pid = current->pid;
+
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		memset(&callee_regs[2], 0,
+		       (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
+		callee_regs[0] = sp;   /* r30 = function */
+		callee_regs[1] = arg;  /* r31 = arg */
+		childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
+		p->thread.pc = (unsigned long) ret_from_kernel_thread;
+		return 0;
+	}
 
 	/*
 	 * Start new thread in ret_from_fork so it schedules properly
@@ -182,20 +202,24 @@
 	 */
 	p->thread.pc = (unsigned long) ret_from_fork;
 
+	/*
+	 * Do not clone step state from the parent; each thread
+	 * must make its own lazily.
+	 */
+	task_thread_info(p)->step_state = NULL;
+
 	/* Save user stack top pointer so we can ID the stack vm area later. */
 	p->thread.usp0 = sp;
 
-	/* Record the pid of the process that created this one. */
-	p->thread.creator_pid = current->pid;
-
 	/*
 	 * Copy the registers onto the kernel stack so the
 	 * return-from-interrupt code will reload it into registers.
 	 */
-	childregs = task_pt_regs(p);
 	*childregs = *regs;
 	childregs->regs[0] = 0;         /* return value is zero */
 	childregs->sp = sp;  /* override with new user stack pointer */
+	memcpy(callee_regs, &regs->regs[CALLEE_SAVED_FIRST_REG],
+	       CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));
 
 	/*
 	 * If CLONE_SETTLS is set, set "tp" in the new task to "r4",
@@ -204,24 +228,6 @@
 	if (clone_flags & CLONE_SETTLS)
 		childregs->tp = regs->regs[4];
 
-	/*
-	 * Copy the callee-saved registers from the passed pt_regs struct
-	 * into the context-switch callee-saved registers area.
-	 * This way when we start the interrupt-return sequence, the
-	 * callee-save registers will be correctly in registers, which
-	 * is how we assume the compiler leaves them as we start doing
-	 * the normal return-from-interrupt path after calling C code.
-	 * Zero out the C ABI save area to mark the top of the stack.
-	 */
-	ksp = (unsigned long) childregs;
-	ksp -= C_ABI_SAVE_AREA_SIZE;   /* interrupt-entry save area */
-	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
-	ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
-	memcpy((void *)ksp, &regs->regs[CALLEE_SAVED_FIRST_REG],
-	       CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));
-	ksp -= C_ABI_SAVE_AREA_SIZE;   /* __switch_to() save area */
-	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
-	p->thread.ksp = ksp;
 
 #if CHIP_HAS_TILE_DMA()
 	/*
@@ -650,37 +656,6 @@
 	return 0;
 }
 
-/*
- * We pass in lr as zero (cleared in kernel_thread) and the caller
- * part of the backtrace ABI on the stack also zeroed (in copy_thread)
- * so that backtraces will stop with this function.
- * Note that we don't use r0, since copy_thread() clears it.
- */
-static void start_kernel_thread(int dummy, int (*fn)(int), int arg)
-{
-	do_exit(fn(arg));
-}
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.ex1 = PL_ICS_EX1(KERNEL_PL, 0);  /* run at kernel PL, no ICS */
-	regs.pc = (long) start_kernel_thread;
-	regs.flags = PT_FLAGS_CALLER_SAVES;   /* need to restore r1 and r2 */
-	regs.regs[1] = (long) fn;             /* function pointer */
-	regs.regs[2] = (long) arg;            /* parameter register */
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs,
-		       0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
 /* Flush thread state. */
 void flush_thread(void)
 {