Merge commit '517ffce4e1a03aea979fe3a18a3dd1761a24fafb' into arch-sparc

Backmerge from the point in mainline where a trivial conflict had been
introduced (arch/sparc/kernel/sys_sparc_64.c had grown sys_kern_features()
right after where kernel_execve() used to be).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index b6b442b..e52f3c2a 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -40,6 +40,8 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index f74ac9e..c1e0191 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -106,7 +106,6 @@
 
 /* Free all resources held by a thread. */
 #define release_thread(tsk)		do { } while(0)
-extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 extern unsigned long get_wchan(struct task_struct *);
 
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 4e5a483..0305d56 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -94,6 +94,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <asm/fpumacro.h>
 
 /* Return saved PC of a blocked thread. */
 struct task_struct;
@@ -143,6 +144,10 @@
 	: \
 	: "r" (regs), "r" (sp - sizeof(struct reg_window) - STACK_BIAS), \
 	  "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))); \
+	fprs_write(0);	\
+	current_thread_info()->xfsr[0] = 0;	\
+	current_thread_info()->fpsaved[0] = 0;	\
+	regs->tstate &= ~TSTATE_PEF;	\
 } while (0)
 
 #define start_thread32(regs, pc, sp) \
@@ -183,13 +188,15 @@
 	: \
 	: "r" (regs), "r" (sp - sizeof(struct reg_window32)), \
 	  "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))); \
+	fprs_write(0);	\
+	current_thread_info()->xfsr[0] = 0;	\
+	current_thread_info()->fpsaved[0] = 0;	\
+	regs->tstate &= ~TSTATE_PEF;	\
 } while (0)
 
 /* Free all resources held by a thread. */
 #define release_thread(tsk)		do { } while (0)
 
-extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 extern unsigned long get_wchan(struct task_struct *task);
 
 #define task_pt_regs(tsk) (task_thread_info(tsk)->kregs)
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index da43bdc..bdfafd7 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -32,6 +32,9 @@
 #define arch_ptrace_stop(exit_code, info) \
 	synchronize_user_stack()
 
+#define current_pt_regs() \
+	((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+
 struct global_reg_snapshot {
 	unsigned long		tstate;
 	unsigned long		tpc;
@@ -55,9 +58,7 @@
 
 extern union global_cpu_snapshot global_cpu_snapshot[NR_CPUS];
 
-#define force_successful_syscall_return()	    \
-do {	current_thread_info()->syscall_noerror = 1; \
-} while (0)
+#define force_successful_syscall_return() set_thread_noerror(1)
 #define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
 #define instruction_pointer(regs) ((regs)->tpc)
 #define instruction_pointer_set(regs, val) ((regs)->tpc = (val))
@@ -100,6 +101,9 @@
 #define arch_ptrace_stop(exit_code, info) \
 	synchronize_user_stack()
 
+#define current_pt_regs() \
+	((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+
 #define user_mode(regs) (!((regs)->psr & PSR_PS))
 #define instruction_pointer(regs) ((regs)->pc)
 #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h
index 7923c4a..cad36f5 100644
--- a/arch/sparc/include/asm/switch_to_64.h
+++ b/arch/sparc/include/asm/switch_to_64.h
@@ -23,7 +23,7 @@
 	/* If you are tempted to conditionalize the following */	\
 	/* so that ASI is only written if it changes, think again. */	\
 	__asm__ __volatile__("wr %%g0, %0, %%asi"			\
-	: : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\
+	: : "r" (task_thread_info(next)->current_ds));\
 	trap_block[current_thread_info()->cpu].thread =			\
 		task_thread_info(next);					\
 	__asm__ __volatile__(						\
diff --git a/arch/sparc/include/asm/syscalls.h b/arch/sparc/include/asm/syscalls.h
index 45a43f6..bf8972a 100644
--- a/arch/sparc/include/asm/syscalls.h
+++ b/arch/sparc/include/asm/syscalls.h
@@ -8,6 +8,4 @@
 				     struct pt_regs *regs,
 				     unsigned long stack_size);
 
-extern asmlinkage int sparc_execve(struct pt_regs *regs);
-
 #endif /* _SPARC64_SYSCALLS_H */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index a3fe4dc..269bd92 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -14,12 +14,12 @@
 #define TI_FLAG_FAULT_CODE_SHIFT	56
 #define TI_FLAG_BYTE_WSTATE		1
 #define TI_FLAG_WSTATE_SHIFT		48
-#define TI_FLAG_BYTE_CWP		2
-#define TI_FLAG_CWP_SHIFT		40
-#define TI_FLAG_BYTE_CURRENT_DS		3
-#define TI_FLAG_CURRENT_DS_SHIFT	32
-#define TI_FLAG_BYTE_FPDEPTH		4
-#define TI_FLAG_FPDEPTH_SHIFT		24
+#define TI_FLAG_BYTE_NOERROR		2
+#define TI_FLAG_BYTE_NOERROR_SHIFT	40
+#define TI_FLAG_BYTE_FPDEPTH		3
+#define TI_FLAG_FPDEPTH_SHIFT		32
+#define TI_FLAG_BYTE_CWP		4
+#define TI_FLAG_CWP_SHIFT		24
 #define TI_FLAG_BYTE_WSAVED		5
 #define TI_FLAG_WSAVED_SHIFT		16
 
@@ -47,7 +47,7 @@
 	struct exec_domain	*exec_domain;
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	__u8			new_child;
-	__u8			syscall_noerror;
+	__u8			current_ds;
 	__u16			cpu;
 
 	unsigned long		*utraps;
@@ -74,9 +74,9 @@
 #define TI_FAULT_CODE	(TI_FLAGS + TI_FLAG_BYTE_FAULT_CODE)
 #define TI_WSTATE	(TI_FLAGS + TI_FLAG_BYTE_WSTATE)
 #define TI_CWP		(TI_FLAGS + TI_FLAG_BYTE_CWP)
-#define TI_CURRENT_DS	(TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS)
 #define TI_FPDEPTH	(TI_FLAGS + TI_FLAG_BYTE_FPDEPTH)
 #define TI_WSAVED	(TI_FLAGS + TI_FLAG_BYTE_WSAVED)
+#define TI_SYS_NOERROR	(TI_FLAGS + TI_FLAG_BYTE_NOERROR)
 #define TI_FPSAVED	0x00000010
 #define TI_KSP		0x00000018
 #define TI_FAULT_ADDR	0x00000020
@@ -84,7 +84,7 @@
 #define TI_EXEC_DOMAIN	0x00000030
 #define TI_PRE_COUNT	0x00000038
 #define TI_NEW_CHILD	0x0000003c
-#define TI_SYS_NOERROR	0x0000003d
+#define TI_CURRENT_DS	0x0000003d
 #define TI_CPU		0x0000003e
 #define TI_UTRAPS	0x00000040
 #define TI_REG_WINDOW	0x00000048
@@ -121,7 +121,7 @@
 #define INIT_THREAD_INFO(tsk)				\
 {							\
 	.task		=	&tsk,			\
-	.flags		= ((unsigned long)ASI_P) << TI_FLAG_CURRENT_DS_SHIFT,	\
+	.current_ds	=	ASI_P,			\
 	.exec_domain	=	&default_exec_domain,	\
 	.preempt_count	=	INIT_PREEMPT_COUNT,	\
 	.restart_block	= {				\
@@ -153,13 +153,12 @@
 #define set_thread_wstate(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSTATE] = (val))
 #define get_thread_cwp()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CWP])
 #define set_thread_cwp(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CWP] = (val))
-#define get_thread_current_ds()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CURRENT_DS])
-#define set_thread_current_ds(val)	(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CURRENT_DS] = (val))
+#define get_thread_noerror()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_NOERROR])
+#define set_thread_noerror(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_NOERROR] = (val))
 #define get_thread_fpdepth()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FPDEPTH])
 #define set_thread_fpdepth(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FPDEPTH] = (val))
 #define get_thread_wsaved()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED])
 #define set_thread_wsaved(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED] = (val))
-
 #endif /* !(__ASSEMBLY__) */
 
 /*
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index 73083e1..e562d3c 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -38,14 +38,14 @@
 #define VERIFY_READ	0
 #define VERIFY_WRITE	1
 
-#define get_fs() ((mm_segment_t) { get_thread_current_ds() })
+#define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)})
 #define get_ds() (KERNEL_DS)
 
 #define segment_eq(a,b)  ((a).seg == (b).seg)
 
 #define set_fs(val)								\
 do {										\
-	set_thread_current_ds((val).seg);					\
+	current_thread_info()->current_ds =(val).seg;				\
 	__asm__ __volatile__ ("wr %%g0, %0, %%asi" : : "r" ((val).seg));	\
 } while(0)
 
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 0ecea6e..c3e5d8b 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -46,6 +46,7 @@
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
+#define __ARCH_WANT_SYS_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index dcaa1cf..21fd1a8 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -806,23 +806,10 @@
 	call	c_sys_nis_syscall
 	 mov	%l5, %o7
 
-	.align	4
-	.globl	sys_execve
-sys_execve:
-	mov	%o7, %l5
-	add	%sp, STACKFRAME_SZ, %o0		! pt_regs *regs arg
-	call	sparc_execve
-	 mov	%l5, %o7
-
-	.globl	sunos_execv
 sunos_execv:
-	st	%g0, [%sp + STACKFRAME_SZ + PT_I2]
-
-	call	sparc_execve
-	 add	%sp, STACKFRAME_SZ, %o0
-
-	b	ret_sys_call
-	 ld	[%sp + STACKFRAME_SZ + PT_I0], %o0
+	.globl	sunos_execv
+	b	sys_execve
+	 clr	%i2
 
 	.align	4
 	.globl	sys_sparc_pipe
@@ -959,17 +946,9 @@
         .align  4
 linux_sparc_ni_syscall:
 	sethi   %hi(sys_ni_syscall), %l7
-	b       syscall_is_too_hard
+	b       do_syscall
 	 or     %l7, %lo(sys_ni_syscall), %l7
 
-linux_fast_syscall:
-	andn	%l7, 3, %l7
-	mov	%i0, %o0
-	mov	%i1, %o1
-	mov 	%i2, %o2
-	jmpl	%l7 + %g0, %g0
-	 mov	%i3, %o3
-
 linux_syscall_trace:
 	add	%sp, STACKFRAME_SZ, %o0
 	call	syscall_trace
@@ -991,6 +970,23 @@
 	b	ret_sys_call
 	 ld	[%sp + STACKFRAME_SZ + PT_I0], %o0
 
+	.globl	ret_from_kernel_thread
+ret_from_kernel_thread:
+	call	schedule_tail
+	 ld	[%g3 + TI_TASK], %o0
+	ld	[%sp + STACKFRAME_SZ + PT_G1], %l0
+	call	%l0
+	 ld	[%sp + STACKFRAME_SZ + PT_G2], %o0
+	rd	%psr, %l1
+	ld	[%sp + STACKFRAME_SZ + PT_PSR], %l0
+	andn	%l0, PSR_CWP, %l0
+	nop
+	and	%l1, PSR_CWP, %l1
+	or	%l0, %l1, %l0
+	st	%l0, [%sp + STACKFRAME_SZ + PT_PSR]
+	b	ret_sys_call
+	 mov	0, %o0
+
 	/* Linux native system calls enter here... */
 	.align	4
 	.globl	linux_sparc_syscall
@@ -1002,11 +998,8 @@
 	bgeu	linux_sparc_ni_syscall
 	 sll	%g1, 2, %l4
 	ld	[%l7 + %l4], %l7
-	andcc	%l7, 1, %g0
-	bne	linux_fast_syscall
-	 /* Just do first insn from SAVE_ALL in the delay slot */
 
-syscall_is_too_hard:
+do_syscall:
 	SAVE_ALL_HEAD
 	 rd	%wim, %l3
 
diff --git a/arch/sparc/kernel/etrap_64.S b/arch/sparc/kernel/etrap_64.S
index 786b185..1276ca2 100644
--- a/arch/sparc/kernel/etrap_64.S
+++ b/arch/sparc/kernel/etrap_64.S
@@ -92,8 +92,10 @@
 		rdpr	%wstate, %g2
 		wrpr	%g0, 0, %canrestore
 		sll	%g2, 3, %g2
+
+		/* Set TI_FPDEPTH to 1 and clear TI_SYS_NOERROR.  */
 		mov	1, %l5
-		stb	%l5, [%l6 + TI_FPDEPTH]
+		sth	%l5, [%l6 + TI_SYS_NOERROR]
 
 		wrpr	%g3, 0, %otherwin
 		wrpr	%g2, 0, %wstate
@@ -152,7 +154,9 @@
 		add	%l6, TI_FPSAVED + 1, %l4
 		srl	%l5, 1, %l3
 		add	%l5, 2, %l5
-		stb	%l5, [%l6 + TI_FPDEPTH]
+
+		/* Set TI_FPDEPTH to %l5 and clear TI_SYS_NOERROR.  */
+		sth	%l5, [%l6 + TI_SYS_NOERROR]
 		ba,pt	%xcc, 2b
 		 stb	%g0, [%l4 + %l3]
 		nop
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 487bffb..bf4c6ad 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -316,9 +316,10 @@
  * XXX See comment above sys_vfork in sparc64. todo.
  */
 extern void ret_from_fork(void);
+extern void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
+		unsigned long arg,
 		struct task_struct *p, struct pt_regs *regs)
 {
 	struct thread_info *ti = task_thread_info(p);
@@ -336,16 +337,13 @@
 	}
 
 	/*
-	 *  p->thread_info         new_stack   childregs
-	 *  !                      !           !             {if(PSR_PS) }
-	 *  V                      V (stk.fr.) V  (pt_regs)  { (stk.fr.) }
-	 *  +----- - - - - - ------+===========+============={+==========}+
+	 *  p->thread_info         new_stack   childregs stack bottom
+	 *  !                      !           !             !
+	 *  V                      V (stk.fr.) V  (pt_regs)  V
+	 *  +----- - - - - - ------+===========+=============+
 	 */
 	new_stack = task_stack_page(p) + THREAD_SIZE;
-	if (regs->psr & PSR_PS)
-		new_stack -= STACKFRAME_SZ;
 	new_stack -= STACKFRAME_SZ + TRACEREG_SZ;
-	memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
 	childregs = (struct pt_regs *) (new_stack + STACKFRAME_SZ);
 
 	/*
@@ -356,55 +354,58 @@
 	 * Thus, kpsr|=PSR_PIL.
 	 */
 	ti->ksp = (unsigned long) new_stack;
+	p->thread.kregs = childregs;
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		extern int nwindows;
+		unsigned long psr;
+		memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
+		p->thread.flags |= SPARC_FLAG_KTHREAD;
+		p->thread.current_ds = KERNEL_DS;
+		ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8);
+		childregs->u_regs[UREG_G1] = sp; /* function */
+		childregs->u_regs[UREG_G2] = arg;
+		psr = childregs->psr = get_psr();
+		ti->kpsr = psr | PSR_PIL;
+		ti->kwim = 1 << (((psr & PSR_CWP) + 1) % nwindows);
+		return 0;
+	}
+	memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
+	childregs->u_regs[UREG_FP] = sp;
+	p->thread.flags &= ~SPARC_FLAG_KTHREAD;
+	p->thread.current_ds = USER_DS;
 	ti->kpc = (((unsigned long) ret_from_fork) - 0x8);
 	ti->kpsr = current->thread.fork_kpsr | PSR_PIL;
 	ti->kwim = current->thread.fork_kwim;
 
-	if(regs->psr & PSR_PS) {
-		extern struct pt_regs fake_swapper_regs;
+	if (sp != regs->u_regs[UREG_FP]) {
+		struct sparc_stackf __user *childstack;
+		struct sparc_stackf __user *parentstack;
 
-		p->thread.kregs = &fake_swapper_regs;
-		new_stack += STACKFRAME_SZ + TRACEREG_SZ;
-		childregs->u_regs[UREG_FP] = (unsigned long) new_stack;
-		p->thread.flags |= SPARC_FLAG_KTHREAD;
-		p->thread.current_ds = KERNEL_DS;
-		memcpy(new_stack, (void *)regs->u_regs[UREG_FP], STACKFRAME_SZ);
-		childregs->u_regs[UREG_G6] = (unsigned long) ti;
-	} else {
-		p->thread.kregs = childregs;
-		childregs->u_regs[UREG_FP] = sp;
-		p->thread.flags &= ~SPARC_FLAG_KTHREAD;
-		p->thread.current_ds = USER_DS;
-
-		if (sp != regs->u_regs[UREG_FP]) {
-			struct sparc_stackf __user *childstack;
-			struct sparc_stackf __user *parentstack;
-
-			/*
-			 * This is a clone() call with supplied user stack.
-			 * Set some valid stack frames to give to the child.
-			 */
-			childstack = (struct sparc_stackf __user *)
-				(sp & ~0xfUL);
-			parentstack = (struct sparc_stackf __user *)
-				regs->u_regs[UREG_FP];
+		/*
+		 * This is a clone() call with supplied user stack.
+		 * Set some valid stack frames to give to the child.
+		 */
+		childstack = (struct sparc_stackf __user *)
+			(sp & ~0xfUL);
+		parentstack = (struct sparc_stackf __user *)
+			regs->u_regs[UREG_FP];
 
 #if 0
-			printk("clone: parent stack:\n");
-			show_stackframe(parentstack);
+		printk("clone: parent stack:\n");
+		show_stackframe(parentstack);
 #endif
 
-			childstack = clone_stackframe(childstack, parentstack);
-			if (!childstack)
-				return -EFAULT;
+		childstack = clone_stackframe(childstack, parentstack);
+		if (!childstack)
+			return -EFAULT;
 
 #if 0
-			printk("clone: child stack:\n");
-			show_stackframe(childstack);
+		printk("clone: child stack:\n");
+		show_stackframe(childstack);
 #endif
 
-			childregs->u_regs[UREG_FP] = (unsigned long)childstack;
-		}
+		childregs->u_regs[UREG_FP] = (unsigned long)childstack;
 	}
 
 #ifdef CONFIG_SMP
@@ -475,69 +476,6 @@
 	return 1;
 }
 
-/*
- * sparc_execve() executes a new program after the asm stub has set
- * things up for us.  This should basically do what I want it to.
- */
-asmlinkage int sparc_execve(struct pt_regs *regs)
-{
-	int error, base = 0;
-	struct filename *filename;
-
-	/* Check for indirect call. */
-	if(regs->u_regs[UREG_G1] == 0)
-		base = 1;
-
-	filename = getname((char __user *)regs->u_regs[base + UREG_I0]);
-	error = PTR_ERR(filename);
-	if(IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name,
-			  (const char __user *const  __user *)
-			  regs->u_regs[base + UREG_I1],
-			  (const char __user *const  __user *)
-			  regs->u_regs[base + UREG_I2],
-			  regs);
-	putname(filename);
-out:
-	return error;
-}
-
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process(ie the swapper or direct descendants
- * who haven't done an "execve()") should use this: it will work within
- * a system call from a "real" process, but the process memory space will
- * not be freed until both the parent and the child have exited.
- */
-pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	long retval;
-
-	__asm__ __volatile__("mov %4, %%g2\n\t"    /* Set aside fn ptr... */
-			     "mov %5, %%g3\n\t"    /* and arg. */
-			     "mov %1, %%g1\n\t"
-			     "mov %2, %%o0\n\t"    /* Clone flags. */
-			     "mov 0, %%o1\n\t"     /* usp arg == 0 */
-			     "t 0x10\n\t"          /* Linux/Sparc clone(). */
-			     "cmp %%o1, 0\n\t"
-			     "be 1f\n\t"           /* The parent, just return. */
-			     " nop\n\t"            /* Delay slot. */
-			     "jmpl %%g2, %%o7\n\t" /* Call the function. */
-			     " mov %%g3, %%o0\n\t" /* Get back the arg in delay. */
-			     "mov %3, %%g1\n\t"
-			     "t 0x10\n\t"          /* Linux/Sparc exit(). */
-			     /* Notreached by child. */
-			     "1: mov %%o0, %0\n\t" :
-			     "=r" (retval) :
-			     "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
-			     "i" (__NR_exit),  "r" (fn), "r" (arg) :
-			     "g1", "g2", "g3", "o0", "o1", "memory", "cc");
-	return retval;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index c6e0c29..dff54f4 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -622,65 +622,56 @@
  * Child  -->  %o0 == parents pid, %o1 == 1
  */
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
+		unsigned long arg,
 		struct task_struct *p, struct pt_regs *regs)
 {
 	struct thread_info *t = task_thread_info(p);
 	struct sparc_stackf *parent_sf;
 	unsigned long child_stack_sz;
 	char *child_trap_frame;
-	int kernel_thread;
-
-	kernel_thread = (regs->tstate & TSTATE_PRIV) ? 1 : 0;
-	parent_sf = ((struct sparc_stackf *) regs) - 1;
 
 	/* Calculate offset to stack_frame & pt_regs */
-	child_stack_sz = ((STACKFRAME_SZ + TRACEREG_SZ) +
-			  (kernel_thread ? STACKFRAME_SZ : 0));
+	child_stack_sz = (STACKFRAME_SZ + TRACEREG_SZ);
 	child_trap_frame = (task_stack_page(p) +
 			    (THREAD_SIZE - child_stack_sz));
-	memcpy(child_trap_frame, parent_sf, child_stack_sz);
 
-	t->flags = (t->flags & ~((0xffUL << TI_FLAG_CWP_SHIFT) |
-				 (0xffUL << TI_FLAG_CURRENT_DS_SHIFT))) |
-		(((regs->tstate + 1) & TSTATE_CWP) << TI_FLAG_CWP_SHIFT);
 	t->new_child = 1;
 	t->ksp = ((unsigned long) child_trap_frame) - STACK_BIAS;
 	t->kregs = (struct pt_regs *) (child_trap_frame +
 				       sizeof(struct sparc_stackf));
 	t->fpsaved[0] = 0;
 
-	if (kernel_thread) {
-		struct sparc_stackf *child_sf = (struct sparc_stackf *)
-			(child_trap_frame + (STACKFRAME_SZ + TRACEREG_SZ));
-
-		/* Zero terminate the stack backtrace.  */
-		child_sf->fp = NULL;
-		t->kregs->u_regs[UREG_FP] =
-		  ((unsigned long) child_sf) - STACK_BIAS;
-
-		t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT);
-		t->kregs->u_regs[UREG_G6] = (unsigned long) t;
-		t->kregs->u_regs[UREG_G4] = (unsigned long) t->task;
-	} else {
-		if (t->flags & _TIF_32BIT) {
-			sp &= 0x00000000ffffffffUL;
-			regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
-		}
-		t->kregs->u_regs[UREG_FP] = sp;
-		t->flags |= ((long)ASI_AIUS << TI_FLAG_CURRENT_DS_SHIFT);
-		if (sp != regs->u_regs[UREG_FP]) {
-			unsigned long csp;
-
-			csp = clone_stackframe(sp, regs->u_regs[UREG_FP]);
-			if (!csp)
-				return -EFAULT;
-			t->kregs->u_regs[UREG_FP] = csp;
-		}
-		if (t->utraps)
-			t->utraps[0]++;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(child_trap_frame, 0, child_stack_sz);
+		__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] = 
+			(current_pt_regs()->tstate + 1) & TSTATE_CWP;
+		t->current_ds = ASI_P;
+		t->kregs->u_regs[UREG_G1] = sp; /* function */
+		t->kregs->u_regs[UREG_G2] = arg;
+		return 0;
 	}
 
+	parent_sf = ((struct sparc_stackf *) regs) - 1;
+	memcpy(child_trap_frame, parent_sf, child_stack_sz);
+	if (t->flags & _TIF_32BIT) {
+		sp &= 0x00000000ffffffffUL;
+		regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
+	}
+	t->kregs->u_regs[UREG_FP] = sp;
+	__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] = 
+		(regs->tstate + 1) & TSTATE_CWP;
+	t->current_ds = ASI_AIUS;
+	if (sp != regs->u_regs[UREG_FP]) {
+		unsigned long csp;
+
+		csp = clone_stackframe(sp, regs->u_regs[UREG_FP]);
+		if (!csp)
+			return -EFAULT;
+		t->kregs->u_regs[UREG_FP] = csp;
+	}
+	if (t->utraps)
+		t->utraps[0]++;
+
 	/* Set the return value for the child. */
 	t->kregs->u_regs[UREG_I0] = current->pid;
 	t->kregs->u_regs[UREG_I1] = 1;
@@ -694,45 +685,6 @@
 	return 0;
 }
 
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process(ie the swapper or direct descendants
- * who haven't done an "execve()") should use this: it will work within
- * a system call from a "real" process, but the process memory space will
- * not be freed until both the parent and the child have exited.
- */
-pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	long retval;
-
-	/* If the parent runs before fn(arg) is called by the child,
-	 * the input registers of this function can be clobbered.
-	 * So we stash 'fn' and 'arg' into global registers which
-	 * will not be modified by the parent.
-	 */
-	__asm__ __volatile__("mov %4, %%g2\n\t"	   /* Save FN into global */
-			     "mov %5, %%g3\n\t"	   /* Save ARG into global */
-			     "mov %1, %%g1\n\t"	   /* Clone syscall nr. */
-			     "mov %2, %%o0\n\t"	   /* Clone flags. */
-			     "mov 0, %%o1\n\t"	   /* usp arg == 0 */
-			     "t 0x6d\n\t"	   /* Linux/Sparc clone(). */
-			     "brz,a,pn %%o1, 1f\n\t" /* Parent, just return. */
-			     " mov %%o0, %0\n\t"
-			     "jmpl %%g2, %%o7\n\t"   /* Call the function. */
-			     " mov %%g3, %%o0\n\t"   /* Set arg in delay. */
-			     "mov %3, %%g1\n\t"
-			     "t 0x6d\n\t"	   /* Linux/Sparc exit(). */
-			     /* Notreached by child. */
-			     "1:" :
-			     "=r" (retval) :
-			     "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
-			     "i" (__NR_exit),  "r" (fn), "r" (arg) :
-			     "g1", "g2", "g3", "o0", "o1", "memory", "cc");
-	return retval;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 typedef struct {
 	union {
 		unsigned int	pr_regs[32];
@@ -799,41 +751,6 @@
 }
 EXPORT_SYMBOL(dump_fpu);
 
-/*
- * sparc_execve() executes a new program after the asm stub has set
- * things up for us.  This should basically do what I want it to.
- */
-asmlinkage int sparc_execve(struct pt_regs *regs)
-{
-	int error, base = 0;
-	struct filename *filename;
-
-	/* User register window flush is done by entry.S */
-
-	/* Check for indirect call. */
-	if (regs->u_regs[UREG_G1] == 0)
-		base = 1;
-
-	filename = getname((char __user *)regs->u_regs[base + UREG_I0]);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name,
-			  (const char __user *const __user *)
-			  regs->u_regs[base + UREG_I1],
-			  (const char __user *const __user *)
-			  regs->u_regs[base + UREG_I2], regs);
-	putname(filename);
-	if (!error) {
-		fprs_write(0);
-		current_thread_info()->xfsr[0] = 0;
-		current_thread_info()->fpsaved[0] = 0;
-		regs->tstate &= ~TSTATE_PEF;
-	}
-out:
-	return error;
-}
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index c323981..03c7e92 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -396,42 +396,6 @@
         return ret;
 }
 
-/*
- * sparc32_execve() executes a new program after the asm stub has set
- * things up for us.  This should basically do what I want it to.
- */
-asmlinkage long sparc32_execve(struct pt_regs *regs)
-{
-	int error, base = 0;
-	struct filename *filename;
-
-	/* User register window flush is done by entry.S */
-
-	/* Check for indirect call. */
-	if ((u32)regs->u_regs[UREG_G1] == 0)
-		base = 1;
-
-	filename = getname(compat_ptr(regs->u_regs[base + UREG_I0]));
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-
-	error = compat_do_execve(filename->name,
-				 compat_ptr(regs->u_regs[base + UREG_I1]),
-				 compat_ptr(regs->u_regs[base + UREG_I2]), regs);
-
-	putname(filename);
-
-	if (!error) {
-		fprs_write(0);
-		current_thread_info()->xfsr[0] = 0;
-		current_thread_info()->fpsaved[0] = 0;
-		regs->tstate &= ~TSTATE_PEF;
-	}
-out:
-	return error;
-}
-
 #ifdef CONFIG_MODULES
 
 asmlinkage long sys32_init_module(void __user *umod, u32 len,
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 0c9b31b..a8e6eb0 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -258,27 +258,3 @@
 	up_read(&uts_sem);
 	return err;
 }
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	long __res;
-	register long __g1 __asm__ ("g1") = __NR_execve;
-	register long __o0 __asm__ ("o0") = (long)(filename);
-	register long __o1 __asm__ ("o1") = (long)(argv);
-	register long __o2 __asm__ ("o2") = (long)(envp);
-	asm volatile ("t 0x10\n\t"
-		      "bcc 1f\n\t"
-		      "mov %%o0, %0\n\t"
-		      "sub %%g0, %%o0, %0\n\t"
-		      "1:\n\t"
-		      : "=r" (__res), "=&r" (__o0)
-		      : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
-		      : "cc");
-	return __res;
-}
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 878ef3d..51b85fe 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -730,28 +730,6 @@
 	return ret;
 }
 
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	long __res;
-	register long __g1 __asm__ ("g1") = __NR_execve;
-	register long __o0 __asm__ ("o0") = (long)(filename);
-	register long __o1 __asm__ ("o1") = (long)(argv);
-	register long __o2 __asm__ ("o2") = (long)(envp);
-	asm volatile ("t 0x6d\n\t"
-		      "sub %%g0, %%o0, %0\n\t"
-		      "movcc %%xcc, %%o0, %0\n\t"
-		      : "=r" (__res), "=&r" (__o0)
-		      : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
-		      : "cc");
-	return __res;
-}
-
 asmlinkage long sys_kern_features(void)
 {
 	return KERN_FEATURE_MIXED_MODE_STACK;
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index 7f5f65d..2ef41e6 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -1,23 +1,19 @@
 	/* SunOS's execv() call only specifies the argv argument, the
 	 * environment settings are the same as the calling processes.
 	 */
-sys_execve:
-	sethi	%hi(sparc_execve), %g1
-	ba,pt	%xcc, execve_merge
-	 or	%g1, %lo(sparc_execve), %g1
+sys64_execve:
+	set	sys_execve, %g1
+	jmpl	%g1, %g0
+	 flushw
 
 #ifdef CONFIG_COMPAT
 sunos_execv:
-	stx	%g0, [%sp + PTREGS_OFF + PT_V9_I2]
+	mov	%g0, %o2
 sys32_execve:
-	sethi	%hi(sparc32_execve), %g1
-	or	%g1, %lo(sparc32_execve), %g1
-#endif
-
-execve_merge:
-	flushw
+	set	compat_sys_execve, %g1
 	jmpl	%g1, %g0
-	 add	%sp, PTREGS_OFF, %o0
+	 flushw
+#endif
 
 	.align	32
 sys_sparc_pipe:
@@ -112,11 +108,16 @@
 ret_from_syscall:
 	/* Clear current_thread_info()->new_child. */
 	stb	%g0, [%g6 + TI_NEW_CHILD]
-	ldx	[%g6 + TI_FLAGS], %l0
 	call	schedule_tail
 	 mov	%g7, %o0
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %o0
+	brnz,pt	%o0, ret_sys_call
+	 ldx	[%g6 + TI_FLAGS], %l0
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %l1
+	call	%l1
+	 ldx	[%sp + PTREGS_OFF + PT_V9_G2], %o0
 	ba,pt	%xcc, ret_sys_call
-	 ldx	[%sp + PTREGS_OFF + PT_V9_I0], %o0
+	 mov	0, %o0
 
 	.globl	sparc_exit
 	.type	sparc_exit,#function
@@ -222,7 +223,6 @@
 	ldx	[%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc
 
 2:
-	stb	%g0, [%g6 + TI_SYS_NOERROR]
 	/* System call success, clear Carry condition code. */
 	andn	%g3, %g2, %g3
 3:
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 45ce6be..d8b22b3 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -107,7 +107,7 @@
 /*40*/	.word sys_newlstat, sys_dup, sys_sparc_pipe, sys_times, sys_nis_syscall
 	.word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid
 /*50*/	.word sys_getegid, sys_acct, sys_memory_ordering, sys_nis_syscall, sys_ioctl
-	.word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys_execve
+	.word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys64_execve
 /*60*/	.word sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize
 	.word sys_msync, sys_vfork, sys_pread64, sys_pwrite64, sys_nis_syscall
 /*70*/	.word sys_nis_syscall, sys_mmap, sys_nis_syscall, sys_64_munmap, sys_mprotect
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index b66a779..e7ecf15 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2688,8 +2688,8 @@
 		     TI_PRE_COUNT != offsetof(struct thread_info,
 					      preempt_count) ||
 		     TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
-		     TI_SYS_NOERROR != offsetof(struct thread_info,
-						syscall_noerror) ||
+		     TI_CURRENT_DS != offsetof(struct thread_info,
+						current_ds) ||
 		     TI_RESTART_BLOCK != offsetof(struct thread_info,
 						  restart_block) ||
 		     TI_KUNA_REGS != offsetof(struct thread_info,
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 9e28a11..85be1ca 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -624,7 +624,7 @@
 void prom_world(int enter)
 {
 	if (!enter)
-		set_fs((mm_segment_t) { get_thread_current_ds() });
+		set_fs(get_fs());
 
 	__asm__ __volatile__("flushw");
 }