Merge tag 'xtensa-next-20121101' of git://github.com/czankel/xtensa-linux

Pull Xtensa fixes from Chris Zankel:
 "Some important bug fixes.

  With the change to uapi, there was a bug introduced that results in an
  empty syscall table (mult-inclusion bug).  Switching to the generic
  thread/execve allowed us to fix a bug we had in vfork()."

* tag 'xtensa-next-20121101' of git://github.com/czankel/xtensa-linux:
  xtensa: switch to generic sys_execve()
  xtensa: switch to generic kernel_execve()
  xtensa: switch to generic kernel_thread()
  xtensa: reset windowbase/windowstart when cloning the VM
  xtensa: use physical addresses for bus addresses
  xtensa: allow multi-inclusion for uapi/unistd.h
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index cdcb48a..0d1f36a 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -13,6 +13,8 @@
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
 	select GENERIC_PCI_IOMAP
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h
index e6be5b9..700c2e6 100644
--- a/arch/xtensa/include/asm/io.h
+++ b/arch/xtensa/include/asm/io.h
@@ -62,6 +62,10 @@
 static inline void iounmap(volatile void __iomem *addr)
 {
 }
+
+#define virt_to_bus     virt_to_phys
+#define bus_to_virt     phys_to_virt
+
 #endif /* CONFIG_MMU */
 
 /*
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 5c371d8..2d630e7 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -152,6 +152,7 @@
 
 /* Clearing a0 terminates the backtrace. */
 #define start_thread(regs, new_pc, new_sp) \
+	memset(regs, 0, sizeof(*regs)); \
 	regs->pc = new_pc; \
 	regs->ps = USER_PS_VALUE; \
 	regs->areg[1] = new_sp; \
@@ -168,9 +169,6 @@
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
 
-/* Create a kernel thread without removing it from tasklists */
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)	do { } while(0)
 #define release_segments(mm)	do { } while(0)
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index c1dacca..124aeee 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -10,7 +10,7 @@
 
 struct pt_regs;
 struct sigaction;
-asmlinkage long xtensa_execve(char*, char**, char**, struct pt_regs*);
+asmlinkage long sys_execve(char*, char**, char**, struct pt_regs*);
 asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*);
 asmlinkage long xtensa_ptrace(long, long, long, long);
 asmlinkage long xtensa_sigreturn(struct pt_regs*);
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index 9ef1c31..f4e6eaa 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -1,16 +1,9 @@
-/*
- * include/asm-xtensa/unistd.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
+#ifndef _XTENSA_UNISTD_H
+#define _XTENSA_UNISTD_H
 
+#define __ARCH_WANT_SYS_EXECVE
 #include <uapi/asm/unistd.h>
 
-
 /*
  * "Conditional" syscalls
  *
@@ -37,3 +30,5 @@
 #define __IGNORE_mmap				/* use mmap2 */
 #define __IGNORE_vfork				/* use clone */
 #define __IGNORE_fadvise64			/* use fadvise64_64 */
+
+#endif /* _XTENSA_UNISTD_H */
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index 479abae..9f36d0e 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -1,14 +1,4 @@
-/*
- * include/asm-xtensa/unistd.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2012 Tensilica Inc.
- */
-
-#ifndef _UAPI_XTENSA_UNISTD_H
+#if !defined(_UAPI_XTENSA_UNISTD_H) || defined(__SYSCALL)
 #define _UAPI_XTENSA_UNISTD_H
 
 #ifndef __SYSCALL
@@ -272,7 +262,7 @@
 #define __NR_clone 				116
 __SYSCALL(116, xtensa_clone, 5)
 #define __NR_execve 				117
-__SYSCALL(117, xtensa_execve, 3)
+__SYSCALL(117, sys_execve, 3)
 #define __NR_exit 				118
 __SYSCALL(118, sys_exit, 1)
 #define __NR_exit_group 			119
@@ -759,4 +749,6 @@
 
 #define SYS_XTENSA_COUNT                  5     /* count */
 
+#undef __SYSCALL
+
 #endif /* _UAPI_XTENSA_UNISTD_H */
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 18453067..90bfc1d 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1833,50 +1833,6 @@
 
 
 /*
- * Create a kernel thread
- *
- * int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
- * a2                    a2                 a3             a4
- */
-
-ENTRY(kernel_thread)
-	entry	a1, 16
-
-	mov	a5, a2			# preserve fn over syscall
-	mov	a7, a3			# preserve args over syscall
-
-	movi	a3, _CLONE_VM | _CLONE_UNTRACED
-	movi	a2, __NR_clone
-	or	a6, a4, a3		# arg0: flags
-	mov	a3, a1			# arg1: sp
-	syscall
-
-	beq	a3, a1, 1f		# branch if parent
-	mov	a6, a7			# args
-	callx4	a5			# fn(args)
-
-	movi	a2, __NR_exit
-	syscall				# return value of fn(args) still in a6
-
-1:	retw
-
-/*
- * Do a system call from kernel instead of calling sys_execve, so we end up
- * with proper pt_regs.
- *
- * int kernel_execve(const char *fname, char *const argv[], charg *const envp[])
- * a2                        a2               a3                  a4
- */
-
-ENTRY(kernel_execve)
-	entry	a1, 16
-	mov	a6, a2			# arg0 is in a6
-	movi	a2, __NR_execve
-	syscall
-
-	retw
-
-/*
  * Task switch.
  *
  * struct task*  _switch_to (struct task* prev, struct task* next)
@@ -1958,3 +1914,16 @@
 
 	j	common_exception_return
 
+/*
+ * Kernel thread creation helper
+ * On entry, set up by copy_thread: a2 = thread_fn, a3 = thread_fn arg
+ *           left from _switch_to: a6 = prev
+ */
+ENTRY(ret_from_kernel_thread)
+
+	call4	schedule_tail
+	mov	a6, a3
+	callx4	a2
+	j	common_exception_return
+
+ENDPROC(ret_from_kernel_thread)
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 1908f66..09ae7bf 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -45,6 +45,7 @@
 #include <asm/regs.h>
 
 extern void ret_from_fork(void);
+extern void ret_from_kernel_thread(void);
 
 struct task_struct *current_set[NR_CPUS] = {&init_task, };
 
@@ -158,18 +159,30 @@
 /*
  * Copy thread.
  *
+ * There are two modes in which this function is called:
+ * 1) Userspace thread creation,
+ *    regs != NULL, usp_thread_fn is userspace stack pointer.
+ *    It is expected to copy parent regs (in case CLONE_VM is not set
+ *    in the clone_flags) and set up passed usp in the childregs.
+ * 2) Kernel thread creation,
+ *    regs == NULL, usp_thread_fn is the function to run in the new thread
+ *    and thread_fn_arg is its parameter.
+ *    childregs are not used for the kernel threads.
+ *
  * The stack layout for the new thread looks like this:
  *
- *	+------------------------+ <- sp in childregs (= tos)
+ *	+------------------------+
  *	|       childregs        |
  *	+------------------------+ <- thread.sp = sp in dummy-frame
  *	|      dummy-frame       |    (saved in dummy-frame spill-area)
  *	+------------------------+
  *
- * We create a dummy frame to return to ret_from_fork:
- *   a0 points to ret_from_fork (simulating a call4)
+ * We create a dummy frame to return to either ret_from_fork or
+ *   ret_from_kernel_thread:
+ *   a0 points to ret_from_fork/ret_from_kernel_thread (simulating a call4)
  *   sp points to itself (thread.sp)
- *   a2, a3 are unused.
+ *   a2, a3 are unused for userspace threads,
+ *   a2 points to thread_fn, a3 holds thread_fn arg for kernel threads.
  *
  * Note: This is a pristine frame, so we don't need any spill region on top of
  *       childregs.
@@ -185,43 +198,63 @@
  * involved.  Much simpler to just not copy those live frames across.
  */
 
-int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
-                struct task_struct * p, struct pt_regs * regs)
+int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
+		unsigned long thread_fn_arg,
+		struct task_struct *p, struct pt_regs *unused)
 {
-	struct pt_regs *childregs;
-	unsigned long tos;
-	int user_mode = user_mode(regs);
+	struct pt_regs *childregs = task_pt_regs(p);
 
 #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
 	struct thread_info *ti;
 #endif
 
-	/* Set up new TSS. */
-	tos = (unsigned long)task_stack_page(p) + THREAD_SIZE;
-	if (user_mode)
-		childregs = (struct pt_regs*)(tos - PT_USER_SIZE);
-	else
-		childregs = (struct pt_regs*)tos - 1;
-
-	/* This does not copy all the regs.  In a bout of brilliance or madness,
-	   ARs beyond a0-a15 exist past the end of the struct. */
-	*childregs = *regs;
-
 	/* Create a call4 dummy-frame: a0 = 0, a1 = childregs. */
 	*((int*)childregs - 3) = (unsigned long)childregs;
 	*((int*)childregs - 4) = 0;
 
-	childregs->areg[2] = 0;
-	p->set_child_tid = p->clear_child_tid = NULL;
-	p->thread.ra = MAKE_RA_FOR_CALL((unsigned long)ret_from_fork, 0x1);
 	p->thread.sp = (unsigned long)childregs;
 
-	if (user_mode(regs)) {
+	if (!(p->flags & PF_KTHREAD)) {
+		struct pt_regs *regs = current_pt_regs();
+		unsigned long usp = usp_thread_fn ?
+			usp_thread_fn : regs->areg[1];
 
+		p->thread.ra = MAKE_RA_FOR_CALL(
+				(unsigned long)ret_from_fork, 0x1);
+
+		/* This does not copy all the regs.
+		 * In a bout of brilliance or madness,
+		 * ARs beyond a0-a15 exist past the end of the struct.
+		 */
+		*childregs = *regs;
 		childregs->areg[1] = usp;
+		childregs->areg[2] = 0;
+
+		/* When sharing memory with the parent thread, the child
+		   usually starts on a pristine stack, so we have to reset
+		   windowbase, windowstart and wmask.
+		   (Note that such a new thread is required to always create
+		   an initial call4 frame)
+		   The exception is vfork, where the new thread continues to
+		   run on the parent's stack until it calls execve. This could
+		   be a call8 or call12, which requires a legal stack frame
+		   of the previous caller for the overflow handlers to work.
+		   (Note that it's always legal to overflow live registers).
+		   In this case, ensure to spill at least the stack pointer
+		   of that frame. */
+
 		if (clone_flags & CLONE_VM) {
-			childregs->wmask = 1;	/* can't share live windows */
+			/* check that caller window is live and same stack */
+			int len = childregs->wmask & ~0xf;
+			if (regs->areg[1] == usp && len != 0) {
+				int callinc = (regs->areg[0] >> 30) & 3;
+				int caller_ars = XCHAL_NUM_AREGS - callinc * 4;
+				put_user(regs->areg[caller_ars+1],
+					 (unsigned __user*)(usp - 12));
+			}
+			childregs->wmask = 1;
+			childregs->windowstart = 1;
+			childregs->windowbase = 0;
 		} else {
 			int len = childregs->wmask & ~0xf;
 			memcpy(&childregs->areg[XCHAL_NUM_AREGS - len/4],
@@ -230,11 +263,19 @@
 // FIXME: we need to set THREADPTR in thread_info...
 		if (clone_flags & CLONE_SETTLS)
 			childregs->areg[2] = childregs->areg[6];
-
 	} else {
-		/* In kernel space, we start a new thread with a new stack. */
-		childregs->wmask = 1;
-		childregs->areg[1] = tos;
+		p->thread.ra = MAKE_RA_FOR_CALL(
+				(unsigned long)ret_from_kernel_thread, 1);
+
+		/* pass parameters to ret_from_kernel_thread:
+		 * a2 = thread_fn, a3 = thread_fn arg
+		 */
+		*((int *)childregs - 1) = thread_fn_arg;
+		*((int *)childregs - 2) = usp_thread_fn;
+
+		/* Childregs are only used when we're going to userspace
+		 * in which case start_thread will set them up.
+		 */
 	}
 
 #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
@@ -330,32 +371,5 @@
                   void __user *child_tid, long a5,
                   struct pt_regs *regs)
 {
-        if (!newsp)
-                newsp = regs->areg[1];
         return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
-
-/*
- * xtensa_execve() executes a new program.
- */
-
-asmlinkage
-long xtensa_execve(const char __user *name,
-		   const char __user *const __user *argv,
-                   const char __user *const __user *envp,
-                   long a3, long a4, long a5,
-                   struct pt_regs *regs)
-{
-	long error;
-	struct filename *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-out:
-	return error;
-}
-
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index a5c01e7..5702065 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -32,10 +32,8 @@
 syscall_t sys_call_table[__NR_syscall_count] /* FIXME __cacheline_aligned */= {
 	[0 ... __NR_syscall_count - 1] = (syscall_t)&sys_ni_syscall,
 
-#undef __SYSCALL
 #define __SYSCALL(nr,symbol,nargs) [ nr ] = (syscall_t)symbol,
-#undef  __KERNEL_SYSCALLS__
-#include <asm/unistd.h>
+#include <uapi/asm/unistd.h>
 };
 
 asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg)
@@ -49,7 +47,8 @@
 	return (long)ret;
 }
 
-asmlinkage long xtensa_fadvise64_64(int fd, int advice, unsigned long long offset, unsigned long long len)
+asmlinkage long xtensa_fadvise64_64(int fd, int advice,
+		unsigned long long offset, unsigned long long len)
 {
 	return sys_fadvise64_64(fd, offset, len, advice);
 }
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index a8b9f1f..afe058b 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -43,7 +43,6 @@
 EXPORT_SYMBOL(clear_page);
 EXPORT_SYMBOL(copy_page);
 
-EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(empty_zero_page);
 
 /*