[MIPS] kpsd and other AP/SP improvements.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 7b49aa5..a7bac04 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1476,6 +1476,16 @@
 	depends on MIPS_VPE_LOADER
 	help
 
+config MIPS_APSP_KSPD
+	bool "Enable KSPD"
+	depends on MIPS_VPE_APSP_API
+	default y
+	help
+	  KSPD is a kernel daemon that accepts syscall requests from the SP
+	  side, actions them and returns the results. It also handles the
+	  "exit" syscall notifying other kernel modules the SP program is
+	  exiting.  You probably want to say yes here.
+
 config SB1_PASS_1_WORKAROUNDS
 	bool
 	depends on CPU_SB1_PASS_1
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 309d54c..9ec01de 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -36,6 +36,7 @@
 
 obj-$(CONFIG_MIPS_MT_SMP)	+= smp_mt.o
 
+obj-$(CONFIG_MIPS_APSP_KSPD)	+= kspd.o
 obj-$(CONFIG_MIPS_VPE_LOADER)	+= vpe.o
 obj-$(CONFIG_MIPS_VPE_APSP_API)	+= rtlx.o
 
diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c
new file mode 100644
index 0000000..f06a144
--- /dev/null
+++ b/arch/mips/kernel/kspd.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2005 MIPS Technologies, Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/unistd.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/syscalls.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+
+#include <asm/vpe.h>
+#include <asm/rtlx.h>
+#include <asm/kspd.h>
+
+static struct workqueue_struct *workqueue = NULL;
+static struct work_struct work;
+
+extern unsigned long cpu_khz;
+
+struct mtsp_syscall {
+	int cmd;
+	unsigned char abi;
+	unsigned char size;
+};
+
+struct mtsp_syscall_ret {
+	int retval;
+	int errno;
+};
+
+struct mtsp_syscall_generic {
+	int arg0;
+	int arg1;
+	int arg2;
+	int arg3;
+	int arg4;
+	int arg5;
+	int arg6;
+};
+
+static struct list_head kspd_notifylist;
+static int sp_stopping = 0;
+
+/* these should match with those in the SDE kit */
+#define MTSP_SYSCALL_BASE	0
+#define MTSP_SYSCALL_EXIT	(MTSP_SYSCALL_BASE + 0)
+#define MTSP_SYSCALL_OPEN	(MTSP_SYSCALL_BASE + 1)
+#define MTSP_SYSCALL_READ	(MTSP_SYSCALL_BASE + 2)
+#define MTSP_SYSCALL_WRITE	(MTSP_SYSCALL_BASE + 3)
+#define MTSP_SYSCALL_CLOSE	(MTSP_SYSCALL_BASE + 4)
+#define MTSP_SYSCALL_LSEEK32	(MTSP_SYSCALL_BASE + 5)
+#define MTSP_SYSCALL_ISATTY	(MTSP_SYSCALL_BASE + 6)
+#define MTSP_SYSCALL_GETTIME	(MTSP_SYSCALL_BASE + 7)
+#define MTSP_SYSCALL_PIPEFREQ	(MTSP_SYSCALL_BASE + 8)
+#define MTSP_SYSCALL_GETTOD	(MTSP_SYSCALL_BASE + 9)
+
+#define MTSP_O_RDONLY		0x0000
+#define MTSP_O_WRONLY		0x0001
+#define MTSP_O_RDWR		0x0002
+#define MTSP_O_NONBLOCK		0x0004
+#define MTSP_O_APPEND		0x0008
+#define MTSP_O_SHLOCK		0x0010
+#define MTSP_O_EXLOCK		0x0020
+#define MTSP_O_ASYNC		0x0040
+#define MTSP_O_FSYNC		O_SYNC
+#define MTSP_O_NOFOLLOW		0x0100
+#define MTSP_O_SYNC		0x0080
+#define MTSP_O_CREAT		0x0200
+#define MTSP_O_TRUNC		0x0400
+#define MTSP_O_EXCL		0x0800
+#define MTSP_O_BINARY		0x8000
+
+#define SP_VPE 1
+
+struct apsp_table  {
+	int sp;
+	int ap;
+};
+
+/* we might want to do the mode flags too */
+struct apsp_table open_flags_table[] = {
+	{ MTSP_O_RDWR, O_RDWR },
+	{ MTSP_O_WRONLY, O_WRONLY },
+	{ MTSP_O_CREAT, O_CREAT },
+	{ MTSP_O_TRUNC, O_TRUNC },
+	{ MTSP_O_NONBLOCK, O_NONBLOCK },
+	{ MTSP_O_APPEND, O_APPEND },
+	{ MTSP_O_NOFOLLOW, O_NOFOLLOW }
+};
+
+struct apsp_table syscall_command_table[] = {
+	{ MTSP_SYSCALL_OPEN, __NR_open },
+	{ MTSP_SYSCALL_CLOSE, __NR_close },
+	{ MTSP_SYSCALL_READ, __NR_read },
+	{ MTSP_SYSCALL_WRITE, __NR_write },
+	{ MTSP_SYSCALL_LSEEK32, __NR_lseek }
+};
+
+static int sp_syscall(int num, int arg0, int arg1, int arg2, int arg3)
+{
+	register long int _num  __asm__ ("$2") = num;
+	register long int _arg0  __asm__ ("$4") = arg0;
+	register long int _arg1  __asm__ ("$5") = arg1;
+	register long int _arg2  __asm__ ("$6") = arg2;
+	register long int _arg3  __asm__ ("$7") = arg3;
+
+	mm_segment_t old_fs;
+
+	old_fs = get_fs();
+ 	set_fs(KERNEL_DS);
+
+  	__asm__ __volatile__ (
+ 	"	syscall					\n"
+ 	: "=r" (_num), "=r" (_arg3)
+ 	: "r" (_num), "r" (_arg0), "r" (_arg1), "r" (_arg2), "r" (_arg3));
+
+	set_fs(old_fs);
+
+	/* $a3 is error flag */
+	if (_arg3)
+		return -_num;
+
+	return _num;
+}
+
+static int translate_syscall_command(int cmd)
+{
+	int i;
+	int ret = -1;
+
+	for (i = 0; i < ARRAY_SIZE(syscall_command_table); i++) {
+		if ((cmd == syscall_command_table[i].sp))
+			return syscall_command_table[i].ap;
+	}
+
+	return ret;
+}
+
+static unsigned int translate_open_flags(int flags)
+{
+	int i;
+	unsigned int ret = 0;
+
+	for (i = 0; i < (sizeof(open_flags_table) / sizeof(struct apsp_table));
+	     i++) {
+		if( (flags & open_flags_table[i].sp) ) {
+			ret |= open_flags_table[i].ap;
+		}
+	}
+
+	return ret;
+}
+
+
+static void sp_setfsuidgid( uid_t uid, gid_t gid)
+{
+	current->fsuid = uid;
+	current->fsgid = gid;
+
+	key_fsuid_changed(current);
+	key_fsgid_changed(current);
+}
+
+/*
+ * Expects a request to be on the sysio channel. Reads it.  Decides whether
+ * its a linux syscall and runs it, or whatever.  Puts the return code back
+ * into the request and sends the whole thing back.
+ */
+void sp_work_handle_request(void)
+{
+	struct mtsp_syscall sc;
+	struct mtsp_syscall_generic generic;
+	struct mtsp_syscall_ret ret;
+	struct kspd_notifications *n;
+	struct timeval tv;
+	struct timezone tz;
+	int cmd;
+
+	char *vcwd;
+	mm_segment_t old_fs;
+	int size;
+
+	ret.retval = -1;
+
+	if (!rtlx_read(RTLX_CHANNEL_SYSIO, &sc, sizeof(struct mtsp_syscall), 0)) {
+		printk(KERN_ERR "Expected request but nothing to read\n");
+		return;
+	}
+
+	size = sc.size;
+
+	if (size) {
+		if (!rtlx_read(RTLX_CHANNEL_SYSIO, &generic, size, 0)) {
+			printk(KERN_ERR "Expected request but nothing to read\n");
+			return;
+		}
+	}
+
+	/* Run the syscall at the priviledge of the user who loaded the
+	   SP program */
+
+	if (vpe_getuid(SP_VPE))
+		sp_setfsuidgid( vpe_getuid(SP_VPE), vpe_getgid(SP_VPE));
+
+	switch (sc.cmd) {
+	/* needs the flags argument translating from SDE kit to
+	   linux */
+ 	case MTSP_SYSCALL_PIPEFREQ:
+ 		ret.retval = cpu_khz * 1000;
+ 		ret.errno = 0;
+ 		break;
+
+ 	case MTSP_SYSCALL_GETTOD:
+ 		memset(&tz, 0, sizeof(tz));
+ 		if ((ret.retval = sp_syscall(__NR_gettimeofday, (int)&tv,
+ 		                             (int)&tz, 0,0)) == 0)
+		ret.retval = tv.tv_sec;
+
+		ret.errno = errno;
+		break;
+
+ 	case MTSP_SYSCALL_EXIT:
+		list_for_each_entry(n, &kspd_notifylist, list)
+ 			n->kspd_sp_exit(SP_VPE);
+		sp_stopping = 1;
+
+		printk(KERN_DEBUG "KSPD got exit syscall from SP exitcode %d\n",
+		       generic.arg0);
+ 		break;
+
+ 	case MTSP_SYSCALL_OPEN:
+ 		generic.arg1 = translate_open_flags(generic.arg1);
+
+ 		vcwd = vpe_getcwd(SP_VPE);
+
+ 		/* change to the cwd of the process that loaded the SP program */
+		old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		sys_chdir(vcwd);
+		set_fs(old_fs);
+
+ 		sc.cmd = __NR_open;
+
+		/* fall through */
+
+  	default:
+ 		if ((sc.cmd >= __NR_Linux) &&
+		    (sc.cmd <= (__NR_Linux +  __NR_Linux_syscalls)) )
+			cmd = sc.cmd;
+		else
+			cmd = translate_syscall_command(sc.cmd);
+
+		if (cmd >= 0) {
+			ret.retval = sp_syscall(cmd, generic.arg0, generic.arg1,
+			                        generic.arg2, generic.arg3);
+			ret.errno = errno;
+		} else
+ 			printk(KERN_WARNING
+			       "KSPD: Unknown SP syscall number %d\n", sc.cmd);
+		break;
+ 	} /* switch */
+
+	if (vpe_getuid(SP_VPE))
+		sp_setfsuidgid( 0, 0);
+
+	if ((rtlx_write(RTLX_CHANNEL_SYSIO, &ret, sizeof(struct mtsp_syscall_ret), 0))
+	    < sizeof(struct mtsp_syscall_ret))
+		printk("KSPD: sp_work_handle_request failed to send to SP\n");
+}
+
+static void sp_cleanup(void)
+{
+	struct files_struct *files = current->files;
+	int i, j;
+	struct fdtable *fdt;
+
+	j = 0;
+
+	/*
+	 * It is safe to dereference the fd table without RCU or
+	 * ->file_lock
+	 */
+	fdt = files_fdtable(files);
+	for (;;) {
+		unsigned long set;
+		i = j * __NFDBITS;
+		if (i >= fdt->max_fdset || i >= fdt->max_fds)
+			break;
+		set = fdt->open_fds->fds_bits[j++];
+		while (set) {
+			if (set & 1) {
+				struct file * file = xchg(&fdt->fd[i], NULL);
+				if (file)
+					filp_close(file, files);
+			}
+			i++;
+			set >>= 1;
+		}
+	}
+}
+
+static int channel_open = 0;
+
+/* the work handler */
+static void sp_work(void *data)
+{
+	if (!channel_open) {
+		if( rtlx_open(RTLX_CHANNEL_SYSIO, 1) != 0) {
+			printk("KSPD: unable to open sp channel\n");
+			sp_stopping = 1;
+		} else {
+			channel_open++;
+			printk(KERN_DEBUG "KSPD: SP channel opened\n");
+		}
+	} else {
+		/* wait for some data, allow it to sleep */
+		rtlx_read_poll(RTLX_CHANNEL_SYSIO, 1);
+
+		/* Check we haven't been woken because we are stopping */
+		if (!sp_stopping)
+			sp_work_handle_request();
+	}
+
+	if (!sp_stopping)
+		queue_work(workqueue, &work);
+	else
+		sp_cleanup();
+}
+
+static void startwork(int vpe)
+{
+	sp_stopping = channel_open = 0;
+
+	if (workqueue == NULL) {
+		if ((workqueue = create_singlethread_workqueue("kspd")) == NULL) {
+			printk(KERN_ERR "unable to start kspd\n");
+			return;
+		}
+
+		INIT_WORK(&work, sp_work, NULL);
+		queue_work(workqueue, &work);
+	} else
+		queue_work(workqueue, &work);
+
+}
+
+static void stopwork(int vpe)
+{
+	sp_stopping = 1;
+
+	printk(KERN_DEBUG "KSPD: SP stopping\n");
+}
+
+void kspd_notify(struct kspd_notifications *notify)
+{
+	list_add(&notify->list, &kspd_notifylist);
+}
+
+static struct vpe_notifications notify;
+static int kspd_module_init(void)
+{
+	INIT_LIST_HEAD(&kspd_notifylist);
+
+	notify.start = startwork;
+	notify.stop = stopwork;
+	vpe_notify(SP_VPE, &notify);
+
+	return 0;
+}
+
+static void kspd_module_exit(void)
+{
+
+}
+
+module_init(kspd_module_init);
+module_exit(kspd_module_exit);
+
+MODULE_DESCRIPTION("MIPS KSPD");
+MODULE_AUTHOR("Elizabeth Oldham, MIPS Technologies, Inc.");
+MODULE_LICENSE("GPL");
diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index 986a9cf..6179805 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -21,45 +21,44 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/elf.h>
+#include <linux/seq_file.h>
+#include <linux/syscalls.h>
+#include <linux/moduleloader.h>
 #include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
-
 #include <asm/mipsmtregs.h>
-#include <asm/bitops.h>
+#include <asm/cacheflush.h>
+#include <asm/atomic.h>
 #include <asm/cpu.h>
 #include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/vpe.h>
 #include <asm/rtlx.h>
-#include <asm/uaccess.h>
 
 #define RTLX_TARG_VPE 1
 
 static struct rtlx_info *rtlx;
 static int major;
 static char module_name[] = "rtlx";
-static struct irqaction irq;
-static int irq_num;
-
-static inline int spacefree(int read, int write, int size)
-{
-	if (read == write) {
-		/*
-		 * never fill the buffer completely, so indexes are always
-		 * equal if empty and only empty, or !equal if data available
-		 */
-		return size - 1;
-	}
-
-	return ((read + size - write) % size) - 1;
-}
 
 static struct chan_waitqueues {
 	wait_queue_head_t rt_queue;
 	wait_queue_head_t lx_queue;
+	int in_open;
 } channel_wqs[RTLX_CHANNELS];
 
+static struct irqaction irq;
+static int irq_num;
+static struct vpe_notifications notify;
+static int sp_stopping = 0;
+
 extern void *vpe_get_shared(int index);
 
 static void rtlx_dispatch(struct pt_regs *regs)
@@ -67,174 +66,298 @@
 	do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ, regs);
 }
 
+
+/* Interrupt handler may be called before rtlx_init has otherwise had
+   a chance to run.
+*/
 static irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	int i;
 
 	for (i = 0; i < RTLX_CHANNELS; i++) {
-		struct rtlx_channel *chan = &rtlx->channel[i];
-
-		if (chan->lx_read != chan->lx_write)
-			wake_up_interruptible(&channel_wqs[i].lx_queue);
+			wake_up(&channel_wqs[i].lx_queue);
+			wake_up(&channel_wqs[i].rt_queue);
 	}
 
 	return IRQ_HANDLED;
 }
 
-/* call when we have the address of the shared structure from the SP side. */
-static int rtlx_init(struct rtlx_info *rtlxi)
+static __attribute_used__ void dump_rtlx(void)
 {
 	int i;
 
+	printk("id 0x%lx state %d\n", rtlx->id, rtlx->state);
+
+	for (i = 0; i < RTLX_CHANNELS; i++) {
+		struct rtlx_channel *chan = &rtlx->channel[i];
+
+		printk(" rt_state %d lx_state %d buffer_size %d\n",
+		       chan->rt_state, chan->lx_state, chan->buffer_size);
+
+		printk(" rt_read %d rt_write %d\n",
+		       chan->rt_read, chan->rt_write);
+
+		printk(" lx_read %d lx_write %d\n",
+		       chan->lx_read, chan->lx_write);
+
+		printk(" rt_buffer <%s>\n", chan->rt_buffer);
+		printk(" lx_buffer <%s>\n", chan->lx_buffer);
+	}
+}
+
+/* call when we have the address of the shared structure from the SP side. */
+static int rtlx_init(struct rtlx_info *rtlxi)
+{
 	if (rtlxi->id != RTLX_ID) {
-		printk(KERN_WARNING "no valid RTLX id at 0x%p\n", rtlxi);
+		printk(KERN_ERR "no valid RTLX id at 0x%p 0x%x\n", rtlxi, rtlxi->id);
 		return -ENOEXEC;
 	}
 
-	/* initialise the wait queues */
-	for (i = 0; i < RTLX_CHANNELS; i++) {
-		init_waitqueue_head(&channel_wqs[i].rt_queue);
-		init_waitqueue_head(&channel_wqs[i].lx_queue);
-	}
-
-	/* set up for interrupt handling */
-	memset(&irq, 0, sizeof(struct irqaction));
-
-	if (cpu_has_vint)
-		set_vi_handler(MIPS_CPU_RTLX_IRQ, rtlx_dispatch);
-
-	irq_num = MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ;
-	irq.handler = rtlx_interrupt;
-	irq.flags = SA_INTERRUPT;
-	irq.name = "RTLX";
-	irq.dev_id = rtlx;
-	setup_irq(irq_num, &irq);
-
 	rtlx = rtlxi;
 
 	return 0;
 }
 
-/* only allow one open process at a time to open each channel */
-static int rtlx_open(struct inode *inode, struct file *filp)
+/* notifications */
+static void starting(int vpe)
 {
-	int minor, ret;
-	struct rtlx_channel *chan;
+	int i;
+	sp_stopping = 0;
 
-	/* assume only 1 device at the mo. */
-	minor = MINOR(inode->i_rdev);
+	/* force a reload of rtlx */
+	rtlx=NULL;
+
+	/* wake up any sleeping rtlx_open's */
+	for (i = 0; i < RTLX_CHANNELS; i++)
+		wake_up_interruptible(&channel_wqs[i].lx_queue);
+}
+
+static void stopping(int vpe)
+{
+	int i;
+
+	sp_stopping = 1;
+	for (i = 0; i < RTLX_CHANNELS; i++)
+		wake_up_interruptible(&channel_wqs[i].lx_queue);
+}
+
+
+int rtlx_open(int index, int can_sleep)
+{
+	int ret;
+	struct rtlx_channel *chan;
+	volatile struct rtlx_info **p;
+
+	if (index >= RTLX_CHANNELS) {
+		printk(KERN_DEBUG "rtlx_open index out of range\n");
+		return -ENOSYS;
+	}
+
+	if (channel_wqs[index].in_open) {
+		printk(KERN_DEBUG "rtlx_open channel %d already opened\n", index);
+		return -EBUSY;
+	}
+
+	channel_wqs[index].in_open++;
 
 	if (rtlx == NULL) {
-		struct rtlx_info **p;
 		if( (p = vpe_get_shared(RTLX_TARG_VPE)) == NULL) {
-			printk(KERN_ERR "vpe_get_shared is NULL. "
-			       "Has an SP program been loaded?\n");
-			return -EFAULT;
+			if (can_sleep) {
+				DECLARE_WAITQUEUE(wait, current);
+
+				/* go to sleep */
+				add_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+				set_current_state(TASK_INTERRUPTIBLE);
+				while ((p = vpe_get_shared(RTLX_TARG_VPE)) == NULL) {
+					schedule();
+					set_current_state(TASK_INTERRUPTIBLE);
+				}
+
+				set_current_state(TASK_RUNNING);
+				remove_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+				/* back running */
+			} else {
+				printk( KERN_DEBUG "No SP program loaded, and device "
+					"opened with O_NONBLOCK\n");
+				channel_wqs[index].in_open = 0;
+				return -ENOSYS;
+			}
 		}
 
 		if (*p == NULL) {
-			printk(KERN_ERR "vpe_shared %p %p\n", p, *p);
-			return -EFAULT;
+			if (can_sleep) {
+				DECLARE_WAITQUEUE(wait, current);
+
+				/* go to sleep */
+				add_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+				set_current_state(TASK_INTERRUPTIBLE);
+				while (*p == NULL) {
+					schedule();
+
+					/* reset task state to interruptable otherwise
+					   we'll whizz round here like a very fast loopy
+					   thing. schedule() appears to return with state
+					   set to TASK_RUNNING.
+
+					   If the loaded SP program, for whatever reason,
+					   doesn't set up the shared structure *p will never
+					   become true. So whoever connected to either /dev/rt?
+					   or if it was kspd, will then take up rather a lot of
+					   processor cycles.
+					*/
+
+					set_current_state(TASK_INTERRUPTIBLE);
+				}
+
+				set_current_state(TASK_RUNNING);
+				remove_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+				/* back running */
+			}
+			else {
+				printk(" *vpe_get_shared is NULL. "
+				       "Has an SP program been loaded?\n");
+				channel_wqs[index].in_open = 0;
+				return -ENOSYS;
+			}
 		}
 
-		if ((ret = rtlx_init(*p)) < 0)
-			return ret;
+		if ((unsigned int)*p < KSEG0) {
+			printk(KERN_WARNING "vpe_get_shared returned an invalid pointer "
+			       "maybe an error code %d\n", (int)*p);
+ 			channel_wqs[index].in_open = 0;
+			return -ENOSYS;
+		}
+
+ 		if ((ret = rtlx_init(*p)) < 0) {
+ 			channel_wqs[index].in_open = 0;
+  			return ret;
+ 		}
 	}
 
-	chan = &rtlx->channel[minor];
+	chan = &rtlx->channel[index];
 
-	if (test_and_set_bit(RTLX_STATE_OPENED, &chan->lx_state))
-		return -EBUSY;
+ 	if (chan->lx_state == RTLX_STATE_OPENED) {
+ 		channel_wqs[index].in_open = 0;
+  		return -EBUSY;
+ 	}
 
+  	chan->lx_state = RTLX_STATE_OPENED;
+ 	channel_wqs[index].in_open = 0;
 	return 0;
 }
 
-static int rtlx_release(struct inode *inode, struct file *filp)
+int rtlx_release(int index)
 {
-	int minor = MINOR(inode->i_rdev);
-
-	clear_bit(RTLX_STATE_OPENED, &rtlx->channel[minor].lx_state);
-	smp_mb__after_clear_bit();
-
+	rtlx->channel[index].lx_state = RTLX_STATE_UNUSED;
 	return 0;
 }
 
-static unsigned int rtlx_poll(struct file *file, poll_table * wait)
+unsigned int rtlx_read_poll(int index, int can_sleep)
 {
-	int minor;
-	unsigned int mask = 0;
-	struct rtlx_channel *chan;
+ 	struct rtlx_channel *chan;
 
-	minor = MINOR(file->f_dentry->d_inode->i_rdev);
-	chan = &rtlx->channel[minor];
+ 	if (rtlx == NULL)
+ 		return 0;
 
-	poll_wait(file, &channel_wqs[minor].rt_queue, wait);
-	poll_wait(file, &channel_wqs[minor].lx_queue, wait);
+ 	chan = &rtlx->channel[index];
 
 	/* data available to read? */
-	if (chan->lx_read != chan->lx_write)
-		mask |= POLLIN | POLLRDNORM;
+	if (chan->lx_read == chan->lx_write) {
+		if (can_sleep) {
+			DECLARE_WAITQUEUE(wait, current);
 
-	/* space to write */
-	if (spacefree(chan->rt_read, chan->rt_write, chan->buffer_size))
-		mask |= POLLOUT | POLLWRNORM;
+			/* go to sleep */
+			add_wait_queue(&channel_wqs[index].lx_queue, &wait);
 
-	return mask;
+			set_current_state(TASK_INTERRUPTIBLE);
+			while (chan->lx_read == chan->lx_write) {
+				schedule();
+
+				set_current_state(TASK_INTERRUPTIBLE);
+
+				if (sp_stopping) {
+					set_current_state(TASK_RUNNING);
+					remove_wait_queue(&channel_wqs[index].lx_queue, &wait);
+					return 0;
+				}
+			}
+
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+			/* back running */
+		}
+		else
+			return 0;
+	}
+
+	return (chan->lx_write + chan->buffer_size - chan->lx_read)
+	       % chan->buffer_size;
 }
 
-static ssize_t rtlx_read(struct file *file, char __user * buffer, size_t count,
-			 loff_t * ppos)
+static inline int write_spacefree(int read, int write, int size)
 {
-	unsigned long failed;
-	size_t fl = 0L;
-	int minor;
-	struct rtlx_channel *lx;
-	DECLARE_WAITQUEUE(wait, current);
-
-	minor = MINOR(file->f_dentry->d_inode->i_rdev);
-	lx = &rtlx->channel[minor];
-
-	/* data available? */
-	if (lx->lx_write == lx->lx_read) {
-		if (file->f_flags & O_NONBLOCK)
-			return 0;	/* -EAGAIN makes cat whinge */
-
-		/* go to sleep */
-		add_wait_queue(&channel_wqs[minor].lx_queue, &wait);
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		while (lx->lx_write == lx->lx_read)
-			schedule();
-
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&channel_wqs[minor].lx_queue, &wait);
-
-		/* back running */
+	if (read == write) {
+		/*
+		 * Never fill the buffer completely, so indexes are always
+		 * equal if empty and only empty, or !equal if data available
+		 */
+		return size - 1;
 	}
 
+	return ((read + size - write) % size) - 1;
+}
+
+unsigned int rtlx_write_poll(int index)
+{
+	struct rtlx_channel *chan = &rtlx->channel[index];
+	return write_spacefree(chan->rt_read, chan->rt_write, chan->buffer_size);
+}
+
+static inline void copy_to(void *dst, void *src, size_t count, int user)
+{
+	if (user)
+		copy_to_user(dst, src, count);
+	else
+		memcpy(dst, src, count);
+}
+
+static inline void copy_from(void *dst, void *src, size_t count, int user)
+{
+	if (user)
+		copy_from_user(dst, src, count);
+	else
+		memcpy(dst, src, count);
+}
+
+ssize_t rtlx_read(int index, void *buff, size_t count, int user)
+{
+	size_t fl = 0L;
+	struct rtlx_channel *lx;
+
+	if (rtlx == NULL)
+		return -ENOSYS;
+
+	lx = &rtlx->channel[index];
+
 	/* find out how much in total */
 	count = min(count,
-		    (size_t)(lx->lx_write + lx->buffer_size - lx->lx_read) % lx->buffer_size);
+		     (size_t)(lx->lx_write + lx->buffer_size - lx->lx_read)
+		     % lx->buffer_size);
 
 	/* then how much from the read pointer onwards */
-	fl = min(count, (size_t)lx->buffer_size - lx->lx_read);
+	fl = min( count, (size_t)lx->buffer_size - lx->lx_read);
 
-	failed = copy_to_user (buffer, &lx->lx_buffer[lx->lx_read], fl);
-	if (failed) {
-		count = fl - failed;
-		goto out;
-	}
+	copy_to(buff, &lx->lx_buffer[lx->lx_read], fl, user);
 
 	/* and if there is anything left at the beginning of the buffer */
-	if (count - fl) {
-		failed = copy_to_user (buffer + fl, lx->lx_buffer, count - fl);
-		if (failed) {
-			count -= failed;
-			goto out;
-		}
-	}
+	if ( count - fl )
+		copy_to (buff + fl, lx->lx_buffer, count - fl, user);
 
-out:
 	/* update the index */
 	lx->lx_read += count;
 	lx->lx_read %= lx->buffer_size;
@@ -242,20 +365,101 @@
 	return count;
 }
 
-static ssize_t rtlx_write(struct file *file, const char __user * buffer,
-			  size_t count, loff_t * ppos)
+ssize_t rtlx_write(int index, void *buffer, size_t count, int user)
 {
-	unsigned long failed;
-	int minor;
 	struct rtlx_channel *rt;
 	size_t fl;
+
+	if (rtlx == NULL)
+		return(-ENOSYS);
+
+	rt = &rtlx->channel[index];
+
+	/* total number of bytes to copy */
+	count = min(count,
+		    (size_t)write_spacefree(rt->rt_read, rt->rt_write,
+					    rt->buffer_size));
+
+	/* first bit from write pointer to the end of the buffer, or count */
+	fl = min(count, (size_t) rt->buffer_size - rt->rt_write);
+
+	copy_from (&rt->rt_buffer[rt->rt_write], buffer, fl, user);
+
+	/* if there's any left copy to the beginning of the buffer */
+	if( count - fl )
+		copy_from (rt->rt_buffer, buffer + fl, count - fl, user);
+
+	rt->rt_write += count;
+	rt->rt_write %= rt->buffer_size;
+
+	return(count);
+}
+
+
+static int file_open(struct inode *inode, struct file *filp)
+{
+	int minor = MINOR(inode->i_rdev);
+
+	return rtlx_open(minor, (filp->f_flags & O_NONBLOCK) ? 0 : 1);
+}
+
+static int file_release(struct inode *inode, struct file *filp)
+{
+	int minor;
+	minor = MINOR(inode->i_rdev);
+
+	return rtlx_release(minor);
+}
+
+static unsigned int file_poll(struct file *file, poll_table * wait)
+{
+	int minor;
+	unsigned int mask = 0;
+
+	minor = MINOR(file->f_dentry->d_inode->i_rdev);
+
+	poll_wait(file, &channel_wqs[minor].rt_queue, wait);
+	poll_wait(file, &channel_wqs[minor].lx_queue, wait);
+
+	if (rtlx == NULL)
+		return 0;
+
+	/* data available to read? */
+	if (rtlx_read_poll(minor, 0))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* space to write */
+	if (rtlx_write_poll(minor))
+		mask |= POLLOUT | POLLWRNORM;
+
+	return mask;
+}
+
+static ssize_t file_read(struct file *file, char __user * buffer, size_t count,
+			 loff_t * ppos)
+{
+	int minor = MINOR(file->f_dentry->d_inode->i_rdev);
+
+	/* data available? */
+	if (!rtlx_read_poll(minor, (file->f_flags & O_NONBLOCK) ? 0 : 1)) {
+		return 0;	// -EAGAIN makes cat whinge
+	}
+
+	return rtlx_read(minor, buffer, count, 1);
+}
+
+static ssize_t file_write(struct file *file, const char __user * buffer,
+			  size_t count, loff_t * ppos)
+{
+	int minor;
+	struct rtlx_channel *rt;
 	DECLARE_WAITQUEUE(wait, current);
 
 	minor = MINOR(file->f_dentry->d_inode->i_rdev);
 	rt = &rtlx->channel[minor];
 
 	/* any space left... */
-	if (!spacefree(rt->rt_read, rt->rt_write, rt->buffer_size)) {
+	if (!rtlx_write_poll(minor)) {
 
 		if (file->f_flags & O_NONBLOCK)
 			return -EAGAIN;
@@ -263,61 +467,64 @@
 		add_wait_queue(&channel_wqs[minor].rt_queue, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		while (!spacefree(rt->rt_read, rt->rt_write, rt->buffer_size))
+		while (!rtlx_write_poll(minor))
 			schedule();
 
 		set_current_state(TASK_RUNNING);
 		remove_wait_queue(&channel_wqs[minor].rt_queue, &wait);
 	}
 
-	/* total number of bytes to copy */
-	count = min(count, (size_t)spacefree(rt->rt_read, rt->rt_write, rt->buffer_size) );
-
-	/* first bit from write pointer to the end of the buffer, or count */
-	fl = min(count, (size_t) rt->buffer_size - rt->rt_write);
-
-	failed = copy_from_user(&rt->rt_buffer[rt->rt_write], buffer, fl);
-	if (failed) {
-		count = fl - failed;
-		goto out;
-	}
-
-	/* if there's any left copy to the beginning of the buffer */
-	if (count - fl) {
-		failed = copy_from_user(rt->rt_buffer, buffer + fl, count - fl);
-		if (failed) {
-			count -= failed;
-			goto out;
-		}
-	}
-
-out:
-	rt->rt_write += count;
-	rt->rt_write %= rt->buffer_size;
-
-	return count;
+	return rtlx_write(minor, (void *)buffer, count, 1);
 }
 
 static struct file_operations rtlx_fops = {
-	.owner		= THIS_MODULE,
-	.open		= rtlx_open,
-	.release	= rtlx_release,
-	.write		= rtlx_write,
-	.read		= rtlx_read,
-	.poll		= rtlx_poll
+	.owner =   THIS_MODULE,
+	.open =    file_open,
+	.release = file_release,
+	.write =   file_write,
+	.read =    file_read,
+	.poll =    file_poll
 };
 
+static struct irqaction rtlx_irq = {
+	.handler	= rtlx_interrupt,
+	.flags		= SA_INTERRUPT,
+	.name		= "RTLX",
+};
+
+static int rtlx_irq_num = MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ;
+
 static char register_chrdev_failed[] __initdata =
 	KERN_ERR "rtlx_module_init: unable to register device\n";
 
-static int __init rtlx_module_init(void)
+static int rtlx_module_init(void)
 {
+	int i;
+
 	major = register_chrdev(0, module_name, &rtlx_fops);
 	if (major < 0) {
 		printk(register_chrdev_failed);
 		return major;
 	}
 
+	/* initialise the wait queues */
+	for (i = 0; i < RTLX_CHANNELS; i++) {
+		init_waitqueue_head(&channel_wqs[i].rt_queue);
+		init_waitqueue_head(&channel_wqs[i].lx_queue);
+		channel_wqs[i].in_open = 0;
+	}
+
+	/* set up notifiers */
+	notify.start = starting;
+	notify.stop = stopping;
+	vpe_notify(RTLX_TARG_VPE, &notify);
+
+	if (cpu_has_vint)
+		set_vi_handler(MIPS_CPU_RTLX_IRQ, rtlx_dispatch);
+
+	rtlx_irq.dev_id = rtlx;
+	setup_irq(rtlx_irq_num, &rtlx_irq);
+
 	return 0;
 }
 
@@ -330,5 +537,5 @@
 module_exit(rtlx_module_exit);
 
 MODULE_DESCRIPTION("MIPS RTLX");
-MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc.");
+MODULE_AUTHOR("Elizabeth Oldham, MIPS Technologies, Inc.");
 MODULE_LICENSE("GPL");
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index ae83b75..80ffaa6 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -13,7 +13,6 @@
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
  */
 
 /*
@@ -27,11 +26,8 @@
  *
  * To load and run, simply cat a SP 'program file' to /dev/vpe1.
  * i.e cat spapp >/dev/vpe1.
- *
- * You'll need to have the following device files.
- * mknod /dev/vpe0 c 63 0
- * mknod /dev/vpe1 c 63 1
  */
+
 #include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -55,6 +51,8 @@
 #include <asm/cpu.h>
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/vpe.h>
+#include <asm/kspd.h>
 
 typedef void *vpe_handle;
 
@@ -68,6 +66,11 @@
 static char module_name[] = "vpe";
 static int major;
 
+#ifdef CONFIG_MIPS_APSP_KSPD
+ static struct kspd_notifications kspd_events;
+static int kspd_events_reqd = 0;
+#endif
+
 /* grab the likely amount of memory we will need. */
 #ifdef CONFIG_MIPS_VPE_LOADER_TOM
 #define P_SIZE (2 * 1024 * 1024)
@@ -76,7 +79,10 @@
 #define P_SIZE (256 * 1024)
 #endif
 
+extern unsigned long physical_memsize;
+
 #define MAX_VPES 16
+#define VPE_PATH_MAX 256
 
 enum vpe_state {
 	VPE_STATE_UNUSED = 0,
@@ -102,6 +108,8 @@
 	unsigned long len;
 	char *pbuffer;
 	unsigned long plen;
+	unsigned int uid, gid;
+	char cwd[VPE_PATH_MAX];
 
 	unsigned long __start;
 
@@ -113,6 +121,9 @@
 
 	/* shared symbol address */
 	void *shared_ptr;
+
+	/* the list of who wants to know when something major happens */
+	struct list_head notify;
 };
 
 struct tc {
@@ -138,7 +149,7 @@
 } vpecontrol;
 
 static void release_progmem(void *ptr);
-static void dump_vpe(struct vpe * v);
+/* static __attribute_used__ void dump_vpe(struct vpe * v); */
 extern void save_gp_address(unsigned int secbase, unsigned int rel);
 
 /* get the vpe associated with this minor */
@@ -146,12 +157,14 @@
 {
 	struct vpe *v;
 
+	if (!cpu_has_mipsmt)
+		return NULL;
+
 	list_for_each_entry(v, &vpecontrol.vpe_list, list) {
 		if (v->minor == minor)
 			return v;
 	}
 
-	printk(KERN_DEBUG "VPE: get_vpe minor %d not found\n", minor);
 	return NULL;
 }
 
@@ -165,8 +178,6 @@
 			return t;
 	}
 
-	printk(KERN_DEBUG "VPE: get_tc index %d not found\n", index);
-
 	return NULL;
 }
 
@@ -179,8 +190,6 @@
 			return t;
 	}
 
-	printk(KERN_DEBUG "VPE: All TC's are in use\n");
-
 	return NULL;
 }
 
@@ -190,13 +199,13 @@
 	struct vpe *v;
 
 	if ((v = kzalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL) {
-		printk(KERN_WARNING "VPE: alloc_vpe no mem\n");
 		return NULL;
 	}
 
 	INIT_LIST_HEAD(&v->tc);
 	list_add_tail(&v->list, &vpecontrol.vpe_list);
 
+	INIT_LIST_HEAD(&v->notify);
 	v->minor = minor;
 	return v;
 }
@@ -207,7 +216,6 @@
 	struct tc *t;
 
 	if ((t = kzalloc(sizeof(struct tc), GFP_KERNEL)) == NULL) {
-		printk(KERN_WARNING "VPE: alloc_tc no mem\n");
 		return NULL;
 	}
 
@@ -236,20 +244,16 @@
 	printk("config3 0x%lx MT %ld\n", val,
 	       (val & CONFIG3_MT) >> CONFIG3_MT_SHIFT);
 
-	val = read_c0_mvpconf0();
-	printk("mvpconf0 0x%lx, PVPE %ld PTC %ld M %ld\n", val,
-	       (val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT,
-	       val & MVPCONF0_PTC, (val & MVPCONF0_M) >> MVPCONF0_M_SHIFT);
-
 	val = read_c0_mvpcontrol();
 	printk("MVPControl 0x%lx, STLB %ld VPC %ld EVP %ld\n", val,
 	       (val & MVPCONTROL_STLB) >> MVPCONTROL_STLB_SHIFT,
 	       (val & MVPCONTROL_VPC) >> MVPCONTROL_VPC_SHIFT,
 	       (val & MVPCONTROL_EVP));
 
-	val = read_c0_vpeconf0();
-	printk("VPEConf0 0x%lx MVP %ld\n", val,
-	       (val & VPECONF0_MVP) >> VPECONF0_MVP_SHIFT);
+	val = read_c0_mvpconf0();
+	printk("mvpconf0 0x%lx, PVPE %ld PTC %ld M %ld\n", val,
+	       (val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT,
+	       val & MVPCONF0_PTC, (val & MVPCONF0_M) >> MVPCONF0_M_SHIFT);
 }
 
 /* Find some VPE program space  */
@@ -354,9 +358,9 @@
 	}
 
 	if( (rel > 32768) || (rel < -32768) ) {
-		printk(KERN_ERR
-		       "apply_r_mips_gprel16: relative address out of range 0x%x %d\n",
-		       rel, rel);
+		printk(KERN_DEBUG "VPE loader: apply_r_mips_gprel16: "
+		       "relative address 0x%x out of range of gp register\n",
+		       rel);
 		return -ENOEXEC;
 	}
 
@@ -374,8 +378,8 @@
 	rel -= 1;		// and one instruction less due to the branch delay slot.
 
 	if( (rel > 32768) || (rel < -32768) ) {
-		printk(KERN_ERR
-		       "apply_r_mips_pc16: relative address out of range 0x%x\n", rel);
+		printk(KERN_DEBUG "VPE loader: "
+ 		       "apply_r_mips_pc16: relative address out of range 0x%x\n", rel);
 		return -ENOEXEC;
 	}
 
@@ -396,7 +400,8 @@
 			   Elf32_Addr v)
 {
 	if (v % 4) {
-		printk(KERN_ERR "module %s: dangerous relocation mod4\n", me->name);
+		printk(KERN_DEBUG "VPE loader: apply_r_mips_26 "
+		       " unaligned relocation\n");
 		return -ENOEXEC;
 	}
 
@@ -459,12 +464,13 @@
 			/*
 			 * The value for the HI16 had best be the same.
 			 */
-			if (v != l->value) {
-				printk("%d != %d\n", v, l->value);
-				goto out_danger;
+ 			if (v != l->value) {
+				printk(KERN_DEBUG "VPE loader: "
+				       "apply_r_mips_lo16/hi16: 	"
+				       "inconsistent value information\n");
+				return -ENOEXEC;
 			}
 
-
 			/*
 			 * Do the HI16 relocation.  Note that we actually don't
 			 * need to know anything about the LO16 itself, except
@@ -500,11 +506,6 @@
 	*location = insnlo;
 
 	return 0;
-
-out_danger:
-	printk(KERN_ERR "module %s: dangerous " "relocation\n", me->name);
-
-	return -ENOEXEC;
 }
 
 static int (*reloc_handlers[]) (struct module *me, uint32_t *location,
@@ -518,6 +519,15 @@
 	[R_MIPS_PC16] = apply_r_mips_pc16
 };
 
+static char *rstrs[] = {
+    	[R_MIPS_NONE]	= "MIPS_NONE",
+	[R_MIPS_32]	= "MIPS_32",
+	[R_MIPS_26]	= "MIPS_26",
+	[R_MIPS_HI16]	= "MIPS_HI16",
+	[R_MIPS_LO16]	= "MIPS_LO16",
+	[R_MIPS_GPREL16] = "MIPS_GPREL16",
+	[R_MIPS_PC16] = "MIPS_PC16"
+};
 
 int apply_relocations(Elf32_Shdr *sechdrs,
 		      const char *strtab,
@@ -552,15 +562,13 @@
 
 		res = reloc_handlers[ELF32_R_TYPE(r_info)](me, location, v);
 		if( res ) {
-			printk(KERN_DEBUG
-			       "relocation error 0x%x sym refer <%s> value 0x%x "
-			       "type 0x%x r_info 0x%x\n",
-			       (unsigned int)location, strtab + sym->st_name, v,
-			       r_info, ELF32_R_TYPE(r_info));
-		}
-
-		if (res)
+			char *r = rstrs[ELF32_R_TYPE(r_info)];
+		    	printk(KERN_WARNING "VPE loader: .text+0x%x "
+			       "relocation type %s for symbol \"%s\" failed\n",
+			       rel[i].r_offset, r ? r : "UNKNOWN",
+			       strtab + sym->st_name);
 			return res;
+		}
 	}
 
 	return 0;
@@ -576,7 +584,7 @@
 
 
 /* Change all symbols so that sh_value encodes the pointer directly. */
-static int simplify_symbols(Elf_Shdr * sechdrs,
+static void simplify_symbols(Elf_Shdr * sechdrs,
 			    unsigned int symindex,
 			    const char *strtab,
 			    const char *secstrings,
@@ -585,18 +593,21 @@
 	Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
 	unsigned long secbase, bssbase = 0;
 	unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
-	int ret = 0, size;
+	int size;
 
 	/* find the .bss section for COMMON symbols */
 	for (i = 0; i < nsecs; i++) {
-		if (strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) == 0)
+		if (strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) == 0) {
 			bssbase = sechdrs[i].sh_addr;
+			break;
+		}
 	}
 
 	for (i = 1; i < n; i++) {
 		switch (sym[i].st_shndx) {
 		case SHN_COMMON:
-			/* Allocate space for the symbol in the .bss section. st_value is currently size.
+			/* Allocate space for the symbol in the .bss section.
+			   st_value is currently size.
 			   We want it to have the address of the symbol. */
 
 			size = sym[i].st_value;
@@ -614,11 +625,9 @@
 			break;
 
 		case SHN_MIPS_SCOMMON:
-
-			printk(KERN_DEBUG
-			       "simplify_symbols: ignoring SHN_MIPS_SCOMMON symbol <%s> st_shndx %d\n",
-			       strtab + sym[i].st_name, sym[i].st_shndx);
-
+			printk(KERN_DEBUG "simplify_symbols: ignoring SHN_MIPS_SCOMMON"
+			       "symbol <%s> st_shndx %d\n", strtab + sym[i].st_name,
+			       sym[i].st_shndx);
 			// .sbss section
 			break;
 
@@ -632,10 +641,7 @@
 			sym[i].st_value += secbase;
 			break;
 		}
-
 	}
-
-	return ret;
 }
 
 #ifdef DEBUG_ELFLOADER
@@ -655,9 +661,26 @@
 
 static void dump_tc(struct tc *t)
 {
-	printk(KERN_WARNING "VPE: TC index %d TCStatus 0x%lx halt 0x%lx\n",
-	       t->index, read_tc_c0_tcstatus(), read_tc_c0_tchalt());
-	printk(KERN_WARNING "VPE: tcrestart 0x%lx\n", read_tc_c0_tcrestart());
+  	unsigned long val;
+
+  	settc(t->index);
+ 	printk(KERN_DEBUG "VPE loader: TC index %d targtc %ld "
+ 	       "TCStatus 0x%lx halt 0x%lx\n",
+  	       t->index, read_c0_vpecontrol() & VPECONTROL_TARGTC,
+  	       read_tc_c0_tcstatus(), read_tc_c0_tchalt());
+
+ 	printk(KERN_DEBUG " tcrestart 0x%lx\n", read_tc_c0_tcrestart());
+ 	printk(KERN_DEBUG " tcbind 0x%lx\n", read_tc_c0_tcbind());
+
+  	val = read_c0_vpeconf0();
+ 	printk(KERN_DEBUG " VPEConf0 0x%lx MVP %ld\n", val,
+  	       (val & VPECONF0_MVP) >> VPECONF0_MVP_SHIFT);
+
+ 	printk(KERN_DEBUG " c0 status 0x%lx\n", read_vpe_c0_status());
+ 	printk(KERN_DEBUG " c0 cause 0x%lx\n", read_vpe_c0_cause());
+
+ 	printk(KERN_DEBUG " c0 badvaddr 0x%lx\n", read_vpe_c0_badvaddr());
+ 	printk(KERN_DEBUG " c0 epc 0x%lx\n", read_vpe_c0_epc());
 }
 
 static void dump_tclist(void)
@@ -672,96 +695,108 @@
 /* We are prepared so configure and start the VPE... */
 int vpe_run(struct vpe * v)
 {
-	unsigned long val;
+	struct vpe_notifications *n;
+	unsigned long val, dmt_flag;
 	struct tc *t;
 
 	/* check we are the Master VPE */
 	val = read_c0_vpeconf0();
 	if (!(val & VPECONF0_MVP)) {
 		printk(KERN_WARNING
-		       "VPE: only Master VPE's are allowed to configure MT\n");
+		       "VPE loader: only Master VPE's are allowed to configure MT\n");
 		return -1;
 	}
 
 	/* disable MT (using dvpe) */
 	dvpe();
 
+	if (!list_empty(&v->tc)) {
+                if ((t = list_entry(v->tc.next, struct tc, tc)) == NULL) {
+                        printk(KERN_WARNING "VPE loader: TC %d is already in use.\n",
+                               t->index);
+                        return -ENOEXEC;
+                }
+        } else {
+                printk(KERN_WARNING "VPE loader: No TC's associated with VPE %d\n",
+                       v->minor);
+                return -ENOEXEC;
+        }
+
 	/* Put MVPE's into 'configuration state' */
 	set_c0_mvpcontrol(MVPCONTROL_VPC);
 
-	if (!list_empty(&v->tc)) {
-		if ((t = list_entry(v->tc.next, struct tc, tc)) == NULL) {
-			printk(KERN_WARNING "VPE: TC %d is already in use.\n",
-			       t->index);
-			return -ENOEXEC;
-		}
-	} else {
-		printk(KERN_WARNING "VPE: No TC's associated with VPE %d\n",
-		       v->minor);
-		return -ENOEXEC;
-	}
-
 	settc(t->index);
 
-	val = read_vpe_c0_vpeconf0();
-
 	/* should check it is halted, and not activated */
 	if ((read_tc_c0_tcstatus() & TCSTATUS_A) || !(read_tc_c0_tchalt() & TCHALT_H)) {
-		printk(KERN_WARNING "VPE: TC %d is already doing something!\n",
+		printk(KERN_WARNING "VPE loader: TC %d is already doing something!\n",
 		       t->index);
-
 		dump_tclist();
 		return -ENOEXEC;
 	}
 
+	/*
+	 * Disable multi-threaded execution whilst we activate, clear the
+	 * halt bit and bound the tc to the other VPE...
+	 */
+	dmt_flag = dmt();
+
 	/* Write the address we want it to start running from in the TCPC register. */
 	write_tc_c0_tcrestart((unsigned long)v->__start);
-
-	/* write the sivc_info address to tccontext */
 	write_tc_c0_tccontext((unsigned long)0);
-
-	/* Set up the XTC bit in vpeconf0 to point at our tc */
-	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | (t->index << VPECONF0_XTC_SHIFT));
-
-	/* mark the TC as activated, not interrupt exempt and not dynamically allocatable */
+	/*
+	 * Mark the TC as activated, not interrupt exempt and not dynamically
+	 * allocatable
+	 */
 	val = read_tc_c0_tcstatus();
 	val = (val & ~(TCSTATUS_DA | TCSTATUS_IXMT)) | TCSTATUS_A;
 	write_tc_c0_tcstatus(val);
 
 	write_tc_c0_tchalt(read_tc_c0_tchalt() & ~TCHALT_H);
 
-	/* set up VPE1 */
-	write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() & ~VPECONTROL_TE);	// no multiple TC's
-	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);	// enable this VPE
-
 	/*
 	 * The sde-kit passes 'memsize' to __start in $a3, so set something
-	 * here...
-	 * Or set $a3 (register 7) to zero and define DFLT_STACK_SIZE and
+	 * here...  Or set $a3 to zero and define DFLT_STACK_SIZE and
 	 * DFLT_HEAP_SIZE when you compile your program
 	 */
+ 	mttgpr(7, physical_memsize);
 
-	mttgpr(7, 0);
 
-	/* set config to be the same as vpe0, particularly kseg0 coherency alg */
-	write_vpe_c0_config(read_c0_config());
+	/* set up VPE1 */
+	/*
+	 * bind the TC to VPE 1 as late as possible so we only have the final
+	 * VPE registers to set up, and so an EJTAG probe can trigger on it
+	 */
+ 	write_tc_c0_tcbind((read_tc_c0_tcbind() & ~TCBIND_CURVPE) | v->minor);
+
+        /* Set up the XTC bit in vpeconf0 to point at our tc */
+        write_vpe_c0_vpeconf0( (read_vpe_c0_vpeconf0() & ~(VPECONF0_XTC))
+                               | (t->index << VPECONF0_XTC_SHIFT));
+
+        /* enable this VPE */
+        write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);
 
 	/* clear out any left overs from a previous program */
+	write_vpe_c0_status(0);
 	write_vpe_c0_cause(0);
 
 	/* take system out of configuration state */
 	clear_c0_mvpcontrol(MVPCONTROL_VPC);
 
-	/* clear interrupts enabled IE, ERL, EXL, and KSU from c0 status */
-	write_vpe_c0_status(read_vpe_c0_status() & ~(ST0_ERL | ST0_KSU | ST0_IE | ST0_EXL));
+	/* now safe to re-enable multi-threading */
+	emt(dmt_flag);
 
 	/* set it running */
 	evpe(EVPE_ENABLE);
 
+	list_for_each_entry(n, &v->notify, list) {
+		n->start(v->minor);
+	}
+
 	return 0;
 }
 
-static unsigned long find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
+static int find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
 				      unsigned int symindex, const char *strtab,
 				      struct module *mod)
 {
@@ -778,26 +813,28 @@
 		}
 	}
 
+	if ( (v->__start == 0) || (v->shared_ptr == NULL))
+		return -1;
+
 	return 0;
 }
 
 /*
- * Allocates a VPE with some program code space(the load address), copies
- * the contents of the program (p)buffer performing relocatations/etc,
- * free's it when finished.
-*/
+ * Allocates a VPE with some program code space(the load address), copies the
+ * contents of the program (p)buffer performing relocatations/etc, free's it
+ * when finished.
+ */
 int vpe_elfload(struct vpe * v)
 {
 	Elf_Ehdr *hdr;
 	Elf_Shdr *sechdrs;
 	long err = 0;
 	char *secstrings, *strtab = NULL;
-	unsigned int len, i, symindex = 0, strindex = 0;
-
+	unsigned int len, i, symindex = 0, strindex = 0, relocate = 0;
 	struct module mod;	// so we can re-use the relocations code
 
 	memset(&mod, 0, sizeof(struct module));
-	strcpy(mod.name, "VPE dummy prog module");
+	strcpy(mod.name, "VPE loader");
 
 	hdr = (Elf_Ehdr *) v->pbuffer;
 	len = v->plen;
@@ -805,16 +842,22 @@
 	/* Sanity checks against insmoding binaries or wrong arch,
 	   weird elf version */
 	if (memcmp(hdr->e_ident, ELFMAG, 4) != 0
-	    || hdr->e_type != ET_REL || !elf_check_arch(hdr)
+	    || (hdr->e_type != ET_REL && hdr->e_type != ET_EXEC)
+	    || !elf_check_arch(hdr)
 	    || hdr->e_shentsize != sizeof(*sechdrs)) {
 		printk(KERN_WARNING
-		       "VPE program, wrong arch or weird elf version\n");
+		       "VPE loader: program wrong arch or weird elf version\n");
 
 		return -ENOEXEC;
 	}
 
+	if (hdr->e_type == ET_REL)
+		relocate = 1;
+
 	if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
-		printk(KERN_ERR "VPE program length %u truncated\n", len);
+		printk(KERN_ERR "VPE loader: program length %u truncated\n",
+		       len);
+
 		return -ENOEXEC;
 	}
 
@@ -826,82 +869,126 @@
 	/* And these should exist, but gcc whinges if we don't init them */
 	symindex = strindex = 0;
 
-	for (i = 1; i < hdr->e_shnum; i++) {
+	if (relocate) {
+		for (i = 1; i < hdr->e_shnum; i++) {
+			if (sechdrs[i].sh_type != SHT_NOBITS
+			    && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) {
+				printk(KERN_ERR "VPE program length %u truncated\n",
+				       len);
+				return -ENOEXEC;
+			}
 
-		if (sechdrs[i].sh_type != SHT_NOBITS
-		    && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) {
-			printk(KERN_ERR "VPE program length %u truncated\n",
-			       len);
-			return -ENOEXEC;
+			/* Mark all sections sh_addr with their address in the
+			   temporary image. */
+			sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
+
+			/* Internal symbols and strings. */
+			if (sechdrs[i].sh_type == SHT_SYMTAB) {
+				symindex = i;
+				strindex = sechdrs[i].sh_link;
+				strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+			}
 		}
-
-		/* Mark all sections sh_addr with their address in the
-		   temporary image. */
-		sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
-
-		/* Internal symbols and strings. */
-		if (sechdrs[i].sh_type == SHT_SYMTAB) {
-			symindex = i;
-			strindex = sechdrs[i].sh_link;
-			strtab = (char *)hdr + sechdrs[strindex].sh_offset;
-		}
+		layout_sections(&mod, hdr, sechdrs, secstrings);
 	}
 
-	layout_sections(&mod, hdr, sechdrs, secstrings);
-
 	v->load_addr = alloc_progmem(mod.core_size);
 	memset(v->load_addr, 0, mod.core_size);
 
-	printk("VPE elf_loader: loading to %p\n", v->load_addr);
+	printk("VPE loader: loading to %p\n", v->load_addr);
 
-	for (i = 0; i < hdr->e_shnum; i++) {
-		void *dest;
+	if (relocate) {
+		for (i = 0; i < hdr->e_shnum; i++) {
+			void *dest;
 
-		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
-			continue;
+			if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+				continue;
 
-		dest = v->load_addr + sechdrs[i].sh_entsize;
+			dest = v->load_addr + sechdrs[i].sh_entsize;
 
-		if (sechdrs[i].sh_type != SHT_NOBITS)
-			memcpy(dest, (void *)sechdrs[i].sh_addr,
-			       sechdrs[i].sh_size);
-		/* Update sh_addr to point to copy in image. */
-		sechdrs[i].sh_addr = (unsigned long)dest;
-	}
+			if (sechdrs[i].sh_type != SHT_NOBITS)
+				memcpy(dest, (void *)sechdrs[i].sh_addr,
+				       sechdrs[i].sh_size);
+			/* Update sh_addr to point to copy in image. */
+			sechdrs[i].sh_addr = (unsigned long)dest;
 
-	/* Fix up syms, so that st_value is a pointer to location. */
-	err =
-		simplify_symbols(sechdrs, symindex, strtab, secstrings,
-				 hdr->e_shnum, &mod);
-	if (err < 0) {
-		printk(KERN_WARNING "VPE: unable to simplify symbols\n");
-		goto cleanup;
-	}
+			printk(KERN_DEBUG " section sh_name %s sh_addr 0x%x\n",
+			       secstrings + sechdrs[i].sh_name, sechdrs[i].sh_addr);
+		}
 
-	/* Now do relocations. */
-	for (i = 1; i < hdr->e_shnum; i++) {
-		const char *strtab = (char *)sechdrs[strindex].sh_addr;
-		unsigned int info = sechdrs[i].sh_info;
+ 		/* Fix up syms, so that st_value is a pointer to location. */
+ 		simplify_symbols(sechdrs, symindex, strtab, secstrings,
+ 				 hdr->e_shnum, &mod);
 
-		/* Not a valid relocation section? */
-		if (info >= hdr->e_shnum)
-			continue;
+ 		/* Now do relocations. */
+ 		for (i = 1; i < hdr->e_shnum; i++) {
+ 			const char *strtab = (char *)sechdrs[strindex].sh_addr;
+ 			unsigned int info = sechdrs[i].sh_info;
 
-		/* Don't bother with non-allocated sections */
-		if (!(sechdrs[info].sh_flags & SHF_ALLOC))
-			continue;
+ 			/* Not a valid relocation section? */
+ 			if (info >= hdr->e_shnum)
+ 				continue;
 
-		if (sechdrs[i].sh_type == SHT_REL)
-			err =
-				apply_relocations(sechdrs, strtab, symindex, i, &mod);
-		else if (sechdrs[i].sh_type == SHT_RELA)
-			err = apply_relocate_add(sechdrs, strtab, symindex, i,
-						 &mod);
-		if (err < 0) {
-			printk(KERN_WARNING
-			       "vpe_elfload: error in relocations err %ld\n",
-			       err);
-			goto cleanup;
+ 			/* Don't bother with non-allocated sections */
+ 			if (!(sechdrs[info].sh_flags & SHF_ALLOC))
+ 				continue;
+
+ 			if (sechdrs[i].sh_type == SHT_REL)
+ 				err = apply_relocations(sechdrs, strtab, symindex, i,
+ 							&mod);
+ 			else if (sechdrs[i].sh_type == SHT_RELA)
+ 				err = apply_relocate_add(sechdrs, strtab, symindex, i,
+ 							 &mod);
+ 			if (err < 0)
+ 				return err;
+
+  		}
+  	} else {
+  		for (i = 0; i < hdr->e_shnum; i++) {
+
+ 			/* Internal symbols and strings. */
+ 			if (sechdrs[i].sh_type == SHT_SYMTAB) {
+ 				symindex = i;
+ 				strindex = sechdrs[i].sh_link;
+ 				strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+
+ 				/* mark the symtab's address for when we try to find the
+ 				   magic symbols */
+ 				sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
+ 			}
+
+ 			/* filter sections we dont want in the final image */
+ 			if (!(sechdrs[i].sh_flags & SHF_ALLOC) ||
+ 			    (sechdrs[i].sh_type == SHT_MIPS_REGINFO)) {
+ 				printk( KERN_DEBUG " ignoring section, "
+ 					"name %s type %x address 0x%x \n",
+ 					secstrings + sechdrs[i].sh_name,
+ 					sechdrs[i].sh_type, sechdrs[i].sh_addr);
+ 				continue;
+ 			}
+
+  			if (sechdrs[i].sh_addr < (unsigned int)v->load_addr) {
+ 				printk( KERN_WARNING "VPE loader: "
+ 					"fully linked image has invalid section, "
+ 					"name %s type %x address 0x%x, before load "
+ 					"address of 0x%x\n",
+ 					secstrings + sechdrs[i].sh_name,
+ 					sechdrs[i].sh_type, sechdrs[i].sh_addr,
+ 					(unsigned int)v->load_addr);
+  				return -ENOEXEC;
+  			}
+
+ 			printk(KERN_DEBUG " copying section sh_name %s, sh_addr 0x%x "
+			       "size 0x%x0 from x%p\n",
+			       secstrings + sechdrs[i].sh_name, sechdrs[i].sh_addr,
+			       sechdrs[i].sh_size, hdr + sechdrs[i].sh_offset);
+
+  			if (sechdrs[i].sh_type != SHT_NOBITS)
+				memcpy((void *)sechdrs[i].sh_addr,
+				       (char *)hdr + sechdrs[i].sh_offset,
+ 				       sechdrs[i].sh_size);
+			else
+				memset((void *)sechdrs[i].sh_addr, 0, sechdrs[i].sh_size);
 		}
 	}
 
@@ -910,71 +997,104 @@
 			   (unsigned long)v->load_addr + v->len);
 
 	if ((find_vpe_symbols(v, sechdrs, symindex, strtab, &mod)) < 0) {
+		if (v->__start == 0) {
+			printk(KERN_WARNING "VPE loader: program does not contain "
+			       "a __start symbol\n");
+			return -ENOEXEC;
+		}
 
-		printk(KERN_WARNING
-		       "VPE: program doesn't contain __start or vpe_shared symbols\n");
-		err = -ENOEXEC;
+		if (v->shared_ptr == NULL)
+			printk(KERN_WARNING "VPE loader: "
+			       "program does not contain vpe_shared symbol.\n"
+			       " Unable to use AMVP (AP/SP) facilities.\n");
 	}
 
 	printk(" elf loaded\n");
-
-cleanup:
-	return err;
+	return 0;
 }
 
-static void dump_vpe(struct vpe * v)
+__attribute_used__ void dump_vpe(struct vpe * v)
 {
 	struct tc *t;
 
+	settc(v->minor);
+
 	printk(KERN_DEBUG "VPEControl 0x%lx\n", read_vpe_c0_vpecontrol());
 	printk(KERN_DEBUG "VPEConf0 0x%lx\n", read_vpe_c0_vpeconf0());
 
-	list_for_each_entry(t, &vpecontrol.tc_list, list) {
+	list_for_each_entry(t, &vpecontrol.tc_list, list)
 		dump_tc(t);
-	}
 }
 
-/* checks for VPE is unused and gets ready to load program	 */
+static void cleanup_tc(struct tc *tc)
+{
+	int tmp;
+
+	/* Put MVPE's into 'configuration state' */
+	set_c0_mvpcontrol(MVPCONTROL_VPC);
+
+	settc(tc->index);
+	tmp = read_tc_c0_tcstatus();
+
+	/* mark not allocated and not dynamically allocatable */
+	tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
+	tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
+	write_tc_c0_tcstatus(tmp);
+
+	write_tc_c0_tchalt(TCHALT_H);
+
+	/* bind it to anything other than VPE1 */
+	write_tc_c0_tcbind(read_tc_c0_tcbind() & ~TCBIND_CURVPE); // | TCBIND_CURVPE
+
+	clear_c0_mvpcontrol(MVPCONTROL_VPC);
+}
+
+static int getcwd(char *buff, int size)
+{
+	mm_segment_t old_fs;
+	int ret;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	ret = sys_getcwd(buff,size);
+
+	set_fs(old_fs);
+
+	return ret;
+}
+
+/* checks VPE is unused and gets ready to load program  */
 static int vpe_open(struct inode *inode, struct file *filp)
 {
-	int minor;
+	int minor, ret;
 	struct vpe *v;
+	struct vpe_notifications *not;
 
 	/* assume only 1 device at the mo. */
 	if ((minor = MINOR(inode->i_rdev)) != 1) {
-		printk(KERN_WARNING "VPE: only vpe1 is supported\n");
+		printk(KERN_WARNING "VPE loader: only vpe1 is supported\n");
 		return -ENODEV;
 	}
 
 	if ((v = get_vpe(minor)) == NULL) {
-		printk(KERN_WARNING "VPE: unable to get vpe\n");
+		printk(KERN_WARNING "VPE loader: unable to get vpe\n");
 		return -ENODEV;
 	}
 
 	if (v->state != VPE_STATE_UNUSED) {
-		unsigned long tmp;
-		struct tc *t;
-
-		printk(KERN_WARNING "VPE: device %d already in use\n", minor);
-
 		dvpe();
-		dump_vpe(v);
 
-		printk(KERN_WARNING "VPE: re-initialising %d\n", minor);
+		printk(KERN_DEBUG "VPE loader: tc in use dumping regs\n");
+
+		dump_tc(get_tc(minor));
+
+		list_for_each_entry(not, &v->notify, list) {
+			not->stop(minor);
+		}
 
 		release_progmem(v->load_addr);
-
-		t = get_tc(minor);
-		settc(minor);
-		tmp = read_tc_c0_tcstatus();
-
-		/* mark not allocated and not dynamically allocatable */
-		tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
-		tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
-		write_tc_c0_tcstatus(tmp);
-
-		write_tc_c0_tchalt(TCHALT_H);
-
+		cleanup_tc(get_tc(minor));
 	}
 
 	// allocate it so when we get write ops we know it's expected.
@@ -986,6 +1106,24 @@
 	v->load_addr = NULL;
 	v->len = 0;
 
+	v->uid = filp->f_uid;
+	v->gid = filp->f_gid;
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+	/* get kspd to tell us when a syscall_exit happens */
+	if (!kspd_events_reqd) {
+		kspd_notify(&kspd_events);
+		kspd_events_reqd++;
+	}
+#endif
+
+	v->cwd[0] = 0;
+	ret = getcwd(v->cwd, VPE_PATH_MAX);
+	if (ret < 0)
+		printk(KERN_WARNING "VPE loader: open, getcwd returned %d\n", ret);
+
+	v->shared_ptr = NULL;
+	v->__start = 0;
 	return 0;
 }
 
@@ -1006,14 +1144,22 @@
 		if (vpe_elfload(v) >= 0)
 			vpe_run(v);
 		else {
-			printk(KERN_WARNING "VPE: ELF load failed.\n");
+ 			printk(KERN_WARNING "VPE loader: ELF load failed.\n");
 			ret = -ENOEXEC;
 		}
 	} else {
-		printk(KERN_WARNING "VPE: only elf files are supported\n");
+ 		printk(KERN_WARNING "VPE loader: only elf files are supported\n");
 		ret = -ENOEXEC;
 	}
 
+	/* It's good to be able to run the SP and if it chokes have a look at
+	   the /dev/rt?. But if we reset the pointer to the shared struct we
+	   loose what has happened. So perhaps if garbage is sent to the vpe
+	   device, use it as a trigger for the reset. Hopefully a nice
+	   executable will be along shortly. */
+	if (ret < 0)
+		v->shared_ptr = NULL;
+
 	// cleanup any temp buffers
 	if (v->pbuffer)
 		vfree(v->pbuffer);
@@ -1033,21 +1179,19 @@
 		return -ENODEV;
 
 	if (v->pbuffer == NULL) {
-		printk(KERN_ERR "vpe_write: no pbuffer\n");
+		printk(KERN_ERR "VPE loader: no buffer for program\n");
 		return -ENOMEM;
 	}
 
 	if ((count + v->len) > v->plen) {
 		printk(KERN_WARNING
-		       "VPE Loader: elf size too big. Perhaps strip uneeded symbols\n");
+		       "VPE loader: elf size too big. Perhaps strip uneeded symbols\n");
 		return -ENOMEM;
 	}
 
 	count -= copy_from_user(v->pbuffer + v->len, buffer, count);
-	if (!count) {
-		printk("vpe_write: copy_to_user failed\n");
+	if (!count)
 		return -EFAULT;
-	}
 
 	v->len += count;
 	return ret;
@@ -1149,16 +1293,70 @@
 {
 	struct vpe *v;
 
-	if ((v = get_vpe(index)) == NULL) {
-		printk(KERN_WARNING "vpe: invalid vpe index %d\n", index);
+	if ((v = get_vpe(index)) == NULL)
 		return NULL;
-	}
 
 	return v->shared_ptr;
 }
 
 EXPORT_SYMBOL(vpe_get_shared);
 
+int vpe_getuid(int index)
+{
+	struct vpe *v;
+
+	if ((v = get_vpe(index)) == NULL)
+		return -1;
+
+	return v->uid;
+}
+
+EXPORT_SYMBOL(vpe_getuid);
+
+int vpe_getgid(int index)
+{
+	struct vpe *v;
+
+	if ((v = get_vpe(index)) == NULL)
+		return -1;
+
+	return v->gid;
+}
+
+EXPORT_SYMBOL(vpe_getgid);
+
+int vpe_notify(int index, struct vpe_notifications *notify)
+{
+	struct vpe *v;
+
+	if ((v = get_vpe(index)) == NULL)
+		return -1;
+
+	list_add(&notify->list, &v->notify);
+	return 0;
+}
+
+EXPORT_SYMBOL(vpe_notify);
+
+char *vpe_getcwd(int index)
+{
+	struct vpe *v;
+
+	if ((v = get_vpe(index)) == NULL)
+		return NULL;
+
+	return v->cwd;
+}
+
+EXPORT_SYMBOL(vpe_getcwd);
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+static void kspd_sp_exit( int sp_id)
+{
+	cleanup_tc(get_tc(sp_id));
+}
+#endif
+
 static int __init vpe_module_init(void)
 {
 	struct vpe *v = NULL;
@@ -1201,7 +1399,8 @@
 				return -ENODEV;
 			}
 
-			list_add(&t->tc, &v->tc);	/* add the tc to the list of this vpe's tc's. */
+			/* add the tc to the list of this vpe's tc's. */
+			list_add(&t->tc, &v->tc);
 
 			/* deactivate all but vpe0 */
 			if (i != 0) {
@@ -1222,10 +1421,12 @@
 						     ~(ST0_IM | ST0_IE | ST0_KSU))
 						    | ST0_CU0);
 
-				/* set config to be the same as vpe0, particularly kseg0 coherency alg */
+				/*
+				 * Set config to be the same as vpe0,
+				 * particularly kseg0 coherency alg
+				 */
 				write_vpe_c0_config(read_c0_config());
 			}
-
 		}
 
 		/* TC's */
@@ -1234,23 +1435,28 @@
 		if (i != 0) {
 			unsigned long tmp;
 
-			/* tc 0 will of course be running.... */
-			if (i == 0)
-				t->state = TC_STATE_RUNNING;
-
 			settc(i);
 
-			/* bind a TC to each VPE, May as well put all excess TC's
-			   on the last VPE */
-			if (i >= (((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1))
-				write_tc_c0_tcbind(read_tc_c0_tcbind() |
-						   ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT));
-			else
-				write_tc_c0_tcbind(read_tc_c0_tcbind() | i);
+			/* Any TC that is bound to VPE0 gets left as is - in case
+			   we are running SMTC on VPE0. A TC that is bound to any
+			   other VPE gets bound to VPE0, ideally I'd like to make
+			   it homeless but it doesn't appear to let me bind a TC
+			   to a non-existent VPE. Which is perfectly reasonable.
+
+			   The (un)bound state is visible to an EJTAG probe so may
+			   notify GDB...
+			*/
+
+			if (((tmp = read_tc_c0_tcbind()) & TCBIND_CURVPE)) {
+				/* tc is bound >vpe0 */
+				write_tc_c0_tcbind(tmp & ~TCBIND_CURVPE);
+
+				t->pvpe = get_vpe(0);	/* set the parent vpe */
+			}
 
 			tmp = read_tc_c0_tcstatus();
 
-			/* mark not allocated and not dynamically allocatable */
+			/* mark not activated and not dynamically allocatable */
 			tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
 			tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
 			write_tc_c0_tcstatus(tmp);
@@ -1262,6 +1468,9 @@
 	/* release config state */
 	clear_c0_mvpcontrol(MVPCONTROL_VPC);
 
+#ifdef CONFIG_MIPS_APSP_KSPD
+	kspd_events.kspd_sp_exit = kspd_sp_exit;
+#endif
 	return 0;
 }
 
@@ -1281,5 +1490,5 @@
 module_init(vpe_module_init);
 module_exit(vpe_module_exit);
 MODULE_DESCRIPTION("MIPS VPE Loader");
-MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc");
+MODULE_AUTHOR("Elizabeth Oldham, MIPS Technologies, Inc.");
 MODULE_LICENSE("GPL");
diff --git a/include/asm-mips/kspd.h b/include/asm-mips/kspd.h
new file mode 100644
index 0000000..4e9e724
--- /dev/null
+++ b/include/asm-mips/kspd.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2005 MIPS Technologies, Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ */
+
+#ifndef _ASM_KSPD_H
+#define _ASM_KSPD_H
+
+struct kspd_notifications {
+	void (*kspd_sp_exit)(int sp_id);
+
+	struct list_head list;
+};
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+extern void kspd_notify(struct kspd_notifications *notify);
+#else
+static inline void kspd_notify(struct kspd_notifications *notify)
+{
+}
+#endif
+
+#endif
diff --git a/include/asm-mips/mipsmtregs.h b/include/asm-mips/mipsmtregs.h
index a669c07..a5ac1a6 100644
--- a/include/asm-mips/mipsmtregs.h
+++ b/include/asm-mips/mipsmtregs.h
@@ -234,7 +234,7 @@
 	__asm__ __volatile__(
 	"	.set	noreorder					\n"
 	"	.set	mips32r2					\n"
-	"	emt							\n"
+	"	.word	0x41600be1			# emt		\n"
 	"	ehb							\n"
 	"	.set	mips0						\n"
 	"	.set	reorder");
@@ -364,6 +364,9 @@
 #define read_vpe_c0_ebase()		mftc0(15,1)
 #define write_vpe_c0_ebase(val)		mttc0(15, 1, val)
 #define write_vpe_c0_compare(val)	mttc0(11, 0, val)
+#define read_vpe_c0_badvaddr()		mftc0(8, 0)
+#define read_vpe_c0_epc()		mftc0(14, 0)
+#define write_vpe_c0_epc(val)		mttc0(14, 0, val)
 
 
 /* TC */
diff --git a/include/asm-mips/rtlx.h b/include/asm-mips/rtlx.h
index 1298c3f..76cd51c 100644
--- a/include/asm-mips/rtlx.h
+++ b/include/asm-mips/rtlx.h
@@ -3,32 +3,46 @@
  *
  */
 
-#ifndef _RTLX_H
-#define _RTLX_H_
+#ifndef __ASM_RTLX_H
+#define __ASM_RTLX_H_
 
 #define LX_NODE_BASE 10
 
 #define MIPSCPU_INT_BASE       16
 #define MIPS_CPU_RTLX_IRQ 0
 
-#define RTLX_VERSION 1
+#define RTLX_VERSION 2
 #define RTLX_xID 0x12345600
 #define RTLX_ID (RTLX_xID | RTLX_VERSION)
 #define RTLX_CHANNELS 8
 
-#define RTLX_BUFFER_SIZE 1024
+#define RTLX_CHANNEL_STDIO	0
+#define RTLX_CHANNEL_DBG	1
+#define RTLX_CHANNEL_SYSIO	2
 
-/*
- * lx_state bits
- */
-#define RTLX_STATE_OPENED 1UL
+extern int rtlx_open(int index, int can_sleep);
+extern int rtlx_release(int index);
+extern ssize_t rtlx_read(int index, void *buff, size_t count, int user);
+extern ssize_t rtlx_write(int index, void *buffer, size_t count, int user);
+extern unsigned int rtlx_read_poll(int index, int can_sleep);
+extern unsigned int rtlx_write_poll(int index);
+
+enum rtlx_state {
+	RTLX_STATE_UNUSED,
+	RTLX_STATE_INITIALISED,
+	RTLX_STATE_REMOTE_READY,
+	RTLX_STATE_OPENED
+};
+
+#define RTLX_BUFFER_SIZE 1024
 
 /* each channel supports read and write.
    linux (vpe0) reads lx_buffer  and writes rt_buffer
    SP (vpe1) reads rt_buffer and writes lx_buffer
 */
 struct rtlx_channel {
-	unsigned long lx_state;
+	enum rtlx_state rt_state;
+	enum rtlx_state lx_state;
 
 	int buffer_size;
 
@@ -38,15 +52,13 @@
 
 	int lx_write, lx_read;
 	char *lx_buffer;
-
-	void *queues;
-
 };
 
 struct rtlx_info {
 	unsigned long id;
+	enum rtlx_state state;
 
 	struct rtlx_channel channel[RTLX_CHANNELS];
 };
 
-#endif /* _RTLX_H_ */
+#endif /* __ASM_RTLX_H_ */
diff --git a/include/asm-mips/vpe.h b/include/asm-mips/vpe.h
new file mode 100644
index 0000000..c6e1b96
--- /dev/null
+++ b/include/asm-mips/vpe.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2005 MIPS Technologies, Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ */
+
+#ifndef _ASM_VPE_H
+#define _ASM_VPE_H
+
+struct vpe_notifications {
+	void (*start)(int vpe);
+	void (*stop)(int vpe);
+
+	struct list_head list;
+};
+
+
+extern int vpe_notify(int index, struct vpe_notifications *notify);
+
+extern void *vpe_get_shared(int index);
+extern int vpe_getuid(int index);
+extern int vpe_getgid(int index);
+extern char *vpe_getcwd(int index);
+
+#endif /* _ASM_VPE_H */