Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (99 commits)
  drivers/virt: add missing linux/interrupt.h to fsl_hypervisor.c
  powerpc/85xx: fix mpic configuration in CAMP mode
  powerpc: Copy back TIF flags on return from softirq stack
  powerpc/64: Make server perfmon only built on ppc64 server devices
  powerpc/pseries: Fix hvc_vio.c build due to recent changes
  powerpc: Exporting boot_cpuid_phys
  powerpc: Add CFAR to oops output
  hvc_console: Add kdb support
  powerpc/pseries: Fix hvterm_raw_get_chars to accept < 16 chars, fixing xmon
  powerpc/irq: Quieten irq mapping printks
  powerpc: Enable lockup and hung task detectors in pseries and ppc64 defeconfigs
  powerpc: Add mpt2sas driver to pseries and ppc64 defconfig
  powerpc: Disable IRQs off tracer in ppc64 defconfig
  powerpc: Sync pseries and ppc64 defconfigs
  powerpc/pseries/hvconsole: Fix dropped console output
  hvc_console: Improve tty/console put_chars handling
  powerpc/kdump: Fix timeout in crash_kexec_wait_realmode
  powerpc/mm: Fix output of total_ram.
  powerpc/cpufreq: Add cpufreq driver for Momentum Maple boards
  powerpc: Correct annotations of pmu registration functions
  ...

Fix up trivial Kconfig/Makefile conflicts in arch/powerpc, drivers, and
drivers/cpufreq
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 9e7a4f5..95b9e7e 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -128,4 +128,6 @@
 
 source "drivers/iommu/Kconfig"
 
+source "drivers/virt/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 939fcde..7fa433a 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -124,3 +124,6 @@
 obj-$(CONFIG_HWSPINLOCK)	+= hwspinlock/
 obj-$(CONFIG_NFC)		+= nfc/
 obj-$(CONFIG_IOMMU_SUPPORT)	+= iommu/
+
+# Virtualization drivers
+obj-$(CONFIG_VIRT_DRIVERS)	+= virt/
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 6040717..0cad9c7 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -32,6 +32,25 @@
 EXPORT_SYMBOL_GPL(platform_bus);
 
 /**
+ * arch_setup_pdev_archdata - Allow manipulation of archdata before it's used
+ * @pdev: platform device
+ *
+ * This is called before platform_device_add() such that any pdev_archdata may
+ * be set up before the platform_notifier is called.  So if a user needs to
+ * manipulate any relevant information in the pdev_archdata they can do:
+ *
+ * 	platform_device_alloc()
+ * 	... manipulate ...
+ * 	platform_device_add()
+ *
+ * And if they don't care they can just call platform_device_register() and
+ * everything will just work out.
+ */
+void __weak arch_setup_pdev_archdata(struct platform_device *pdev)
+{
+}
+
+/**
  * platform_get_resource - get a resource for a device
  * @dev: platform device
  * @type: resource type
@@ -173,6 +192,7 @@
 		pa->pdev.id = id;
 		device_initialize(&pa->pdev.dev);
 		pa->pdev.dev.release = platform_device_release;
+		arch_setup_pdev_archdata(&pa->pdev);
 	}
 
 	return pa ? &pa->pdev : NULL;
@@ -334,6 +354,7 @@
 int platform_device_register(struct platform_device *pdev)
 {
 	device_initialize(&pdev->dev);
+	arch_setup_pdev_archdata(pdev);
 	return platform_device_add(pdev);
 }
 EXPORT_SYMBOL_GPL(platform_device_register);
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index e898215..e24a2a1 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -189,5 +189,10 @@
 source "drivers/cpufreq/Kconfig.arm"
 endmenu
 
+menu "PowerPC CPU frequency scaling drivers"
+depends on PPC32 || PPC64
+source "drivers/cpufreq/Kconfig.powerpc"
+endmenu
+
 endif
 endmenu
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
new file mode 100644
index 0000000..e76992f
--- /dev/null
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -0,0 +1,7 @@
+config CPU_FREQ_MAPLE
+	bool "Support for Maple 970FX Evaluation Board"
+	depends on PPC_MAPLE
+	select CPU_FREQ_TABLE
+	help
+	  This adds support for frequency switching on Maple 970FX
+	  Evaluation Board and compatible boards (IBM JS2x blades).
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index ab75e57..a48bc02 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -43,3 +43,7 @@
 obj-$(CONFIG_ARM_S3C64XX_CPUFREQ)	+= s3c64xx-cpufreq.o
 obj-$(CONFIG_ARM_S5PV210_CPUFREQ)	+= s5pv210-cpufreq.o
 obj-$(CONFIG_ARM_EXYNOS4210_CPUFREQ)	+= exynos4210-cpufreq.o
+
+##################################################################################
+# PowerPC platform drivers
+obj-$(CONFIG_CPU_FREQ_MAPLE)		+= maple-cpufreq.o
diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
new file mode 100644
index 0000000..89b178a
--- /dev/null
+++ b/drivers/cpufreq/maple-cpufreq.c
@@ -0,0 +1,309 @@
+/*
+ *  Copyright (C) 2011 Dmitry Eremin-Solenikov
+ *  Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *  and                       Markus Demleitner <msdemlei@cl.uni-heidelberg.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This driver adds basic cpufreq support for the Momentum Maple 970FX
+ * Evaluation Board and compatible boards (IBM JS2x blades).
+ */
+
+#undef DEBUG
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+#include <linux/of.h>
+
+#define DBG(fmt...) pr_debug(fmt)
+
+/* see 970FX user manual */
+
+#define SCOM_PCR 0x0aa001			/* PCR scom addr */
+
+#define PCR_HILO_SELECT		0x80000000U	/* 1 = PCR, 0 = PCRH */
+#define PCR_SPEED_FULL		0x00000000U	/* 1:1 speed value */
+#define PCR_SPEED_HALF		0x00020000U	/* 1:2 speed value */
+#define PCR_SPEED_QUARTER	0x00040000U	/* 1:4 speed value */
+#define PCR_SPEED_MASK		0x000e0000U	/* speed mask */
+#define PCR_SPEED_SHIFT		17
+#define PCR_FREQ_REQ_VALID	0x00010000U	/* freq request valid */
+#define PCR_VOLT_REQ_VALID	0x00008000U	/* volt request valid */
+#define PCR_TARGET_TIME_MASK	0x00006000U	/* target time */
+#define PCR_STATLAT_MASK	0x00001f00U	/* STATLAT value */
+#define PCR_SNOOPLAT_MASK	0x000000f0U	/* SNOOPLAT value */
+#define PCR_SNOOPACC_MASK	0x0000000fU	/* SNOOPACC value */
+
+#define SCOM_PSR 0x408001			/* PSR scom addr */
+/* warning: PSR is a 64 bits register */
+#define PSR_CMD_RECEIVED	0x2000000000000000U   /* command received */
+#define PSR_CMD_COMPLETED	0x1000000000000000U   /* command completed */
+#define PSR_CUR_SPEED_MASK	0x0300000000000000U   /* current speed */
+#define PSR_CUR_SPEED_SHIFT	(56)
+
+/*
+ * The G5 only supports two frequencies (Quarter speed is not supported)
+ */
+#define CPUFREQ_HIGH                  0
+#define CPUFREQ_LOW                   1
+
+static struct cpufreq_frequency_table maple_cpu_freqs[] = {
+	{CPUFREQ_HIGH,		0},
+	{CPUFREQ_LOW,		0},
+	{0,			CPUFREQ_TABLE_END},
+};
+
+static struct freq_attr *maple_cpu_freqs_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+/* Power mode data is an array of the 32 bits PCR values to use for
+ * the various frequencies, retrieved from the device-tree
+ */
+static int maple_pmode_cur;
+
+static DEFINE_MUTEX(maple_switch_mutex);
+
+static const u32 *maple_pmode_data;
+static int maple_pmode_max;
+
+/*
+ * SCOM based frequency switching for 970FX rev3
+ */
+static int maple_scom_switch_freq(int speed_mode)
+{
+	unsigned long flags;
+	int to;
+
+	local_irq_save(flags);
+
+	/* Clear PCR high */
+	scom970_write(SCOM_PCR, 0);
+	/* Clear PCR low */
+	scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0);
+	/* Set PCR low */
+	scom970_write(SCOM_PCR, PCR_HILO_SELECT |
+		      maple_pmode_data[speed_mode]);
+
+	/* Wait for completion */
+	for (to = 0; to < 10; to++) {
+		unsigned long psr = scom970_read(SCOM_PSR);
+
+		if ((psr & PSR_CMD_RECEIVED) == 0 &&
+		    (((psr >> PSR_CUR_SPEED_SHIFT) ^
+		      (maple_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3)
+		    == 0)
+			break;
+		if (psr & PSR_CMD_COMPLETED)
+			break;
+		udelay(100);
+	}
+
+	local_irq_restore(flags);
+
+	maple_pmode_cur = speed_mode;
+	ppc_proc_freq = maple_cpu_freqs[speed_mode].frequency * 1000ul;
+
+	return 0;
+}
+
+static int maple_scom_query_freq(void)
+{
+	unsigned long psr = scom970_read(SCOM_PSR);
+	int i;
+
+	for (i = 0; i <= maple_pmode_max; i++)
+		if ((((psr >> PSR_CUR_SPEED_SHIFT) ^
+		      (maple_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0)
+			break;
+	return i;
+}
+
+/*
+ * Common interface to the cpufreq core
+ */
+
+static int maple_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, maple_cpu_freqs);
+}
+
+static int maple_cpufreq_target(struct cpufreq_policy *policy,
+	unsigned int target_freq, unsigned int relation)
+{
+	unsigned int newstate = 0;
+	struct cpufreq_freqs freqs;
+	int rc;
+
+	if (cpufreq_frequency_table_target(policy, maple_cpu_freqs,
+			target_freq, relation, &newstate))
+		return -EINVAL;
+
+	if (maple_pmode_cur == newstate)
+		return 0;
+
+	mutex_lock(&maple_switch_mutex);
+
+	freqs.old = maple_cpu_freqs[maple_pmode_cur].frequency;
+	freqs.new = maple_cpu_freqs[newstate].frequency;
+	freqs.cpu = 0;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	rc = maple_scom_switch_freq(newstate);
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	mutex_unlock(&maple_switch_mutex);
+
+	return rc;
+}
+
+static unsigned int maple_cpufreq_get_speed(unsigned int cpu)
+{
+	return maple_cpu_freqs[maple_pmode_cur].frequency;
+}
+
+static int maple_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	policy->cpuinfo.transition_latency = 12000;
+	policy->cur = maple_cpu_freqs[maple_scom_query_freq()].frequency;
+	/* secondary CPUs are tied to the primary one by the
+	 * cpufreq core if in the secondary policy we tell it that
+	 * it actually must be one policy together with all others. */
+	cpumask_copy(policy->cpus, cpu_online_mask);
+	cpufreq_frequency_table_get_attr(maple_cpu_freqs, policy->cpu);
+
+	return cpufreq_frequency_table_cpuinfo(policy,
+		maple_cpu_freqs);
+}
+
+
+static struct cpufreq_driver maple_cpufreq_driver = {
+	.name		= "maple",
+	.owner		= THIS_MODULE,
+	.flags		= CPUFREQ_CONST_LOOPS,
+	.init		= maple_cpufreq_cpu_init,
+	.verify		= maple_cpufreq_verify,
+	.target		= maple_cpufreq_target,
+	.get		= maple_cpufreq_get_speed,
+	.attr		= maple_cpu_freqs_attr,
+};
+
+static int __init maple_cpufreq_init(void)
+{
+	struct device_node *cpus;
+	struct device_node *cpunode;
+	unsigned int psize;
+	unsigned long max_freq;
+	const u32 *valp;
+	u32 pvr_hi;
+	int rc = -ENODEV;
+
+	/*
+	 * Behave like the powermac driver, which checks machine compatibility,
+	 * to ease merging of the two drivers in the future.
+	 */
+	if (!of_machine_is_compatible("Momentum,Maple") &&
+	    !of_machine_is_compatible("Momentum,Apache"))
+		return 0;
+
+	cpus = of_find_node_by_path("/cpus");
+	if (cpus == NULL) {
+		DBG("No /cpus node !\n");
+		return -ENODEV;
+	}
+
+	/* Get first CPU node */
+	for (cpunode = NULL;
+	     (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
+		const u32 *reg = of_get_property(cpunode, "reg", NULL);
+		if (reg == NULL || (*reg) != 0)
+			continue;
+		if (!strcmp(cpunode->type, "cpu"))
+			break;
+	}
+	if (cpunode == NULL) {
+		printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n");
+		goto bail_cpus;
+	}
+
+	/* Check 970FX for now */
+	/* we actually don't care on which CPU to access PVR */
+	pvr_hi = PVR_VER(mfspr(SPRN_PVR));
+	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
+		printk(KERN_ERR "cpufreq: Unsupported CPU version (%x)\n",
+				pvr_hi);
+		goto bail_noprops;
+	}
+
+	/* Look for the powertune data in the device-tree */
+	/*
+	 * On Maple this property is provided by PIBS in dual-processor config,
+	 * not provided by PIBS in CPU0 config and also not provided by SLOF,
+	 * so YMMV
+	 */
+	maple_pmode_data = of_get_property(cpunode, "power-mode-data", &psize);
+	if (!maple_pmode_data) {
+		DBG("No power-mode-data !\n");
+		goto bail_noprops;
+	}
+	maple_pmode_max = psize / sizeof(u32) - 1;
+
+	/*
+	 * From what I see, clock-frequency is always the maximal frequency.
+	 * The current driver can not slew sysclk yet, so we really only deal
+	 * with powertune steps for now. We also only implement full freq and
+	 * half freq in this version. So far, I haven't yet seen a machine
+	 * supporting anything else.
+	 */
+	valp = of_get_property(cpunode, "clock-frequency", NULL);
+	if (!valp)
+		return -ENODEV;
+	max_freq = (*valp)/1000;
+	maple_cpu_freqs[0].frequency = max_freq;
+	maple_cpu_freqs[1].frequency = max_freq/2;
+
+	/* Force apply current frequency to make sure everything is in
+	 * sync (voltage is right for example). Firmware may leave us with
+	 * a strange setting ...
+	 */
+	msleep(10);
+	maple_pmode_cur = -1;
+	maple_scom_switch_freq(maple_scom_query_freq());
+
+	printk(KERN_INFO "Registering Maple CPU frequency driver\n");
+	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+		maple_cpu_freqs[1].frequency/1000,
+		maple_cpu_freqs[0].frequency/1000,
+		maple_cpu_freqs[maple_pmode_cur].frequency/1000);
+
+	rc = cpufreq_register_driver(&maple_cpufreq_driver);
+
+	of_node_put(cpunode);
+	of_node_put(cpus);
+
+	return rc;
+
+bail_noprops:
+	of_node_put(cpunode);
+bail_cpus:
+	of_node_put(cpus);
+
+	return rc;
+}
+
+module_init(maple_cpufreq_init);
+
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index e75af39..ed5a6d3 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -162,7 +162,7 @@
 	}
 
 	dev->dev.of_node = of_node_get(np);
-#if defined(CONFIG_PPC) || defined(CONFIG_MICROBLAZE)
+#if defined(CONFIG_MICROBLAZE)
 	dev->dev.dma_mask = &dev->archdata.dma_mask;
 #endif
 	dev->dev.parent = parent;
@@ -201,7 +201,7 @@
 	if (!dev)
 		return NULL;
 
-#if defined(CONFIG_PPC) || defined(CONFIG_MICROBLAZE)
+#if defined(CONFIG_MICROBLAZE)
 	dev->archdata.dma_mask = 0xffffffffUL;
 #endif
 	dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
index 6f2c980..e371753 100644
--- a/drivers/tty/hvc/Kconfig
+++ b/drivers/tty/hvc/Kconfig
@@ -19,6 +19,11 @@
 	  console. This driver allows each pSeries partition to have a console
 	  which is accessed via the HMC.
 
+config HVC_OLD_HVSI
+	bool "Old driver for pSeries serial port (/dev/hvsi*)"
+	depends on HVC_CONSOLE
+	default n
+
 config HVC_ISERIES
 	bool "iSeries Hypervisor Virtual Console support"
 	depends on PPC_ISERIES
diff --git a/drivers/tty/hvc/Makefile b/drivers/tty/hvc/Makefile
index 40a25d9..e292053 100644
--- a/drivers/tty/hvc/Makefile
+++ b/drivers/tty/hvc/Makefile
@@ -1,4 +1,5 @@
-obj-$(CONFIG_HVC_CONSOLE)	+= hvc_vio.o hvsi.o
+obj-$(CONFIG_HVC_CONSOLE)	+= hvc_vio.o hvsi_lib.o
+obj-$(CONFIG_HVC_OLD_HVSI)	+= hvsi.o
 obj-$(CONFIG_HVC_ISERIES)	+= hvc_iseries.o
 obj-$(CONFIG_HVC_RTAS)		+= hvc_rtas.o
 obj-$(CONFIG_HVC_TILE)		+= hvc_tile.o
diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index e9cba13..e1aaf4f 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -39,6 +39,7 @@
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/slab.h>
+#include <linux/serial_core.h>
 
 #include <asm/uaccess.h>
 
@@ -163,8 +164,10 @@
 		} else {
 			r = cons_ops[index]->put_chars(vtermnos[index], c, i);
 			if (r <= 0) {
-				/* throw away chars on error */
-				i = 0;
+				/* throw away characters on error
+				 * but spin in case of -EAGAIN */
+				if (r != -EAGAIN)
+					i = 0;
 			} else if (r > 0) {
 				i -= r;
 				if (i > 0)
@@ -184,7 +187,7 @@
 }
 
 static int __init hvc_console_setup(struct console *co, char *options)
-{
+{	
 	if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES)
 		return -ENODEV;
 
@@ -448,7 +451,7 @@
 
 	n = hp->ops->put_chars(hp->vtermno, hp->outbuf, hp->n_outbuf);
 	if (n <= 0) {
-		if (n == 0) {
+		if (n == 0 || n == -EAGAIN) {
 			hp->do_wakeup = 1;
 			return 0;
 		}
@@ -745,6 +748,58 @@
 	return 0;
 }
 
+static int hvc_tiocmget(struct tty_struct *tty)
+{
+	struct hvc_struct *hp = tty->driver_data;
+
+	if (!hp || !hp->ops->tiocmget)
+		return -EINVAL;
+	return hp->ops->tiocmget(hp);
+}
+
+static int hvc_tiocmset(struct tty_struct *tty,
+			unsigned int set, unsigned int clear)
+{
+	struct hvc_struct *hp = tty->driver_data;
+
+	if (!hp || !hp->ops->tiocmset)
+		return -EINVAL;
+	return hp->ops->tiocmset(hp, set, clear);
+}
+
+#ifdef CONFIG_CONSOLE_POLL
+int hvc_poll_init(struct tty_driver *driver, int line, char *options)
+{
+	return 0;
+}
+
+static int hvc_poll_get_char(struct tty_driver *driver, int line)
+{
+	struct tty_struct *tty = driver->ttys[0];
+	struct hvc_struct *hp = tty->driver_data;
+	int n;
+	char ch;
+
+	n = hp->ops->get_chars(hp->vtermno, &ch, 1);
+
+	if (n == 0)
+		return NO_POLL_CHAR;
+
+	return ch;
+}
+
+static void hvc_poll_put_char(struct tty_driver *driver, int line, char ch)
+{
+	struct tty_struct *tty = driver->ttys[0];
+	struct hvc_struct *hp = tty->driver_data;
+	int n;
+
+	do {
+		n = hp->ops->put_chars(hp->vtermno, &ch, 1);
+	} while (n <= 0);
+}
+#endif
+
 static const struct tty_operations hvc_ops = {
 	.open = hvc_open,
 	.close = hvc_close,
@@ -753,6 +808,13 @@
 	.unthrottle = hvc_unthrottle,
 	.write_room = hvc_write_room,
 	.chars_in_buffer = hvc_chars_in_buffer,
+	.tiocmget = hvc_tiocmget,
+	.tiocmset = hvc_tiocmset,
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_init = hvc_poll_init,
+	.poll_get_char = hvc_poll_get_char,
+	.poll_put_char = hvc_poll_put_char,
+#endif
 };
 
 struct hvc_struct *hvc_alloc(uint32_t vtermno, int data,
diff --git a/drivers/tty/hvc/hvc_console.h b/drivers/tty/hvc/hvc_console.h
index 54381eba..c335a14 100644
--- a/drivers/tty/hvc/hvc_console.h
+++ b/drivers/tty/hvc/hvc_console.h
@@ -73,6 +73,10 @@
 	int (*notifier_add)(struct hvc_struct *hp, int irq);
 	void (*notifier_del)(struct hvc_struct *hp, int irq);
 	void (*notifier_hangup)(struct hvc_struct *hp, int irq);
+
+	/* tiocmget/set implementation */
+	int (*tiocmget)(struct hvc_struct *hp);
+	int (*tiocmset)(struct hvc_struct *hp, unsigned int set, unsigned int clear);
 };
 
 /* Register a vterm and a slot index for use as a console (console_init) */
diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
index e6eea14..130aace 100644
--- a/drivers/tty/hvc/hvc_vio.c
+++ b/drivers/tty/hvc/hvc_vio.c
@@ -27,15 +27,27 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ * TODO:
+ *
+ *   - handle error in sending hvsi protocol packets
+ *   - retry nego on subsequent sends ?
  */
 
+#undef DEBUG
+
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/console.h>
 
 #include <asm/hvconsole.h>
 #include <asm/vio.h>
 #include <asm/prom.h>
 #include <asm/firmware.h>
+#include <asm/hvsi.h>
+#include <asm/udbg.h>
 
 #include "hvc_console.h"
 
@@ -43,59 +55,236 @@
 
 static struct vio_device_id hvc_driver_table[] __devinitdata = {
 	{"serial", "hvterm1"},
+#ifndef HVC_OLD_HVSI
+	{"serial", "hvterm-protocol"},
+#endif
 	{ "", "" }
 };
 MODULE_DEVICE_TABLE(vio, hvc_driver_table);
 
-static int filtered_get_chars(uint32_t vtermno, char *buf, int count)
+typedef enum hv_protocol {
+	HV_PROTOCOL_RAW,
+	HV_PROTOCOL_HVSI
+} hv_protocol_t;
+
+struct hvterm_priv {
+	u32			termno;	/* HV term number */
+	hv_protocol_t		proto;	/* Raw data or HVSI packets */
+	struct hvsi_priv	hvsi;	/* HVSI specific data */
+	spinlock_t		buf_lock;
+	char			buf[SIZE_VIO_GET_CHARS];
+	int			left;
+	int			offset;
+};
+static struct hvterm_priv *hvterm_privs[MAX_NR_HVC_CONSOLES];
+/* For early boot console */
+static struct hvterm_priv hvterm_priv0;
+
+static int hvterm_raw_get_chars(uint32_t vtermno, char *buf, int count)
 {
-	unsigned long got;
-	int i;
+	struct hvterm_priv *pv = hvterm_privs[vtermno];
+	unsigned long i;
+	unsigned long flags;
+	int got;
 
-	/*
-	 * Vio firmware will read up to SIZE_VIO_GET_CHARS at its own discretion
-	 * so we play safe and avoid the situation where got > count which could
-	 * overload the flip buffer.
-	 */
-	if (count < SIZE_VIO_GET_CHARS)
-		return -EAGAIN;
+	if (WARN_ON(!pv))
+		return 0;
 
-	got = hvc_get_chars(vtermno, buf, count);
+	spin_lock_irqsave(&pv->buf_lock, flags);
 
-	/*
-	 * Work around a HV bug where it gives us a null
-	 * after every \r.  -- paulus
-	 */
-	for (i = 1; i < got; ++i) {
-		if (buf[i] == 0 && buf[i-1] == '\r') {
-			--got;
-			if (i < got)
-				memmove(&buf[i], &buf[i+1],
-					got - i);
+	if (pv->left == 0) {
+		pv->offset = 0;
+		pv->left = hvc_get_chars(pv->termno, pv->buf, count);
+
+		/*
+		 * Work around a HV bug where it gives us a null
+		 * after every \r.  -- paulus
+		 */
+		for (i = 1; i < pv->left; ++i) {
+			if (pv->buf[i] == 0 && pv->buf[i-1] == '\r') {
+				--pv->left;
+				if (i < pv->left) {
+					memmove(&pv->buf[i], &pv->buf[i+1],
+						pv->left - i);
+				}
+			}
 		}
 	}
+
+	got = min(count, pv->left);
+	memcpy(buf, &pv->buf[pv->offset], got);
+	pv->offset += got;
+	pv->left -= got;
+
+	spin_unlock_irqrestore(&pv->buf_lock, flags);
+
 	return got;
 }
 
-static const struct hv_ops hvc_get_put_ops = {
-	.get_chars = filtered_get_chars,
-	.put_chars = hvc_put_chars,
+static int hvterm_raw_put_chars(uint32_t vtermno, const char *buf, int count)
+{
+	struct hvterm_priv *pv = hvterm_privs[vtermno];
+
+	if (WARN_ON(!pv))
+		return 0;
+
+	return hvc_put_chars(pv->termno, buf, count);
+}
+
+static const struct hv_ops hvterm_raw_ops = {
+	.get_chars = hvterm_raw_get_chars,
+	.put_chars = hvterm_raw_put_chars,
 	.notifier_add = notifier_add_irq,
 	.notifier_del = notifier_del_irq,
 	.notifier_hangup = notifier_hangup_irq,
 };
 
-static int __devinit hvc_vio_probe(struct vio_dev *vdev,
-				const struct vio_device_id *id)
+static int hvterm_hvsi_get_chars(uint32_t vtermno, char *buf, int count)
 {
+	struct hvterm_priv *pv = hvterm_privs[vtermno];
+
+	if (WARN_ON(!pv))
+		return 0;
+
+	return hvsilib_get_chars(&pv->hvsi, buf, count);
+}
+
+static int hvterm_hvsi_put_chars(uint32_t vtermno, const char *buf, int count)
+{
+	struct hvterm_priv *pv = hvterm_privs[vtermno];
+
+	if (WARN_ON(!pv))
+		return 0;
+
+	return hvsilib_put_chars(&pv->hvsi, buf, count);
+}
+
+static int hvterm_hvsi_open(struct hvc_struct *hp, int data)
+{
+	struct hvterm_priv *pv = hvterm_privs[hp->vtermno];
+	int rc;
+
+	pr_devel("HVSI@%x: open !\n", pv->termno);
+
+	rc = notifier_add_irq(hp, data);
+	if (rc)
+		return rc;
+
+	return hvsilib_open(&pv->hvsi, hp);
+}
+
+static void hvterm_hvsi_close(struct hvc_struct *hp, int data)
+{
+	struct hvterm_priv *pv = hvterm_privs[hp->vtermno];
+
+	pr_devel("HVSI@%x: do close !\n", pv->termno);
+
+	hvsilib_close(&pv->hvsi, hp);
+
+	notifier_del_irq(hp, data);
+}
+
+void hvterm_hvsi_hangup(struct hvc_struct *hp, int data)
+{
+	struct hvterm_priv *pv = hvterm_privs[hp->vtermno];
+
+	pr_devel("HVSI@%x: do hangup !\n", pv->termno);
+
+	hvsilib_close(&pv->hvsi, hp);
+
+	notifier_hangup_irq(hp, data);
+}
+
+static int hvterm_hvsi_tiocmget(struct hvc_struct *hp)
+{
+	struct hvterm_priv *pv = hvterm_privs[hp->vtermno];
+
+	if (!pv)
+		return -EINVAL;
+	return pv->hvsi.mctrl;
+}
+
+static int hvterm_hvsi_tiocmset(struct hvc_struct *hp, unsigned int set,
+				unsigned int clear)
+{
+	struct hvterm_priv *pv = hvterm_privs[hp->vtermno];
+
+	pr_devel("HVSI@%x: Set modem control, set=%x,clr=%x\n",
+		 pv->termno, set, clear);
+
+	if (set & TIOCM_DTR)
+		hvsilib_write_mctrl(&pv->hvsi, 1);
+	else if (clear & TIOCM_DTR)
+		hvsilib_write_mctrl(&pv->hvsi, 0);
+
+	return 0;
+}
+
+static const struct hv_ops hvterm_hvsi_ops = {
+	.get_chars = hvterm_hvsi_get_chars,
+	.put_chars = hvterm_hvsi_put_chars,
+	.notifier_add = hvterm_hvsi_open,
+	.notifier_del = hvterm_hvsi_close,
+	.notifier_hangup = hvterm_hvsi_hangup,
+	.tiocmget = hvterm_hvsi_tiocmget,
+	.tiocmset = hvterm_hvsi_tiocmset,
+};
+
+static int __devinit hvc_vio_probe(struct vio_dev *vdev,
+				   const struct vio_device_id *id)
+{
+	const struct hv_ops *ops;
 	struct hvc_struct *hp;
+	struct hvterm_priv *pv;
+	hv_protocol_t proto;
+	int i, termno = -1;
 
 	/* probed with invalid parameters. */
 	if (!vdev || !id)
 		return -EPERM;
 
-	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops,
-			MAX_VIO_PUT_CHARS);
+	if (of_device_is_compatible(vdev->dev.of_node, "hvterm1")) {
+		proto = HV_PROTOCOL_RAW;
+		ops = &hvterm_raw_ops;
+	} else if (of_device_is_compatible(vdev->dev.of_node, "hvterm-protocol")) {
+		proto = HV_PROTOCOL_HVSI;
+		ops = &hvterm_hvsi_ops;
+	} else {
+		pr_err("hvc_vio: Unkown protocol for %s\n", vdev->dev.of_node->full_name);
+		return -ENXIO;
+	}
+
+	pr_devel("hvc_vio_probe() device %s, using %s protocol\n",
+		 vdev->dev.of_node->full_name,
+		 proto == HV_PROTOCOL_RAW ? "raw" : "hvsi");
+
+	/* Is it our boot one ? */
+	if (hvterm_privs[0] == &hvterm_priv0 &&
+	    vdev->unit_address == hvterm_priv0.termno) {
+		pv = hvterm_privs[0];
+		termno = 0;
+		pr_devel("->boot console, using termno 0\n");
+	}
+	/* nope, allocate a new one */
+	else {
+		for (i = 0; i < MAX_NR_HVC_CONSOLES && termno < 0; i++)
+			if (!hvterm_privs[i])
+				termno = i;
+		pr_devel("->non-boot console, using termno %d\n", termno);
+		if (termno < 0)
+			return -ENODEV;
+		pv = kzalloc(sizeof(struct hvterm_priv), GFP_KERNEL);
+		if (!pv)
+			return -ENOMEM;
+		pv->termno = vdev->unit_address;
+		pv->proto = proto;
+		spin_lock_init(&pv->buf_lock);
+		hvterm_privs[termno] = pv;
+		hvsilib_init(&pv->hvsi, hvc_get_chars, hvc_put_chars,
+			     pv->termno, 0);
+	}
+
+	hp = hvc_alloc(termno, vdev->irq, ops, MAX_VIO_PUT_CHARS);
 	if (IS_ERR(hp))
 		return PTR_ERR(hp);
 	dev_set_drvdata(&vdev->dev, hp);
@@ -106,8 +295,16 @@
 static int __devexit hvc_vio_remove(struct vio_dev *vdev)
 {
 	struct hvc_struct *hp = dev_get_drvdata(&vdev->dev);
+	int rc, termno;
 
-	return hvc_remove(hp);
+	termno = hp->vtermno;
+	rc = hvc_remove(hp);
+	if (rc == 0) {
+		if (hvterm_privs[termno] != &hvterm_priv0)
+			kfree(hvterm_privs[termno]);
+		hvterm_privs[termno] = NULL;
+	}
+	return rc;
 }
 
 static struct vio_driver hvc_vio_driver = {
@@ -140,34 +337,149 @@
 }
 module_exit(hvc_vio_exit);
 
-/* the device tree order defines our numbering */
-static int hvc_find_vtys(void)
+static void udbg_hvc_putc(char c)
 {
-	struct device_node *vty;
-	int num_found = 0;
+	int count = -1;
 
-	for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL;
-			vty = of_find_node_by_name(vty, "vty")) {
-		const uint32_t *vtermno;
+	if (c == '\n')
+		udbg_hvc_putc('\r');
 
-		/* We have statically defined space for only a certain number
-		 * of console adapters.
-		 */
-		if (num_found >= MAX_NR_HVC_CONSOLES) {
-			of_node_put(vty);
+	do {
+		switch(hvterm_priv0.proto) {
+		case HV_PROTOCOL_RAW:
+			count = hvterm_raw_put_chars(0, &c, 1);
+			break;
+		case HV_PROTOCOL_HVSI:
+			count = hvterm_hvsi_put_chars(0, &c, 1);
 			break;
 		}
+	} while(count == 0);
+}
 
-		vtermno = of_get_property(vty, "reg", NULL);
-		if (!vtermno)
-			continue;
+static int udbg_hvc_getc_poll(void)
+{
+	int rc = 0;
+	char c;
 
-		if (of_device_is_compatible(vty, "hvterm1")) {
-			hvc_instantiate(*vtermno, num_found, &hvc_get_put_ops);
-			++num_found;
+	switch(hvterm_priv0.proto) {
+	case HV_PROTOCOL_RAW:
+		rc = hvterm_raw_get_chars(0, &c, 1);
+		break;
+	case HV_PROTOCOL_HVSI:
+		rc = hvterm_hvsi_get_chars(0, &c, 1);
+		break;
+	}
+	if (!rc)
+		return -1;
+	return c;
+}
+
+static int udbg_hvc_getc(void)
+{
+	int ch;
+	for (;;) {
+		ch = udbg_hvc_getc_poll();
+		if (ch == -1) {
+			/* This shouldn't be needed...but... */
+			volatile unsigned long delay;
+			for (delay=0; delay < 2000000; delay++)
+				;
+		} else {
+			return ch;
 		}
 	}
-
-	return num_found;
 }
-console_initcall(hvc_find_vtys);
+
+void __init hvc_vio_init_early(void)
+{
+	struct device_node *stdout_node;
+	const u32 *termno;
+	const char *name;
+	const struct hv_ops *ops;
+
+	/* find the boot console via /chosen "linux,stdout-path" */
+	if (!of_chosen)
+		return;
+	name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (name == NULL)
+		return;
+	stdout_node = of_find_node_by_path(name);
+	if (!stdout_node)
+		return;
+	name = of_get_property(stdout_node, "name", NULL);
+	if (!name) {
+		printk(KERN_WARNING "stdout node missing 'name' property!\n");
+		goto out;
+	}
+
+	/* Check if it's a virtual terminal */
+	if (strncmp(name, "vty", 3) != 0)
+		goto out;
+	termno = of_get_property(stdout_node, "reg", NULL);
+	if (termno == NULL)
+		goto out;
+	hvterm_priv0.termno = *termno;
+	spin_lock_init(&hvterm_priv0.buf_lock);
+	hvterm_privs[0] = &hvterm_priv0;
+
+	/* Check the protocol */
+	if (of_device_is_compatible(stdout_node, "hvterm1")) {
+		hvterm_priv0.proto = HV_PROTOCOL_RAW;
+		ops = &hvterm_raw_ops;
+	}
+	else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) {
+		hvterm_priv0.proto = HV_PROTOCOL_HVSI;
+		ops = &hvterm_hvsi_ops;
+		hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars,
+			     hvterm_priv0.termno, 1);
+		/* HVSI, perform the handshake now */
+		hvsilib_establish(&hvterm_priv0.hvsi);
+	} else
+		goto out;
+	udbg_putc = udbg_hvc_putc;
+	udbg_getc = udbg_hvc_getc;
+	udbg_getc_poll = udbg_hvc_getc_poll;
+#ifdef HVC_OLD_HVSI
+	/* When using the old HVSI driver don't register the HVC
+	 * backend for HVSI, only do udbg
+	 */
+	if (hvterm_priv0.proto == HV_PROTOCOL_HVSI)
+		goto out;
+#endif
+	add_preferred_console("hvc", 0, NULL);
+	hvc_instantiate(0, 0, ops);
+out:
+	of_node_put(stdout_node);
+}
+
+/* call this from early_init() for a working debug console on
+ * vterm capable LPAR machines
+ */
+#ifdef CONFIG_PPC_EARLY_DEBUG_LPAR
+void __init udbg_init_debug_lpar(void)
+{
+	hvterm_privs[0] = &hvterm_priv0;
+	hvterm_priv0.termno = 0;
+	hvterm_priv0.proto = HV_PROTOCOL_RAW;
+	spin_lock_init(&hvterm_priv0.buf_lock);
+	udbg_putc = udbg_hvc_putc;
+	udbg_getc = udbg_hvc_getc;
+	udbg_getc_poll = udbg_hvc_getc_poll;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_LPAR */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_LPAR_HVSI
+void __init udbg_init_debug_lpar_hvsi(void)
+{
+	hvterm_privs[0] = &hvterm_priv0;
+	hvterm_priv0.termno = CONFIG_PPC_EARLY_DEBUG_HVSI_VTERMNO;
+	hvterm_priv0.proto = HV_PROTOCOL_HVSI;
+	spin_lock_init(&hvterm_priv0.buf_lock);
+	udbg_putc = udbg_hvc_putc;
+	udbg_getc = udbg_hvc_getc;
+	udbg_getc_poll = udbg_hvc_getc_poll;
+	hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars,
+		     hvterm_priv0.termno, 1);
+	hvsilib_establish(&hvterm_priv0.hvsi);
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_LPAR_HVSI */
diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c
index 8a8d637..c94e2f5 100644
--- a/drivers/tty/hvc/hvsi.c
+++ b/drivers/tty/hvc/hvsi.c
@@ -49,6 +49,7 @@
 #include <asm/uaccess.h>
 #include <asm/vio.h>
 #include <asm/param.h>
+#include <asm/hvsi.h>
 
 #define HVSI_MAJOR	229
 #define HVSI_MINOR	128
@@ -109,68 +110,6 @@
 };
 #define HVSI_CONSOLE 0x1
 
-#define VS_DATA_PACKET_HEADER           0xff
-#define VS_CONTROL_PACKET_HEADER        0xfe
-#define VS_QUERY_PACKET_HEADER          0xfd
-#define VS_QUERY_RESPONSE_PACKET_HEADER 0xfc
-
-/* control verbs */
-#define VSV_SET_MODEM_CTL    1 /* to service processor only */
-#define VSV_MODEM_CTL_UPDATE 2 /* from service processor only */
-#define VSV_CLOSE_PROTOCOL   3
-
-/* query verbs */
-#define VSV_SEND_VERSION_NUMBER 1
-#define VSV_SEND_MODEM_CTL_STATUS 2
-
-/* yes, these masks are not consecutive. */
-#define HVSI_TSDTR 0x01
-#define HVSI_TSCD  0x20
-
-struct hvsi_header {
-	uint8_t  type;
-	uint8_t  len;
-	uint16_t seqno;
-} __attribute__((packed));
-
-struct hvsi_data {
-	uint8_t  type;
-	uint8_t  len;
-	uint16_t seqno;
-	uint8_t  data[HVSI_MAX_OUTGOING_DATA];
-} __attribute__((packed));
-
-struct hvsi_control {
-	uint8_t  type;
-	uint8_t  len;
-	uint16_t seqno;
-	uint16_t verb;
-	/* optional depending on verb: */
-	uint32_t word;
-	uint32_t mask;
-} __attribute__((packed));
-
-struct hvsi_query {
-	uint8_t  type;
-	uint8_t  len;
-	uint16_t seqno;
-	uint16_t verb;
-} __attribute__((packed));
-
-struct hvsi_query_response {
-	uint8_t  type;
-	uint8_t  len;
-	uint16_t seqno;
-	uint16_t verb;
-	uint16_t query_seqno;
-	union {
-		uint8_t  version;
-		uint32_t mctrl_word;
-	} u;
-} __attribute__((packed));
-
-
-
 static inline int is_console(struct hvsi_struct *hp)
 {
 	return hp->flags & HVSI_CONSOLE;
@@ -356,18 +295,18 @@
 	struct hvsi_query_response packet __ALIGNED__;
 	int wrote;
 
-	packet.type = VS_QUERY_RESPONSE_PACKET_HEADER;
-	packet.len = sizeof(struct hvsi_query_response);
-	packet.seqno = atomic_inc_return(&hp->seqno);
+	packet.hdr.type = VS_QUERY_RESPONSE_PACKET_HEADER;
+	packet.hdr.len = sizeof(struct hvsi_query_response);
+	packet.hdr.seqno = atomic_inc_return(&hp->seqno);
 	packet.verb = VSV_SEND_VERSION_NUMBER;
 	packet.u.version = HVSI_VERSION;
 	packet.query_seqno = query_seqno+1;
 
-	pr_debug("%s: sending %i bytes\n", __func__, packet.len);
-	dbg_dump_hex((uint8_t*)&packet, packet.len);
+	pr_debug("%s: sending %i bytes\n", __func__, packet.hdr.len);
+	dbg_dump_hex((uint8_t*)&packet, packet.hdr.len);
 
-	wrote = hvc_put_chars(hp->vtermno, (char *)&packet, packet.len);
-	if (wrote != packet.len) {
+	wrote = hvc_put_chars(hp->vtermno, (char *)&packet, packet.hdr.len);
+	if (wrote != packet.hdr.len) {
 		printk(KERN_ERR "hvsi%i: couldn't send query response!\n",
 			hp->index);
 		return -EIO;
@@ -382,7 +321,7 @@
 
 	switch (hp->state) {
 		case HVSI_WAIT_FOR_VER_QUERY:
-			hvsi_version_respond(hp, query->seqno);
+			hvsi_version_respond(hp, query->hdr.seqno);
 			__set_state(hp, HVSI_OPEN);
 			break;
 		default:
@@ -640,16 +579,16 @@
 	struct hvsi_query packet __ALIGNED__;
 	int wrote;
 
-	packet.type = VS_QUERY_PACKET_HEADER;
-	packet.len = sizeof(struct hvsi_query);
-	packet.seqno = atomic_inc_return(&hp->seqno);
+	packet.hdr.type = VS_QUERY_PACKET_HEADER;
+	packet.hdr.len = sizeof(struct hvsi_query);
+	packet.hdr.seqno = atomic_inc_return(&hp->seqno);
 	packet.verb = verb;
 
-	pr_debug("%s: sending %i bytes\n", __func__, packet.len);
-	dbg_dump_hex((uint8_t*)&packet, packet.len);
+	pr_debug("%s: sending %i bytes\n", __func__, packet.hdr.len);
+	dbg_dump_hex((uint8_t*)&packet, packet.hdr.len);
 
-	wrote = hvc_put_chars(hp->vtermno, (char *)&packet, packet.len);
-	if (wrote != packet.len) {
+	wrote = hvc_put_chars(hp->vtermno, (char *)&packet, packet.hdr.len);
+	if (wrote != packet.hdr.len) {
 		printk(KERN_ERR "hvsi%i: couldn't send query (%i)!\n", hp->index,
 			wrote);
 		return -EIO;
@@ -683,20 +622,20 @@
 	struct hvsi_control packet __ALIGNED__;
 	int wrote;
 
-	packet.type = VS_CONTROL_PACKET_HEADER,
-	packet.seqno = atomic_inc_return(&hp->seqno);
-	packet.len = sizeof(struct hvsi_control);
+	packet.hdr.type = VS_CONTROL_PACKET_HEADER,
+	packet.hdr.seqno = atomic_inc_return(&hp->seqno);
+	packet.hdr.len = sizeof(struct hvsi_control);
 	packet.verb = VSV_SET_MODEM_CTL;
 	packet.mask = HVSI_TSDTR;
 
 	if (mctrl & TIOCM_DTR)
 		packet.word = HVSI_TSDTR;
 
-	pr_debug("%s: sending %i bytes\n", __func__, packet.len);
-	dbg_dump_hex((uint8_t*)&packet, packet.len);
+	pr_debug("%s: sending %i bytes\n", __func__, packet.hdr.len);
+	dbg_dump_hex((uint8_t*)&packet, packet.hdr.len);
 
-	wrote = hvc_put_chars(hp->vtermno, (char *)&packet, packet.len);
-	if (wrote != packet.len) {
+	wrote = hvc_put_chars(hp->vtermno, (char *)&packet, packet.hdr.len);
+	if (wrote != packet.hdr.len) {
 		printk(KERN_ERR "hvsi%i: couldn't set DTR!\n", hp->index);
 		return -EIO;
 	}
@@ -766,13 +705,13 @@
 
 	BUG_ON(count > HVSI_MAX_OUTGOING_DATA);
 
-	packet.type = VS_DATA_PACKET_HEADER;
-	packet.seqno = atomic_inc_return(&hp->seqno);
-	packet.len = count + sizeof(struct hvsi_header);
+	packet.hdr.type = VS_DATA_PACKET_HEADER;
+	packet.hdr.seqno = atomic_inc_return(&hp->seqno);
+	packet.hdr.len = count + sizeof(struct hvsi_header);
 	memcpy(&packet.data, buf, count);
 
-	ret = hvc_put_chars(hp->vtermno, (char *)&packet, packet.len);
-	if (ret == packet.len) {
+	ret = hvc_put_chars(hp->vtermno, (char *)&packet, packet.hdr.len);
+	if (ret == packet.hdr.len) {
 		/* return the number of chars written, not the packet length */
 		return count;
 	}
@@ -783,15 +722,15 @@
 {
 	struct hvsi_control packet __ALIGNED__;
 
-	packet.type = VS_CONTROL_PACKET_HEADER;
-	packet.seqno = atomic_inc_return(&hp->seqno);
-	packet.len = 6;
+	packet.hdr.type = VS_CONTROL_PACKET_HEADER;
+	packet.hdr.seqno = atomic_inc_return(&hp->seqno);
+	packet.hdr.len = 6;
 	packet.verb = VSV_CLOSE_PROTOCOL;
 
-	pr_debug("%s: sending %i bytes\n", __func__, packet.len);
-	dbg_dump_hex((uint8_t*)&packet, packet.len);
+	pr_debug("%s: sending %i bytes\n", __func__, packet.hdr.len);
+	dbg_dump_hex((uint8_t*)&packet, packet.hdr.len);
 
-	hvc_put_chars(hp->vtermno, (char *)&packet, packet.len);
+	hvc_put_chars(hp->vtermno, (char *)&packet, packet.hdr.len);
 }
 
 static int hvsi_open(struct tty_struct *tty, struct file *filp)
diff --git a/drivers/tty/hvc/hvsi_lib.c b/drivers/tty/hvc/hvsi_lib.c
new file mode 100644
index 0000000..bd9b098
--- /dev/null
+++ b/drivers/tty/hvc/hvsi_lib.c
@@ -0,0 +1,426 @@
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/console.h>
+#include <asm/hvsi.h>
+
+#include "hvc_console.h"
+
+static int hvsi_send_packet(struct hvsi_priv *pv, struct hvsi_header *packet)
+{
+	packet->seqno = atomic_inc_return(&pv->seqno);
+
+	/* Assumes that always succeeds, works in practice */
+	return pv->put_chars(pv->termno, (char *)packet, packet->len);
+}
+
+static void hvsi_start_handshake(struct hvsi_priv *pv)
+{
+	struct hvsi_query q;
+
+	/* Reset state */
+	pv->established = 0;
+	atomic_set(&pv->seqno, 0);
+
+	pr_devel("HVSI@%x: Handshaking started\n", pv->termno);
+
+	/* Send version query */
+	q.hdr.type = VS_QUERY_PACKET_HEADER;
+	q.hdr.len = sizeof(struct hvsi_query);
+	q.verb = VSV_SEND_VERSION_NUMBER;
+	hvsi_send_packet(pv, &q.hdr);
+}
+
+static int hvsi_send_close(struct hvsi_priv *pv)
+{
+	struct hvsi_control ctrl;
+
+	pv->established = 0;
+
+	ctrl.hdr.type = VS_CONTROL_PACKET_HEADER;
+	ctrl.hdr.len = sizeof(struct hvsi_control);
+	ctrl.verb = VSV_CLOSE_PROTOCOL;
+	return hvsi_send_packet(pv, &ctrl.hdr);
+}
+
+static void hvsi_cd_change(struct hvsi_priv *pv, int cd)
+{
+	if (cd)
+		pv->mctrl |= TIOCM_CD;
+	else {
+		pv->mctrl &= ~TIOCM_CD;
+
+		/* We copy the existing hvsi driver semantics
+		 * here which are to trigger a hangup when
+		 * we get a carrier loss.
+		 * Closing our connection to the server will
+		 * do just that.
+		 */
+		if (!pv->is_console && pv->opened) {
+			pr_devel("HVSI@%x Carrier lost, hanging up !\n",
+				 pv->termno);
+			hvsi_send_close(pv);
+		}
+	}
+}
+
+static void hvsi_got_control(struct hvsi_priv *pv)
+{
+	struct hvsi_control *pkt = (struct hvsi_control *)pv->inbuf;
+
+	switch (pkt->verb) {
+	case VSV_CLOSE_PROTOCOL:
+		/* We restart the handshaking */
+		hvsi_start_handshake(pv);
+		break;
+	case VSV_MODEM_CTL_UPDATE:
+		/* Transition of carrier detect */
+		hvsi_cd_change(pv, pkt->word & HVSI_TSCD);
+		break;
+	}
+}
+
+static void hvsi_got_query(struct hvsi_priv *pv)
+{
+	struct hvsi_query *pkt = (struct hvsi_query *)pv->inbuf;
+	struct hvsi_query_response r;
+
+	/* We only handle version queries */
+	if (pkt->verb != VSV_SEND_VERSION_NUMBER)
+		return;
+
+	pr_devel("HVSI@%x: Got version query, sending response...\n",
+		 pv->termno);
+
+	/* Send version response */
+	r.hdr.type = VS_QUERY_RESPONSE_PACKET_HEADER;
+	r.hdr.len = sizeof(struct hvsi_query_response);
+	r.verb = VSV_SEND_VERSION_NUMBER;
+	r.u.version = HVSI_VERSION;
+	r.query_seqno = pkt->hdr.seqno;
+	hvsi_send_packet(pv, &r.hdr);
+
+	/* Assume protocol is open now */
+	pv->established = 1;
+}
+
+static void hvsi_got_response(struct hvsi_priv *pv)
+{
+	struct hvsi_query_response *r =
+		(struct hvsi_query_response *)pv->inbuf;
+
+	switch(r->verb) {
+	case VSV_SEND_MODEM_CTL_STATUS:
+		hvsi_cd_change(pv, r->u.mctrl_word & HVSI_TSCD);
+		pv->mctrl_update = 1;
+		break;
+	}
+}
+
+/* Handle the packet at the head of inbuf; returns 1 if one was found */
+static int hvsi_check_packet(struct hvsi_priv *pv)
+{
+	u8 len, type;
+
+	/* Check header validity (a length below the header size can never
+	 * be consumed). If invalid, ditch the buffer and hope we resync
+	 */
+	if (pv->inbuf[0] < 0xfc ||
+	    pv->inbuf[1] < sizeof(struct hvsi_header)) {
+		pv->inbuf_len = pv->inbuf_pktlen = 0;
+		return 0;
+	}
+	type = pv->inbuf[0];
+	len = pv->inbuf[1];
+	/* Packet incomplete ? */
+	if (pv->inbuf_len < len)
+		return 0;
+
+	pr_devel("HVSI@%x: Got packet type %x len %d bytes:\n",
+		 pv->termno, type, len);
+	/* We have a packet, yay ! Handle it */
+	switch(type) {
+	case VS_DATA_PACKET_HEADER:
+		pv->inbuf_pktlen = len - 4;
+		pv->inbuf_cur = 4;
+		return 1;
+	case VS_CONTROL_PACKET_HEADER:
+		hvsi_got_control(pv);
+		break;
+	case VS_QUERY_PACKET_HEADER:
+		hvsi_got_query(pv);
+		break;
+	case VS_QUERY_RESPONSE_PACKET_HEADER:
+		hvsi_got_response(pv);
+		break;
+	}
+
+	/* Swallow packet and retry */
+	pv->inbuf_len -= len;
+	memmove(pv->inbuf, &pv->inbuf[len], pv->inbuf_len);
+	return 1;
+}
+
+static int hvsi_get_packet(struct hvsi_priv *pv)
+{
+	/* If we have room in the buffer, ask HV for more */
+	if (pv->inbuf_len < HVSI_INBUF_SIZE)
+		pv->inbuf_len += pv->get_chars(pv->termno,
+					     &pv->inbuf[pv->inbuf_len],
+					     HVSI_INBUF_SIZE - pv->inbuf_len);
+	/*
+	 * If we have at least 4 bytes in the buffer, check for
+	 * a full packet and retry
+	 */
+	if (pv->inbuf_len >= 4)
+		return hvsi_check_packet(pv);
+	return 0;
+}
+
+int hvsilib_get_chars(struct hvsi_priv *pv, char *buf, int count)
+{
+	unsigned int tries, read = 0;
+
+	if (WARN_ON(!pv))
+		return 0;
+
+	/* If we aren't open, don't do anything in order to avoid races
+	 * with connection establishment. The hvc core will call this
+	 * before we have returned from notifier_add(), and we need to
+	 * avoid multiple users playing with the receive buffer
+	 */
+	if (!pv->opened)
+		return 0;
+
+	/* We try twice, once with what data we have and once more
+	 * after we try to fetch some more from the hypervisor
+	 */
+	for (tries = 1; count && tries < 2; tries++) {
+		/* Consume existing data packet */
+		if (pv->inbuf_pktlen) {
+			unsigned int l = min(count, (int)pv->inbuf_pktlen);
+			memcpy(&buf[read], &pv->inbuf[pv->inbuf_cur], l);
+			pv->inbuf_cur += l;
+			pv->inbuf_pktlen -= l;
+			count -= l;
+			read += l;
+		}
+		if (count == 0)
+			break;
+
+		/* Data packet fully consumed, move down remaining data */
+		if (pv->inbuf_cur) {
+			pv->inbuf_len -= pv->inbuf_cur;
+			memmove(pv->inbuf, &pv->inbuf[pv->inbuf_cur],
+				pv->inbuf_len);
+			pv->inbuf_cur = 0;
+		}
+
+		/* Try to get another packet */
+		if (hvsi_get_packet(pv))
+			tries--;
+	}
+	if (!pv->established) {
+		pr_devel("HVSI@%x: returning -EPIPE\n", pv->termno);
+		return -EPIPE;
+	}
+	return read;
+}
+
+int hvsilib_put_chars(struct hvsi_priv *pv, const char *buf, int count)
+{
+	struct hvsi_data dp;
+	int rc, adjcount = min(count, HVSI_MAX_OUTGOING_DATA);
+
+	if (WARN_ON(!pv))
+		return 0;
+
+	dp.hdr.type = VS_DATA_PACKET_HEADER;
+	dp.hdr.len = adjcount + sizeof(struct hvsi_header);
+	memcpy(dp.data, buf, adjcount);
+	rc = hvsi_send_packet(pv, &dp.hdr);
+	if (rc <= 0)
+		return rc;
+	return adjcount;
+}
+
+static void maybe_msleep(unsigned long ms)
+{
+	/* During early boot, IRQs are disabled, use mdelay */
+	if (irqs_disabled())
+		mdelay(ms);
+	else
+		msleep(ms);
+}
+
+/* Query the modem control lines and poll (about 200ms at most) for the
+ * reply; hvsi_send_packet() stamps the sequence number. Returns 0 on a
+ * reply, -ENXIO if not established, -EIO on timeout, or a send result <= 0.
+ */
+int hvsilib_read_mctrl(struct hvsi_priv *pv)
+{
+	struct hvsi_query q;
+	int rc, timeout;
+
+	pr_devel("HVSI@%x: Querying modem control status...\n",
+		 pv->termno);
+	pv->mctrl_update = 0;
+	q.hdr.type = VS_QUERY_PACKET_HEADER;
+	q.hdr.len = sizeof(struct hvsi_query);
+	q.verb = VSV_SEND_MODEM_CTL_STATUS;
+	rc = hvsi_send_packet(pv, &q.hdr);
+	if (rc <= 0) {
+		pr_devel("HVSI@%x: Error %d...\n", pv->termno, rc);
+		return rc;
+	}
+	for (timeout = 0; timeout < 20; timeout++) {
+		if (!pv->established)
+			return -ENXIO;
+		if (pv->mctrl_update)
+			return 0;
+		if (!hvsi_get_packet(pv))
+			maybe_msleep(10);
+	}
+	return -EIO;
+}
+
+/* Set or clear our DTR line. Nothing is sent when DTR already has the
+ * requested state; otherwise the cached mctrl is updated and a
+ * VSV_SET_MODEM_CTL control packet is sent. Returns 0 or the send result.
+ */
+int hvsilib_write_mctrl(struct hvsi_priv *pv, int dtr)
+{
+	struct hvsi_control ctrl;
+	unsigned short mctrl;
+
+	mctrl = dtr ? (pv->mctrl | TIOCM_DTR) : (pv->mctrl & ~TIOCM_DTR);
+	if (mctrl == pv->mctrl)
+		return 0;
+	pv->mctrl = mctrl;
+
+	pr_devel("HVSI@%x: %s DTR...\n", pv->termno,
+		 dtr ? "Setting" : "Clearing");
+
+	ctrl.hdr.type = VS_CONTROL_PACKET_HEADER;
+	ctrl.hdr.len = sizeof(struct hvsi_control);
+	ctrl.verb = VSV_SET_MODEM_CTL;
+	ctrl.mask = HVSI_TSDTR;
+	ctrl.word = dtr ? HVSI_TSDTR : 0;
+	return hvsi_send_packet(pv, &ctrl.hdr);
+}
+
+void hvsilib_establish(struct hvsi_priv *pv)
+{
+	int timeout;
+
+	pr_devel("HVSI@%x: Establishing...\n", pv->termno);
+
+	/* Try for up to 200ms, there can be a packet to
+	 * start the process waiting for us...
+	 */
+	for (timeout = 0; timeout < 20; timeout++) {
+		if (pv->established)
+			goto established;
+		if (!hvsi_get_packet(pv))
+			maybe_msleep(10);
+	}
+
+	/* Failed, send a close connection packet just
+	 * in case
+	 */
+	pr_devel("HVSI@%x:   ... sending close\n", pv->termno);
+
+	hvsi_send_close(pv);
+
+	/* Then restart handshake */
+
+	pr_devel("HVSI@%x:   ... restarting handshake\n", pv->termno);
+
+	hvsi_start_handshake(pv);
+
+	pr_devel("HVSI@%x:   ... waiting handshake\n", pv->termno);
+
+	/* Try for up to 200ms */
+	for (timeout = 0; timeout < 20; timeout++) {
+		if (pv->established)
+			goto established;
+		if (!hvsi_get_packet(pv))
+			maybe_msleep(10);
+	}
+
+	if (!pv->established) {
+		pr_devel("HVSI@%x: Timeout handshaking, giving up !\n",
+			 pv->termno);
+		return;
+	}
+ established:
+	/* Query modem control lines */
+
+	pr_devel("HVSI@%x:   ... established, reading mctrl\n", pv->termno);
+
+	hvsilib_read_mctrl(pv);
+
+	/* Set our own DTR */
+
+	pr_devel("HVSI@%x:   ... setting mctrl\n", pv->termno);
+
+	hvsilib_write_mctrl(pv, 1);
+
+	/* Set the opened flag so reads are allowed */
+	wmb();
+	pv->opened = 1;
+}
+
+int hvsilib_open(struct hvsi_priv *pv, struct hvc_struct *hp)
+{
+	pr_devel("HVSI@%x: open !\n", pv->termno);
+
+	/* Keep track of the tty data structure */
+	pv->tty = tty_kref_get(hp->tty);
+
+	hvsilib_establish(pv);
+
+	return 0;
+}
+
+void hvsilib_close(struct hvsi_priv *pv, struct hvc_struct *hp)
+{
+	unsigned long flags;
+
+	pr_devel("HVSI@%x: close !\n", pv->termno);
+
+	if (!pv->is_console) {
+		pr_devel("HVSI@%x: Not a console, tearing down\n",
+			 pv->termno);
+
+		/* Clear opened, synchronize with khvcd */
+		spin_lock_irqsave(&hp->lock, flags);
+		pv->opened = 0;
+		spin_unlock_irqrestore(&hp->lock, flags);
+
+		/* Clear our own DTR */
+		if (!pv->tty || (pv->tty->termios->c_cflag & HUPCL))
+			hvsilib_write_mctrl(pv, 0);
+
+		/* Tear down the connection */
+		hvsi_send_close(pv);
+	}
+
+	if (pv->tty)
+		tty_kref_put(pv->tty);
+	pv->tty = NULL;
+}
+
+void hvsilib_init(struct hvsi_priv *pv,
+		  int (*get_chars)(uint32_t termno, char *buf, int count),
+		  int (*put_chars)(uint32_t termno, const char *buf,
+				   int count),
+		  int termno, int is_console)
+{
+	memset(pv, 0, sizeof(*pv));
+	pv->get_chars = get_chars;
+	pv->put_chars = put_chars;
+	pv->termno = termno;
+	pv->is_console = is_console;
+}
diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
new file mode 100644
index 0000000..2dcdbc9
--- /dev/null
+++ b/drivers/virt/Kconfig
@@ -0,0 +1,32 @@
+#
+# Virtualization support drivers
+#
+
+menuconfig VIRT_DRIVERS
+	bool "Virtualization drivers"
+	---help---
+	  Say Y here to get to see options for device drivers that support
+	  virtualization environments.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRT_DRIVERS
+
+config FSL_HV_MANAGER
+	tristate "Freescale hypervisor management driver"
+	depends on FSL_SOC
+	help
+          The Freescale hypervisor management driver provides several services
+	  to drivers and applications related to the Freescale hypervisor:
+
+          1) An ioctl interface for querying and managing partitions.
+
+          2) A file interface for reading incoming doorbells.
+
+          3) An interrupt handler for shutting down the partition upon
+	     receiving the shutdown doorbell from a manager partition.
+
+          4) A kernel interface for receiving callbacks when a managed
+	     partition shuts down.
+
+endif
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
new file mode 100644
index 0000000..c47f04d
--- /dev/null
+++ b/drivers/virt/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for drivers that support virtualization
+#
+
+obj-$(CONFIG_FSL_HV_MANAGER)	+= fsl_hypervisor.o
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
new file mode 100644
index 0000000..3d91621
--- /dev/null
+++ b/drivers/virt/fsl_hypervisor.c
@@ -0,0 +1,938 @@
+/*
+ * Freescale Hypervisor Management Driver
+
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ * The Freescale hypervisor management driver provides several services to
+ * drivers and applications related to the Freescale hypervisor:
+ *
+ * 1. An ioctl interface for querying and managing partitions.
+ *
+ * 2. A file interface for reading incoming doorbells.
+ *
+ * 3. An interrupt handler for shutting down the partition upon receiving the
+ *    shutdown doorbell from a manager partition.
+ *
+ * 4. A kernel interface for receiving callbacks when a managed partition
+ *    shuts down.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/of.h>
+#include <linux/reboot.h>
+#include <linux/uaccess.h>
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+
+#include <linux/io.h>
+#include <asm/fsl_hcalls.h>
+
+#include <linux/fsl_hypervisor.h>
+
+static BLOCKING_NOTIFIER_HEAD(failover_subscribers);
+
+/*
+ * Ioctl interface for FSL_HV_IOCTL_PARTITION_RESTART
+ *
+ * Restart a running partition
+ */
+static long ioctl_restart(struct fsl_hv_ioctl_restart __user *p)
+{
+	struct fsl_hv_ioctl_restart param;
+
+	/* Get the parameters from the user */
+	if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_restart)))
+		return -EFAULT;
+
+	param.ret = fh_partition_restart(param.partition);
+
+	if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Ioctl interface for FSL_HV_IOCTL_PARTITION_STATUS
+ *
+ * Query the status of a partition
+ */
+static long ioctl_status(struct fsl_hv_ioctl_status __user *p)
+{
+	struct fsl_hv_ioctl_status param;
+	u32 status;
+
+	/* Get the parameters from the user */
+	if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_status)))
+		return -EFAULT;
+
+	param.ret = fh_partition_get_status(param.partition, &status);
+	if (!param.ret)
+		param.status = status;
+
+	if (copy_to_user(p, &param, sizeof(struct fsl_hv_ioctl_status)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Ioctl interface for FSL_HV_IOCTL_PARTITION_START
+ *
+ * Start a stopped partition.
+ */
+static long ioctl_start(struct fsl_hv_ioctl_start __user *p)
+{
+	struct fsl_hv_ioctl_start param;
+
+	/* Get the parameters from the user */
+	if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_start)))
+		return -EFAULT;
+
+	param.ret = fh_partition_start(param.partition, param.entry_point,
+				       param.load);
+
+	if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Ioctl interface for FSL_HV_IOCTL_PARTITION_STOP
+ *
+ * Stop a running partition
+ */
+static long ioctl_stop(struct fsl_hv_ioctl_stop __user *p)
+{
+	struct fsl_hv_ioctl_stop param;
+
+	/* Get the parameters from the user */
+	if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_stop)))
+		return -EFAULT;
+
+	param.ret = fh_partition_stop(param.partition);
+
+	if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Ioctl interface for FSL_HV_IOCTL_MEMCPY
+ *
+ * The FH_MEMCPY hypercall takes an array of address/address/size structures
+ * to represent the data being copied.  As a convenience to the user, this
+ * ioctl takes a user-created buffer and a pointer to a guest physically
+ * contiguous buffer in the remote partition, and creates the
+ * address/address/size array for the hypercall.
+ */
+static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
+{
+	struct fsl_hv_ioctl_memcpy param;
+	struct page **pages = NULL;
+	void *sg_list_unaligned = NULL;
+	struct fh_sg_list *sg_list = NULL;
+	unsigned int num_pages;
+	unsigned long lb_offset; /* Offset within a page of the local buffer */
+	unsigned int i;
+	long ret = 0;
+	int num_pinned; /* return value from get_user_pages() */
+	phys_addr_t remote_paddr; /* The next address in the remote buffer */
+	uint32_t count; /* The number of bytes left to copy */
+
+	/* Get the parameters from the user */
+	if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_memcpy)))
+		return -EFAULT;
+
+	/*
+	 * One partition must be local, the other must be remote.  In other
+	 * words, if source and target are both -1, or are both not -1, then
+	 * return an error.
+	 */
+	if ((param.source == -1) == (param.target == -1))
+		return -EINVAL;
+
+	/*
+	 * The array of pages returned by get_user_pages() covers only
+	 * page-aligned memory.  Since the user buffer is probably not
+	 * page-aligned, we need to handle the discrepancy.
+	 *
+	 * We calculate the offset within a page of the S/G list, and make
+	 * adjustments accordingly.  This will result in a page list that looks
+	 * like this:
+	 *
+	 *      ----    <-- first page starts before the buffer
+	 *     |    |
+	 *     |////|-> ----
+	 *     |////|  |    |
+	 *      ----   |    |
+	 *             |    |
+	 *      ----   |    |
+	 *     |////|  |    |
+	 *     |////|  |    |
+	 *     |////|  |    |
+	 *      ----   |    |
+	 *             |    |
+	 *      ----   |    |
+	 *     |////|  |    |
+	 *     |////|  |    |
+	 *     |////|  |    |
+	 *      ----   |    |
+	 *             |    |
+	 *      ----   |    |
+	 *     |////|  |    |
+	 *     |////|-> ----
+	 *     |    |   <-- last page ends after the buffer
+	 *      ----
+	 *
+	 * The distance between the start of the first page and the start of the
+	 * buffer is lb_offset.  The hashed (///) areas are the parts of the
+	 * page list that contain the actual buffer.
+	 *
+	 * The advantage of this approach is that the number of pages is
+	 * equal to the number of entries in the S/G list that we give to the
+	 * hypervisor.
+	 */
+	lb_offset = param.local_vaddr & (PAGE_SIZE - 1);
+	num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	/* Nothing to copy: pages[0] and sg_list[0] below would not exist */
+	if (!num_pages)
+		return -EINVAL;
+
+	/* Allocate the buffers we need */
+
+	/*
+	 * 'pages' is an array of struct page pointers that's initialized by
+	 * get_user_pages().  kcalloc() zeroes it and checks the size math.
+	 */
+	pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages) {
+		pr_debug("fsl-hv: could not allocate page list\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * sg_list is the list of fh_sg_list objects that we pass to the
+	 * hypervisor.
+	 */
+	sg_list_unaligned = kmalloc(num_pages * sizeof(struct fh_sg_list) +
+		sizeof(struct fh_sg_list) - 1, GFP_KERNEL);
+	if (!sg_list_unaligned) {
+		pr_debug("fsl-hv: could not allocate S/G list\n");
+		ret = -ENOMEM;
+		goto exit;
+	}
+	sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list));
+
+	/* Get the physical addresses of the source buffer */
+	down_read(&current->mm->mmap_sem);
+	num_pinned = get_user_pages(current, current->mm,
+		param.local_vaddr - lb_offset, num_pages,
+		(param.source == -1) ? READ : WRITE,
+		0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (num_pinned != num_pages) {
+		/* get_user_pages() failed */
+		pr_debug("fsl-hv: could not lock source buffer\n");
+		ret = (num_pinned < 0) ? num_pinned : -EFAULT;
+		goto exit;
+	}
+
+	/*
+	 * Build the fh_sg_list[] array.  The first page is special
+	 * because it's misaligned.
+	 */
+	if (param.source == -1) {
+		sg_list[0].source = page_to_phys(pages[0]) + lb_offset;
+		sg_list[0].target = param.remote_paddr;
+	} else {
+		sg_list[0].source = param.remote_paddr;
+		sg_list[0].target = page_to_phys(pages[0]) + lb_offset;
+	}
+	sg_list[0].size = min_t(uint64_t, param.count, PAGE_SIZE - lb_offset);
+
+	remote_paddr = param.remote_paddr + sg_list[0].size;
+	count = param.count - sg_list[0].size;
+
+	for (i = 1; i < num_pages; i++) {
+		if (param.source == -1) {
+			/* local to remote */
+			sg_list[i].source = page_to_phys(pages[i]);
+			sg_list[i].target = remote_paddr;
+		} else {
+			/* remote to local */
+			sg_list[i].source = remote_paddr;
+			sg_list[i].target = page_to_phys(pages[i]);
+		}
+		sg_list[i].size = min_t(uint64_t, count, PAGE_SIZE);
+
+		remote_paddr += sg_list[i].size;
+		count -= sg_list[i].size;
+	}
+
+	param.ret = fh_partition_memcpy(param.source, param.target,
+		virt_to_phys(sg_list), num_pages);
+
+exit:
+	if (pages) {
+		for (i = 0; i < num_pages; i++)
+			if (pages[i])
+				put_page(pages[i]);
+	}
+
+	kfree(sg_list_unaligned);
+	kfree(pages);
+
+	if (!ret)
+		if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
+			return -EFAULT;
+
+	return ret;
+}
+
+/*
+ * Ioctl interface for FSL_HV_IOCTL_DOORBELL
+ *
+ * Ring a doorbell
+ */
+static long ioctl_doorbell(struct fsl_hv_ioctl_doorbell __user *p)
+{
+	struct fsl_hv_ioctl_doorbell param;
+
+	/* Get the parameters from the user. */
+	if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_doorbell)))
+		return -EFAULT;
+
+	param.ret = ev_doorbell_send(param.doorbell);
+
+	if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Ioctl interface for FSL_HV_IOCTL_GETPROP (set == 0) and
+ * FSL_HV_IOCTL_SETPROP (set != 0).  The hypercall status is returned to
+ * the user in p->ret; the return value is 0 or a negative errno.
+ */
+static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
+{
+	struct fsl_hv_ioctl_prop param;
+	char __user *upath, *upropname;
+	void __user *upropval;
+	char *path = NULL, *propname = NULL;
+	void *propval = NULL;
+	int ret = 0;
+
+	/* Get the parameters from the user. */
+	if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_prop)))
+		return -EFAULT;
+
+	upath = (char __user *)(uintptr_t)param.path;
+	upropname = (char __user *)(uintptr_t)param.propname;
+	upropval = (void __user *)(uintptr_t)param.propval;
+
+	path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN);
+	if (IS_ERR(path))
+		return PTR_ERR(path);	/* nothing to free yet */
+
+	propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN);
+	if (IS_ERR(propname)) {
+		ret = PTR_ERR(propname);
+		propname = NULL;	/* don't hand an ERR_PTR to kfree() */
+		goto out;
+	}
+
+	if (param.proplen > FH_DTPROP_MAX_PROPLEN) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	propval = kmalloc(param.proplen, GFP_KERNEL);
+	if (!propval) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (set) {
+		if (copy_from_user(propval, upropval, param.proplen)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		param.ret = fh_partition_set_dtprop(param.handle,
+						    virt_to_phys(path),
+						    virt_to_phys(propname),
+						    virt_to_phys(propval),
+						    param.proplen);
+	} else {
+		param.ret = fh_partition_get_dtprop(param.handle,
+						    virt_to_phys(path),
+						    virt_to_phys(propname),
+						    virt_to_phys(propval),
+						    &param.proplen);
+		if (param.ret == 0) {
+			if (copy_to_user(upropval, propval, param.proplen) ||
+			    put_user(param.proplen, &p->proplen)) {
+				ret = -EFAULT;
+				goto out;
+			}
+		}
+	}
+
+	if (put_user(param.ret, &p->ret))
+		ret = -EFAULT;
+out:
+	kfree(path);
+	kfree(propval);
+	kfree(propname);
+	return ret;
+}
+
+/*
+ * Ioctl main entry point
+ */
+static long fsl_hv_ioctl(struct file *file, unsigned int cmd,
+			 unsigned long argaddr)
+{
+	void __user *arg = (void __user *)argaddr;
+	long ret;
+
+	switch (cmd) {
+	case FSL_HV_IOCTL_PARTITION_RESTART:
+		ret = ioctl_restart(arg);
+		break;
+	case FSL_HV_IOCTL_PARTITION_GET_STATUS:
+		ret = ioctl_status(arg);
+		break;
+	case FSL_HV_IOCTL_PARTITION_START:
+		ret = ioctl_start(arg);
+		break;
+	case FSL_HV_IOCTL_PARTITION_STOP:
+		ret = ioctl_stop(arg);
+		break;
+	case FSL_HV_IOCTL_MEMCPY:
+		ret = ioctl_memcpy(arg);
+		break;
+	case FSL_HV_IOCTL_DOORBELL:
+		ret = ioctl_doorbell(arg);
+		break;
+	case FSL_HV_IOCTL_GETPROP:
+		ret = ioctl_dtprop(arg, 0);
+		break;
+	case FSL_HV_IOCTL_SETPROP:
+		ret = ioctl_dtprop(arg, 1);
+		break;
+	default:
+		pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n",
+			 _IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd),
+			 _IOC_SIZE(cmd));
+		return -ENOTTY;
+	}
+
+	return ret;
+}
+
+/* Linked list of processes that have us open */
+static struct list_head db_list;
+
+/* spinlock for db_list */
+static DEFINE_SPINLOCK(db_list_lock);
+
+/* The size of the doorbell event queue.  This must be a power of two. */
+#define QSIZE	16
+
+/* Returns the next head/tail pointer, wrapping around the queue if necessary */
+#define nextp(x) (((x) + 1) & (QSIZE - 1))
+
+/* Per-open data structure */
+struct doorbell_queue {
+	struct list_head list;
+	spinlock_t lock;
+	wait_queue_head_t wait;
+	unsigned int head;
+	unsigned int tail;
+	uint32_t q[QSIZE];
+};
+
+/* Linked list of ISRs that we registered */
+struct list_head isr_list;
+
+/* Per-ISR data structure */
+struct doorbell_isr {
+	struct list_head list;
+	unsigned int irq;
+	uint32_t doorbell;	/* The doorbell handle */
+	uint32_t partition;	/* The partition handle, if used */
+};
+
+/*
+ * Add a doorbell to all of the doorbell queues
+ */
+static void fsl_hv_queue_doorbell(uint32_t doorbell)
+{
+	struct doorbell_queue *dbq;
+	unsigned long flags;
+
+	/* Prevent another core from modifying db_list */
+	spin_lock_irqsave(&db_list_lock, flags);
+
+	list_for_each_entry(dbq, &db_list, list) {
+		if (dbq->head != nextp(dbq->tail)) {
+			dbq->q[dbq->tail] = doorbell;
+			/*
+			 * This memory barrier eliminates the need to grab
+			 * the spinlock for dbq.
+			 */
+			smp_wmb();
+			dbq->tail = nextp(dbq->tail);
+			wake_up_interruptible(&dbq->wait);
+		}
+	}
+
+	spin_unlock_irqrestore(&db_list_lock, flags);
+}
+
+/*
+ * Interrupt handler for all doorbells
+ *
+ * We use the same interrupt handler for all doorbells.  Whenever a doorbell
+ * is rung, and we receive an interrupt, we just put the handle for that
+ * doorbell (passed to us as *data) into all of the queues.
+ */
+static irqreturn_t fsl_hv_isr(int irq, void *data)
+{
+	fsl_hv_queue_doorbell((uintptr_t) data);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Threaded half of the state-change interrupt
+ *
+ * The notification itself arrives in hard interrupt context, where
+ * blocking_notifier_call_chain() must not be called.  Using
+ * atomic_notifier_call_chain() instead would force every client call-back to
+ * run in interrupt context.  To avoid imposing that on the clients, the
+ * subscribers are notified from this IRQ thread, passing the handle of the
+ * partition as the notifier value.
+ */
+static irqreturn_t fsl_hv_state_change_thread(int irq, void *data)
+{
+	const struct doorbell_isr *isr = data;
+	unsigned long partition = isr->partition;
+
+	blocking_notifier_call_chain(&failover_subscribers, partition, NULL);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Hard interrupt handler for state-change doorbells
+ *
+ * Returns IRQ_WAKE_THREAD when the partition status could be read and the
+ * partition is stopped, so that the IRQ thread notifies the clients.  In
+ * every other case the interrupt is simply reported as handled.
+ */
+static irqreturn_t fsl_hv_state_change_isr(int irq, void *data)
+{
+	struct doorbell_isr *isr = data;
+	unsigned int state;
+
+	/* A state-change doorbell is still a doorbell: queue it as usual. */
+	fsl_hv_queue_doorbell(isr->doorbell);
+
+	/* Without a valid status there is nothing to notify about. */
+	if (fh_partition_get_status(isr->partition, &state))
+		return IRQ_HANDLED;
+
+	return (state == FH_PARTITION_STOPPED) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
+/*
+ * Returns a bitmask indicating whether a read will block
+ *
+ * The mask is POLLIN | POLLRDNORM when the queue of this open instance holds
+ * at least one doorbell, and zero when it is empty.
+ */
+static unsigned int fsl_hv_poll(struct file *filp, struct poll_table_struct *p)
+{
+	struct doorbell_queue *dbq = filp->private_data;
+	unsigned long flags;
+	unsigned int mask;
+
+	/*
+	 * Register on the wait queue before taking the spinlock.  poll_wait()
+	 * may allocate memory and takes the lock of the wait queue itself, so
+	 * it must not run with dbq->lock held and interrupts disabled.
+	 */
+	poll_wait(filp, &dbq->wait, p);
+
+	spin_lock_irqsave(&dbq->lock, flags);
+	mask = (dbq->head == dbq->tail) ? 0 : (POLLIN | POLLRDNORM);
+	spin_unlock_irqrestore(&dbq->lock, flags);
+
+	return mask;
+}
+
+/*
+ * Hand pending doorbell handles to the caller
+ *
+ * Handles queued for this open instance are copied to the user buffer as an
+ * array of 32-bit integers, as many as fit in @len.  If the queue is empty
+ * and nothing has been copied yet, the call blocks until a doorbell arrives,
+ * or fails with -EAGAIN when the file was opened with O_NONBLOCK.
+ *
+ * Returns the number of bytes copied, -ERESTARTSYS if the wait was
+ * interrupted, or -EFAULT if the user buffer could not be written.
+ */
+static ssize_t fsl_hv_read(struct file *filp, char __user *buf, size_t len,
+			   loff_t *off)
+{
+	struct doorbell_queue *dbq = filp->private_data;
+	uint32_t __user *dst = (uint32_t __user *) buf; /* for put_user() */
+	unsigned long irqflags;
+	ssize_t copied = 0;
+
+	/* Stop as soon as another handle no longer fits in the user buffer. */
+	while (len >= sizeof(uint32_t)) {
+		uint32_t handle = 0;	/* Local copy of doorbell queue data */
+		bool empty;
+
+		spin_lock_irqsave(&dbq->lock, irqflags);
+
+		empty = (dbq->head == dbq->tail);
+		if (!empty) {
+			/*
+			 * The ISR has an smp_wmb(), but the core might still
+			 * speculatively load the queue entry while it is
+			 * evaluating the emptiness test above, and so pick up
+			 * a stale value.  The read barrier prevents that.
+			 */
+			smp_rmb();
+
+			/*
+			 * Take the entry out under the lock; the copy to user
+			 * space has to happen after the lock is released.
+			 */
+			handle = dbq->q[dbq->head];
+			dbq->head = nextp(dbq->head);
+		}
+
+		spin_unlock_irqrestore(&dbq->lock, irqflags);
+
+		if (empty) {
+			/* Something was already delivered: we are done. */
+			if (copied)
+				break;
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+			if (wait_event_interruptible(dbq->wait,
+						     dbq->head != dbq->tail))
+				return -ERESTARTSYS;
+			continue;
+		}
+
+		if (put_user(handle, dst))
+			return -EFAULT;
+
+		dst++;
+		copied += sizeof(uint32_t);
+		len -= sizeof(uint32_t);
+	}
+
+	return copied;
+}
+
+/*
+ * Open the driver and prepare for reading doorbells.
+ *
+ * Each open file gets its own doorbell queue.  The queue is linked into
+ * db_list so that incoming doorbells are copied into it, and is stored in
+ * filp->private_data for the other file operations.
+ *
+ * Returns 0 on success or -ENOMEM if the queue cannot be allocated.
+ */
+static int fsl_hv_open(struct inode *inode, struct file *filp)
+{
+	struct doorbell_queue *queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	unsigned long irqflags;
+
+	if (!queue) {
+		pr_err("fsl-hv: out of memory\n");
+		return -ENOMEM;
+	}
+
+	init_waitqueue_head(&queue->wait);
+	spin_lock_init(&queue->lock);
+
+	spin_lock_irqsave(&db_list_lock, irqflags);
+	list_add(&queue->list, &db_list);
+	spin_unlock_irqrestore(&db_list_lock, irqflags);
+
+	filp->private_data = queue;
+
+	return 0;
+}
+
+/*
+ * Close the driver
+ *
+ * Unlinks the doorbell queue of this open instance from db_list, so that no
+ * further doorbells are added to it, and then frees it.  Always returns 0.
+ */
+static int fsl_hv_close(struct inode *inode, struct file *filp)
+{
+	struct doorbell_queue *queue = filp->private_data;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&db_list_lock, irqflags);
+	list_del(&queue->list);
+	spin_unlock_irqrestore(&db_list_lock, irqflags);
+
+	kfree(queue);
+
+	return 0;
+}
+
+/* File operations of the fsl-hv management device */
+static const struct file_operations fsl_hv_fops = {
+	.owner		= THIS_MODULE,
+	.open		= fsl_hv_open,
+	.release	= fsl_hv_close,
+	.read		= fsl_hv_read,
+	.poll		= fsl_hv_poll,
+	.unlocked_ioctl	= fsl_hv_ioctl,
+};
+
+/* The misc device through which user space reaches the driver */
+static struct miscdevice fsl_hv_misc_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "fsl-hv",
+	.fops	= &fsl_hv_fops,
+};
+
+/*
+ * Interrupt handler for the shutdown doorbell
+ *
+ * Requests an orderly (not forced) power-off.  Neither argument is used.
+ */
+static irqreturn_t fsl_hv_shutdown_isr(int irq, void *data)
+{
+	orderly_poweroff(false);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up the hypervisor handle of the parent of a node
+ *
+ * The handle is the value of the parent's 'hv-handle' property.  Returns the
+ * handle, or -ENODEV if there is no parent or the parent has no usable
+ * handle property.
+ */
+static int get_parent_handle(struct device_node *np)
+{
+	struct device_node *parent = of_get_parent(np);
+	const uint32_t *prop;
+	int handle = -ENODEV;
+	int len;
+
+	/* It's not really possible for this to fail */
+	if (!parent)
+		return -ENODEV;
+
+	/*
+	 * "hv-handle" is the proper property name; some older versions of
+	 * the hypervisor used "reg" instead, so fall back to that.
+	 */
+	prop = of_get_property(parent, "hv-handle", &len);
+	if (!prop)
+		prop = of_get_property(parent, "reg", &len);
+
+	/* Anything but a single 32-bit cell means the node is malformed */
+	if (prop && (len == sizeof(uint32_t)))
+		handle = be32_to_cpup(prop);
+
+	of_node_put(parent);
+
+	return handle;
+}
+
+/*
+ * Subscribe to failover events
+ *
+ * Device drivers call this to add their notifier block to the chain of
+ * fail-over subscribers.  The return value is that of
+ * blocking_notifier_chain_register().
+ */
+int fsl_hv_failover_register(struct notifier_block *nb)
+{
+	int err = blocking_notifier_chain_register(&failover_subscribers, nb);
+
+	return err;
+}
+EXPORT_SYMBOL(fsl_hv_failover_register);
+
+/*
+ * Unsubscribe from failover events
+ *
+ * Removes the notifier block from the chain of fail-over subscribers.  The
+ * return value is that of blocking_notifier_chain_unregister().
+ */
+int fsl_hv_failover_unregister(struct notifier_block *nb)
+{
+	int err = blocking_notifier_chain_unregister(&failover_subscribers, nb);
+
+	return err;
+}
+EXPORT_SYMBOL(fsl_hv_failover_unregister);
+
+/*
+ * Return TRUE if we're running under FSL hypervisor
+ *
+ * Returns non-zero if we are running under the Freescale hypervisor, and
+ * zero if we are not.
+ *
+ * Two things are checked.  MSR[GS] must be set, which means we are running
+ * under some hypervisor.  Then the device tree must have a "/hypervisor"
+ * node that carries a property named "fsl,hv-version".
+ */
+static int has_fsl_hypervisor(void)
+{
+	struct device_node *hv;
+	int found = 0;
+
+	if (!(mfmsr() & MSR_GS))
+		return 0;
+
+	hv = of_find_node_by_path("/hypervisor");
+	if (hv) {
+		if (of_find_property(hv, "fsl,hv-version", NULL))
+			found = 1;
+		of_node_put(hv);
+	}
+
+	return found;
+}
+
+/*
+ * Freescale hypervisor management driver init
+ *
+ * This function is called when this module is loaded.
+ *
+ * Register ourselves as a miscellaneous driver.  This will register the
+ * fops structure and create the right sysfs entries for udev.  Then request
+ * an interrupt for every "epapr,hv-receive-doorbell" node in the device
+ * tree.  A node that cannot be set up is reported and skipped.
+ *
+ * Returns 0 on success, -ENODEV if we are not running under the Freescale
+ * hypervisor, the error from misc_register() if registration fails, or
+ * -ENOMEM if a per-ISR structure cannot be allocated.
+ */
+static int __init fsl_hypervisor_init(void)
+{
+	struct device_node *np;
+	struct doorbell_isr *dbisr, *n;
+	int ret;
+
+	pr_info("Freescale hypervisor management driver\n");
+
+	if (!has_fsl_hypervisor()) {
+		pr_info("fsl-hv: no hypervisor found\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * The lists must be valid before the device is registered.  As soon
+	 * as misc_register() returns, the device can be opened, and
+	 * fsl_hv_open() adds the new queue to db_list.
+	 */
+	INIT_LIST_HEAD(&db_list);
+	INIT_LIST_HEAD(&isr_list);
+
+	ret = misc_register(&fsl_hv_misc_dev);
+	if (ret) {
+		pr_err("fsl-hv: cannot register device\n");
+		return ret;
+	}
+
+	for_each_compatible_node(np, NULL, "epapr,hv-receive-doorbell") {
+		unsigned int irq;
+		const uint32_t *handle;
+
+		handle = of_get_property(np, "interrupts", NULL);
+		irq = irq_of_parse_and_map(np, 0);
+		if (!handle || (irq == NO_IRQ)) {
+			pr_err("fsl-hv: no 'interrupts' property in %s node\n",
+				np->full_name);
+			continue;
+		}
+
+		dbisr = kzalloc(sizeof(*dbisr), GFP_KERNEL);
+		if (!dbisr) {
+			/*
+			 * We leave the iterator early, so the reference it
+			 * holds on the current node must be dropped here.
+			 */
+			of_node_put(np);
+			goto out_of_memory;
+		}
+
+		dbisr->irq = irq;
+		dbisr->doorbell = be32_to_cpup(handle);
+
+		if (of_device_is_compatible(np, "fsl,hv-shutdown-doorbell")) {
+			/*
+			 * The shutdown doorbell gets its own ISR.  The ISR
+			 * ignores its cookie, but the cookie must still be
+			 * dbisr, because that is what free_irq() is called
+			 * with when the handlers are torn down.
+			 */
+			ret = request_irq(irq, fsl_hv_shutdown_isr, 0,
+					  np->name, dbisr);
+		} else if (of_device_is_compatible(np,
+			"fsl,hv-state-change-doorbell")) {
+			/*
+			 * The state change doorbell triggers a notification if
+			 * the state of the managed partition changes to
+			 * "stopped". We need a separate interrupt handler for
+			 * that, and we also need to know the handle of the
+			 * target partition, not just the handle of the
+			 * doorbell.
+			 */
+			dbisr->partition = ret = get_parent_handle(np);
+			if (ret < 0) {
+				pr_err("fsl-hv: node %s has missing or "
+				       "malformed parent\n", np->full_name);
+				kfree(dbisr);
+				continue;
+			}
+			ret = request_threaded_irq(irq, fsl_hv_state_change_isr,
+						   fsl_hv_state_change_thread,
+						   0, np->name, dbisr);
+		} else
+			ret = request_irq(irq, fsl_hv_isr, 0, np->name, dbisr);
+
+		if (ret < 0) {
+			pr_err("fsl-hv: could not request irq %u for node %s\n",
+			       irq, np->full_name);
+			kfree(dbisr);
+			continue;
+		}
+
+		list_add(&dbisr->list, &isr_list);
+
+		pr_info("fsl-hv: registered handler for doorbell %u\n",
+			dbisr->doorbell);
+	}
+
+	return 0;
+
+out_of_memory:
+	/* Undo every handler that was registered before the failure */
+	list_for_each_entry_safe(dbisr, n, &isr_list, list) {
+		free_irq(dbisr->irq, dbisr);
+		list_del(&dbisr->list);
+		kfree(dbisr);
+	}
+
+	misc_deregister(&fsl_hv_misc_dev);
+
+	return -ENOMEM;
+}
+
+/*
+ * Freescale hypervisor management driver termination
+ *
+ * This function is called when this driver is unloaded.  It releases every
+ * interrupt recorded on isr_list, frees the per-ISR structures, and then
+ * removes the misc device.
+ */
+static void __exit fsl_hypervisor_exit(void)
+{
+	struct doorbell_isr *isr;
+
+	while (!list_empty(&isr_list)) {
+		isr = list_first_entry(&isr_list, struct doorbell_isr, list);
+
+		free_irq(isr->irq, isr);
+		list_del(&isr->list);
+		kfree(isr);
+	}
+
+	misc_deregister(&fsl_hv_misc_dev);
+}
+
+/* Module entry and exit points */
+module_init(fsl_hypervisor_init);
+module_exit(fsl_hypervisor_exit);
+
+/* Module information */
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Freescale hypervisor management driver");
+MODULE_AUTHOR("Timur Tabi <timur@freescale.com>");