Merge remote branch 'origin' into secretlab/next-spi
diff --git a/Documentation/spi/ep93xx_spi b/Documentation/spi/ep93xx_spi
new file mode 100644
index 0000000..6325f5b
--- /dev/null
+++ b/Documentation/spi/ep93xx_spi
@@ -0,0 +1,95 @@
+Cirrus EP93xx SPI controller driver HOWTO
+=========================================
+
+ep93xx_spi driver brings SPI master support for EP93xx SPI controller.  Chip
+selects are implemented with GPIO lines.
+
+NOTE: If possible, don't use SFRMOUT (SFRM1) signal as a chip select. It will
+not work correctly (it cannot be controlled by software). Use GPIO lines
+instead.
+
+Sample configuration
+====================
+
+Typically driver configuration is done in platform board files (the files under
+arch/arm/mach-ep93xx/*.c). In this example we configure MMC over SPI through
+this driver on TS-7260 board. You can adapt the code to suit your needs.
+
+This example uses EGPIO9 as SD/MMC card chip select (this is wired in DIO1
+header on the board).
+
+You need to select CONFIG_MMC_SPI to use mmc_spi driver.
+
+arch/arm/mach-ep93xx/ts72xx.c:
+
+...
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+
+#include <mach/ep93xx_spi.h>
+
+/* this is our GPIO line used for chip select */
+#define MMC_CHIP_SELECT_GPIO EP93XX_GPIO_LINE_EGPIO9
+
+static int ts72xx_mmc_spi_setup(struct spi_device *spi)
+{
+	int err;
+
+	err = gpio_request(MMC_CHIP_SELECT_GPIO, spi->modalias);
+	if (err)
+		return err;
+
+	gpio_direction_output(MMC_CHIP_SELECT_GPIO, 1);
+
+	return 0;
+}
+
+static void ts72xx_mmc_spi_cleanup(struct spi_device *spi)
+{
+	gpio_set_value(MMC_CHIP_SELECT_GPIO, 1);
+	gpio_direction_input(MMC_CHIP_SELECT_GPIO);
+	gpio_free(MMC_CHIP_SELECT_GPIO);
+}
+
+static void ts72xx_mmc_spi_cs_control(struct spi_device *spi, int value)
+{
+	gpio_set_value(MMC_CHIP_SELECT_GPIO, value);
+}
+
+static struct ep93xx_spi_chip_ops ts72xx_mmc_spi_ops = {
+	.setup		= ts72xx_mmc_spi_setup,
+	.cleanup	= ts72xx_mmc_spi_cleanup,
+	.cs_control	= ts72xx_mmc_spi_cs_control,
+};
+
+static struct spi_board_info ts72xx_spi_devices[] __initdata = {
+	{
+		.modalias		= "mmc_spi",
+		.controller_data	= &ts72xx_mmc_spi_ops,
+		/*
+		 * We use 10 MHz even though the maximum is 7.4 MHz. The driver
+		 * will limit it automatically to max. frequency.
+		 */
+		.max_speed_hz		= 10 * 1000 * 1000,
+		.bus_num		= 0,
+		.chip_select		= 0,
+		.mode			= SPI_MODE_0,
+	},
+};
+
+static struct ep93xx_spi_info ts72xx_spi_info = {
+	.num_chipselect	= ARRAY_SIZE(ts72xx_spi_devices),
+};
+
+static void __init ts72xx_init_machine(void)
+{
+	...
+	ep93xx_register_spi(&ts72xx_spi_info, ts72xx_spi_devices,
+			    ARRAY_SIZE(ts72xx_spi_devices));
+}
+
+Thanks to
+=========
+Martin Guy, H. Hartley Sweeten and others who helped me during development of
+the driver. Simplemachines.it donated me a Sim.One board which I used testing
+the driver on EP9307.
diff --git a/Documentation/spi/spidev_fdx.c b/Documentation/spi/spidev_fdx.c
index fc354f7..36ec077 100644
--- a/Documentation/spi/spidev_fdx.c
+++ b/Documentation/spi/spidev_fdx.c
@@ -58,10 +58,10 @@
 		len = sizeof buf;
 
 	buf[0] = 0xaa;
-	xfer[0].tx_buf = (__u64) buf;
+	xfer[0].tx_buf = (unsigned long)buf;
 	xfer[0].len = 1;
 
-	xfer[1].rx_buf = (__u64) buf;
+	xfer[1].rx_buf = (unsigned long) buf;
 	xfer[1].len = len;
 
 	status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer);
diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h b/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h
new file mode 100644
index 0000000..0a37961
--- /dev/null
+++ b/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h
@@ -0,0 +1,27 @@
+#ifndef __ASM_MACH_EP93XX_SPI_H
+#define __ASM_MACH_EP93XX_SPI_H
+
+struct spi_device;
+
+/**
+ * struct ep93xx_spi_info - EP93xx specific SPI descriptor
+ * @num_chipselect: number of chip selects on this board, must be
+ *                  at least one
+ */
+struct ep93xx_spi_info {
+	int	num_chipselect;
+};
+
+/**
+ * struct ep93xx_spi_chip_ops - operation callbacks for SPI slave device
+ * @setup: setup the chip select mechanism
+ * @cleanup: cleanup the chip select mechanism
+ * @cs_control: control the device chip select
+ */
+struct ep93xx_spi_chip_ops {
+	int	(*setup)(struct spi_device *spi);
+	void	(*cleanup)(struct spi_device *spi);
+	void	(*cs_control)(struct spi_device *spi, int value);
+};
+
+#endif /* __ASM_MACH_EP93XX_SPI_H */
diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h
index 42561f4..ecc4fc6 100644
--- a/arch/powerpc/include/asm/mpc52xx_psc.h
+++ b/arch/powerpc/include/asm/mpc52xx_psc.h
@@ -248,6 +248,7 @@
 	u16		tflwfptr;	/* PSC + 0x9e */
 };
 
+#define MPC512x_PSC_FIFO_EOF		0x100
 #define MPC512x_PSC_FIFO_RESET_SLICE	0x80
 #define MPC512x_PSC_FIFO_ENABLE_SLICE	0x01
 #define MPC512x_PSC_FIFO_ENABLE_DMA	0x04
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index b7f518a..707e572 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -22,6 +22,7 @@
 #include <asm/prom.h>
 #include <asm/time.h>
 #include <asm/mpc5121.h>
+#include <asm/mpc52xx_psc.h>
 
 #include "mpc512x.h"
 
@@ -95,9 +96,86 @@
 	}
 }
 
+#define DEFAULT_FIFO_SIZE 16
+
+static unsigned int __init get_fifo_size(struct device_node *np,
+					 char *prop_name)
+{
+	const unsigned int *fp;
+
+	fp = of_get_property(np, prop_name, NULL);
+	if (fp)
+		return *fp;
+
+	pr_warning("no %s property in %s node, defaulting to %d\n",
+		   prop_name, np->full_name, DEFAULT_FIFO_SIZE);
+
+	return DEFAULT_FIFO_SIZE;
+}
+
+#define FIFOC(_base) ((struct mpc512x_psc_fifo __iomem *) \
+		    ((u32)(_base) + sizeof(struct mpc52xx_psc)))
+
+/* Init PSC FIFO space for TX and RX slices */
+void __init mpc512x_psc_fifo_init(void)
+{
+	struct device_node *np;
+	void __iomem *psc;
+	unsigned int tx_fifo_size;
+	unsigned int rx_fifo_size;
+	int fifobase = 0; /* current fifo address in 32 bit words */
+
+	for_each_compatible_node(np, NULL, "fsl,mpc5121-psc") {
+		tx_fifo_size = get_fifo_size(np, "fsl,tx-fifo-size");
+		rx_fifo_size = get_fifo_size(np, "fsl,rx-fifo-size");
+
+		/* size in register is in 4 byte units */
+		tx_fifo_size /= 4;
+		rx_fifo_size /= 4;
+		if (!tx_fifo_size)
+			tx_fifo_size = 1;
+		if (!rx_fifo_size)
+			rx_fifo_size = 1;
+
+		psc = of_iomap(np, 0);
+		if (!psc) {
+			pr_err("%s: Can't map %s device\n",
+				__func__, np->full_name);
+			continue;
+		}
+
+		/* FIFO space is 4KiB, check if requested size is available */
+		if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) {
+			pr_err("%s: no fifo space available for %s\n",
+				__func__, np->full_name);
+			iounmap(psc);
+			/*
+			 * chances are that another device requests less
+			 * fifo space, so we continue.
+			 */
+			continue;
+		}
+
+		/* set tx and rx fifo size registers */
+		out_be32(&FIFOC(psc)->txsz, (fifobase << 16) | tx_fifo_size);
+		fifobase += tx_fifo_size;
+		out_be32(&FIFOC(psc)->rxsz, (fifobase << 16) | rx_fifo_size);
+		fifobase += rx_fifo_size;
+
+		/* reset and enable the slices */
+		out_be32(&FIFOC(psc)->txcmd, 0x80);
+		out_be32(&FIFOC(psc)->txcmd, 0x01);
+		out_be32(&FIFOC(psc)->rxcmd, 0x80);
+		out_be32(&FIFOC(psc)->rxcmd, 0x01);
+
+		iounmap(psc);
+	}
+}
+
 void __init mpc512x_init(void)
 {
 	mpc512x_declare_of_platform_devices();
 	mpc5121_clk_init();
 	mpc512x_restart_init();
+	mpc512x_psc_fifo_init();
 }
diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
index beb4710f..84a35f6 100644
--- a/drivers/serial/mpc52xx_uart.c
+++ b/drivers/serial/mpc52xx_uart.c
@@ -397,34 +397,10 @@
 	return mpc5xxx_get_bus_frequency(p);
 }
 
-#define DEFAULT_FIFO_SIZE 16
-
-static unsigned int __init get_fifo_size(struct device_node *np,
-					 char *fifo_name)
-{
-	const unsigned int *fp;
-
-	fp = of_get_property(np, fifo_name, NULL);
-	if (fp)
-		return *fp;
-
-	pr_warning("no %s property in %s node, defaulting to %d\n",
-		   fifo_name, np->full_name, DEFAULT_FIFO_SIZE);
-
-	return DEFAULT_FIFO_SIZE;
-}
-
-#define FIFOC(_base) ((struct mpc512x_psc_fifo __iomem *) \
-		    ((u32)(_base) + sizeof(struct mpc52xx_psc)))
-
 /* Init PSC FIFO Controller */
 static int __init mpc512x_psc_fifoc_init(void)
 {
 	struct device_node *np;
-	void __iomem *psc;
-	unsigned int tx_fifo_size;
-	unsigned int rx_fifo_size;
-	int fifobase = 0; /* current fifo address in 32 bit words */
 
 	np = of_find_compatible_node(NULL, NULL,
 				     "fsl,mpc5121-psc-fifo");
@@ -447,51 +423,6 @@
 		return -ENODEV;
 	}
 
-	for_each_compatible_node(np, NULL, "fsl,mpc5121-psc-uart") {
-		tx_fifo_size = get_fifo_size(np, "fsl,tx-fifo-size");
-		rx_fifo_size = get_fifo_size(np, "fsl,rx-fifo-size");
-
-		/* size in register is in 4 byte units */
-		tx_fifo_size /= 4;
-		rx_fifo_size /= 4;
-		if (!tx_fifo_size)
-			tx_fifo_size = 1;
-		if (!rx_fifo_size)
-			rx_fifo_size = 1;
-
-		psc = of_iomap(np, 0);
-		if (!psc) {
-			pr_err("%s: Can't map %s device\n",
-				__func__, np->full_name);
-			continue;
-		}
-
-		/* FIFO space is 4KiB, check if requested size is available */
-		if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) {
-			pr_err("%s: no fifo space available for %s\n",
-				__func__, np->full_name);
-			iounmap(psc);
-			/*
-			 * chances are that another device requests less
-			 * fifo space, so we continue.
-			 */
-			continue;
-		}
-		/* set tx and rx fifo size registers */
-		out_be32(&FIFOC(psc)->txsz, (fifobase << 16) | tx_fifo_size);
-		fifobase += tx_fifo_size;
-		out_be32(&FIFOC(psc)->rxsz, (fifobase << 16) | rx_fifo_size);
-		fifobase += rx_fifo_size;
-
-		/* reset and enable the slices */
-		out_be32(&FIFOC(psc)->txcmd, 0x80);
-		out_be32(&FIFOC(psc)->txcmd, 0x01);
-		out_be32(&FIFOC(psc)->rxcmd, 0x80);
-		out_be32(&FIFOC(psc)->rxcmd, 0x01);
-
-		iounmap(psc);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index f950b63..91c2f4f 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -117,6 +117,16 @@
 	help
 	  SPI master controller for DaVinci and DA8xx SPI modules.
 
+config SPI_EP93XX
+	tristate "Cirrus Logic EP93xx SPI controller"
+	depends on ARCH_EP93XX
+	help
+	  This enables using the Cirrus EP93xx SPI controller in master
+	  mode.
+
+	  To compile this driver as a module, choose M here. The module will be
+	  called ep93xx_spi.
+
 config SPI_GPIO
 	tristate "GPIO-based bitbanging SPI Master"
 	depends on GENERIC_GPIO
@@ -165,6 +175,13 @@
 	  This enables using the Freescale MPC52xx Programmable Serial
 	  Controller in master SPI mode.
 
+config SPI_MPC512x_PSC
+	tristate "Freescale MPC512x PSC SPI controller"
+	depends on SPI_MASTER && PPC_MPC512x
+	help
+	  This enables using the Freescale MPC5121 Programmable Serial
+	  Controller in SPI master mode.
+
 config SPI_MPC8xxx
 	tristate "Freescale MPC8xxx SPI controller"
 	depends on FSL_SOC
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index d7d0f89..e9cbd18 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -21,6 +21,7 @@
 obj-$(CONFIG_SPI_DESIGNWARE)		+= dw_spi.o
 obj-$(CONFIG_SPI_DW_PCI)		+= dw_spi_pci.o
 obj-$(CONFIG_SPI_DW_MMIO)		+= dw_spi_mmio.o
+obj-$(CONFIG_SPI_EP93XX)		+= ep93xx_spi.o
 obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
 obj-$(CONFIG_SPI_IMX)			+= spi_imx.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
@@ -30,6 +31,7 @@
 obj-$(CONFIG_SPI_OMAP_100K)		+= omap_spi_100k.o
 obj-$(CONFIG_SPI_ORION)			+= orion_spi.o
 obj-$(CONFIG_SPI_PL022)			+= amba-pl022.o
+obj-$(CONFIG_SPI_MPC512x_PSC)		+= mpc512x_psc_spi.o
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= mpc52xx_psc_spi.o
 obj-$(CONFIG_SPI_MPC52xx)		+= mpc52xx_spi.o
 obj-$(CONFIG_SPI_MPC8xxx)		+= spi_mpc8xxx.o
diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c
index e9aeee1..f0a1418 100644
--- a/drivers/spi/amba-pl022.c
+++ b/drivers/spi/amba-pl022.c
@@ -102,13 +102,21 @@
 /*
  * SSP Control Register 0  - SSP_CR0
  */
-#define SSP_CR0_MASK_DSS	(0x1FUL << 0)
-#define SSP_CR0_MASK_HALFDUP	(0x1UL << 5)
+#define SSP_CR0_MASK_DSS	(0x0FUL << 0)
+#define SSP_CR0_MASK_FRF	(0x3UL << 4)
 #define SSP_CR0_MASK_SPO	(0x1UL << 6)
 #define SSP_CR0_MASK_SPH	(0x1UL << 7)
 #define SSP_CR0_MASK_SCR	(0xFFUL << 8)
-#define SSP_CR0_MASK_CSS	(0x1FUL << 16)
-#define SSP_CR0_MASK_FRF	(0x3UL << 21)
+
+/*
+ * The ST version of this block moves som bits
+ * in SSP_CR0 and extends it to 32 bits
+ */
+#define SSP_CR0_MASK_DSS_ST	(0x1FUL << 0)
+#define SSP_CR0_MASK_HALFDUP_ST	(0x1UL << 5)
+#define SSP_CR0_MASK_CSS_ST	(0x1FUL << 16)
+#define SSP_CR0_MASK_FRF_ST	(0x3UL << 21)
+
 
 /*
  * SSP Control Register 0  - SSP_CR1
@@ -117,16 +125,18 @@
 #define SSP_CR1_MASK_SSE	(0x1UL << 1)
 #define SSP_CR1_MASK_MS		(0x1UL << 2)
 #define SSP_CR1_MASK_SOD	(0x1UL << 3)
-#define SSP_CR1_MASK_RENDN	(0x1UL << 4)
-#define SSP_CR1_MASK_TENDN	(0x1UL << 5)
-#define SSP_CR1_MASK_MWAIT	(0x1UL << 6)
-#define SSP_CR1_MASK_RXIFLSEL	(0x7UL << 7)
-#define SSP_CR1_MASK_TXIFLSEL	(0x7UL << 10)
 
 /*
- * SSP Data Register - SSP_DR
+ * The ST version of this block adds some bits
+ * in SSP_CR1
  */
-#define SSP_DR_MASK_DATA	0xFFFFFFFF
+#define SSP_CR1_MASK_RENDN_ST	(0x1UL << 4)
+#define SSP_CR1_MASK_TENDN_ST	(0x1UL << 5)
+#define SSP_CR1_MASK_MWAIT_ST	(0x1UL << 6)
+#define SSP_CR1_MASK_RXIFLSEL_ST (0x7UL << 7)
+#define SSP_CR1_MASK_TXIFLSEL_ST (0x7UL << 10)
+/* This one is only in the PL023 variant */
+#define SSP_CR1_MASK_FBCLKDEL_ST (0x7UL << 13)
 
 /*
  * SSP Status Register - SSP_SR
@@ -134,7 +144,7 @@
 #define SSP_SR_MASK_TFE		(0x1UL << 0) /* Transmit FIFO empty */
 #define SSP_SR_MASK_TNF		(0x1UL << 1) /* Transmit FIFO not full */
 #define SSP_SR_MASK_RNE		(0x1UL << 2) /* Receive FIFO not empty */
-#define SSP_SR_MASK_RFF 	(0x1UL << 3) /* Receive FIFO full */
+#define SSP_SR_MASK_RFF		(0x1UL << 3) /* Receive FIFO full */
 #define SSP_SR_MASK_BSY		(0x1UL << 4) /* Busy Flag */
 
 /*
@@ -227,7 +237,7 @@
 /*
  * SSP Test Data Register - SSP_TDR
  */
-#define TDR_MASK_TESTDATA 		(0xFFFFFFFF)
+#define TDR_MASK_TESTDATA		(0xFFFFFFFF)
 
 /*
  * Message State
@@ -235,33 +245,33 @@
  * hold a single state value, that's why all this
  * (void *) casting is done here.
  */
-#define STATE_START                     ((void *) 0)
-#define STATE_RUNNING                   ((void *) 1)
-#define STATE_DONE                      ((void *) 2)
-#define STATE_ERROR                     ((void *) -1)
+#define STATE_START			((void *) 0)
+#define STATE_RUNNING			((void *) 1)
+#define STATE_DONE			((void *) 2)
+#define STATE_ERROR			((void *) -1)
 
 /*
  * Queue State
  */
-#define QUEUE_RUNNING                   (0)
-#define QUEUE_STOPPED                   (1)
+#define QUEUE_RUNNING			(0)
+#define QUEUE_STOPPED			(1)
 /*
  * SSP State - Whether Enabled or Disabled
  */
-#define SSP_DISABLED 			(0)
-#define SSP_ENABLED 			(1)
+#define SSP_DISABLED			(0)
+#define SSP_ENABLED			(1)
 
 /*
  * SSP DMA State - Whether DMA Enabled or Disabled
  */
-#define SSP_DMA_DISABLED 		(0)
-#define SSP_DMA_ENABLED 		(1)
+#define SSP_DMA_DISABLED		(0)
+#define SSP_DMA_ENABLED			(1)
 
 /*
  * SSP Clock Defaults
  */
-#define NMDK_SSP_DEFAULT_CLKRATE 0x2
-#define NMDK_SSP_DEFAULT_PRESCALE 0x40
+#define SSP_DEFAULT_CLKRATE 0x2
+#define SSP_DEFAULT_PRESCALE 0x40
 
 /*
  * SSP Clock Parameter ranges
@@ -307,16 +317,22 @@
  * @fifodepth: depth of FIFOs (both)
  * @max_bpw: maximum number of bits per word
  * @unidir: supports unidirection transfers
+ * @extended_cr: 32 bit wide control register 0 with extra
+ * features and extra features in CR1 as found in the ST variants
+ * @pl023: supports a subset of the ST extensions called "PL023"
  */
 struct vendor_data {
 	int fifodepth;
 	int max_bpw;
 	bool unidir;
+	bool extended_cr;
+	bool pl023;
 };
 
 /**
  * struct pl022 - This is the private SSP driver data structure
  * @adev: AMBA device model hookup
+ * @vendor: Vendor data for the IP block
  * @phybase: The physical memory where the SSP device resides
  * @virtbase: The virtual memory where the SSP is mapped
  * @master: SPI framework hookup
@@ -369,7 +385,8 @@
 
 /**
  * struct chip_data - To maintain runtime state of SSP for each client chip
- * @cr0: Value of control register CR0 of SSP
+ * @cr0: Value of control register CR0 of SSP - on later ST variants this
+ *       register is 32 bits wide rather than just 16
  * @cr1: Value of control register CR1 of SSP
  * @dmacr: Value of DMA control Register of SSP
  * @cpsr: Value of Clock prescale register
@@ -384,7 +401,7 @@
  * This would be set according to the current message that would be served
  */
 struct chip_data {
-	u16 cr0;
+	u32 cr0;
 	u16 cr1;
 	u16 dmacr;
 	u16 cpsr;
@@ -517,7 +534,10 @@
 {
 	struct chip_data *chip = pl022->cur_chip;
 
-	writew(chip->cr0, SSP_CR0(pl022->virtbase));
+	if (pl022->vendor->extended_cr)
+		writel(chip->cr0, SSP_CR0(pl022->virtbase));
+	else
+		writew(chip->cr0, SSP_CR0(pl022->virtbase));
 	writew(chip->cr1, SSP_CR1(pl022->virtbase));
 	writew(chip->dmacr, SSP_DMACR(pl022->virtbase));
 	writew(chip->cpsr, SSP_CPSR(pl022->virtbase));
@@ -525,38 +545,70 @@
 	writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
 }
 
-/**
- * load_ssp_default_config - Load default configuration for SSP
- * @pl022: SSP driver private data structure
- */
-
 /*
  * Default SSP Register Values
  */
 #define DEFAULT_SSP_REG_CR0 ( \
 	GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS, 0)	| \
-	GEN_MASK_BITS(SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, SSP_CR0_MASK_HALFDUP, 5) | \
+	GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF, 4) | \
 	GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \
 	GEN_MASK_BITS(SSP_CLK_SECOND_EDGE, SSP_CR0_MASK_SPH, 7) | \
-	GEN_MASK_BITS(NMDK_SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) | \
-	GEN_MASK_BITS(SSP_BITS_8, SSP_CR0_MASK_CSS, 16)	| \
-	GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF, 21) \
+	GEN_MASK_BITS(SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) \
+)
+
+/* ST versions have slightly different bit layout */
+#define DEFAULT_SSP_REG_CR0_ST ( \
+	GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS_ST, 0)	| \
+	GEN_MASK_BITS(SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, SSP_CR0_MASK_HALFDUP_ST, 5) | \
+	GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \
+	GEN_MASK_BITS(SSP_CLK_SECOND_EDGE, SSP_CR0_MASK_SPH, 7) | \
+	GEN_MASK_BITS(SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) | \
+	GEN_MASK_BITS(SSP_BITS_8, SSP_CR0_MASK_CSS_ST, 16)	| \
+	GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF_ST, 21) \
+)
+
+/* The PL023 version is slightly different again */
+#define DEFAULT_SSP_REG_CR0_ST_PL023 ( \
+	GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS_ST, 0)	| \
+	GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \
+	GEN_MASK_BITS(SSP_CLK_SECOND_EDGE, SSP_CR0_MASK_SPH, 7) | \
+	GEN_MASK_BITS(SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) \
 )
 
 #define DEFAULT_SSP_REG_CR1 ( \
 	GEN_MASK_BITS(LOOPBACK_DISABLED, SSP_CR1_MASK_LBM, 0) | \
 	GEN_MASK_BITS(SSP_DISABLED, SSP_CR1_MASK_SSE, 1) | \
 	GEN_MASK_BITS(SSP_MASTER, SSP_CR1_MASK_MS, 2) | \
+	GEN_MASK_BITS(DO_NOT_DRIVE_TX, SSP_CR1_MASK_SOD, 3) \
+)
+
+/* ST versions extend this register to use all 16 bits */
+#define DEFAULT_SSP_REG_CR1_ST ( \
+	DEFAULT_SSP_REG_CR1 | \
+	GEN_MASK_BITS(SSP_RX_MSB, SSP_CR1_MASK_RENDN_ST, 4) | \
+	GEN_MASK_BITS(SSP_TX_MSB, SSP_CR1_MASK_TENDN_ST, 5) | \
+	GEN_MASK_BITS(SSP_MWIRE_WAIT_ZERO, SSP_CR1_MASK_MWAIT_ST, 6) |\
+	GEN_MASK_BITS(SSP_RX_1_OR_MORE_ELEM, SSP_CR1_MASK_RXIFLSEL_ST, 7) | \
+	GEN_MASK_BITS(SSP_TX_1_OR_MORE_EMPTY_LOC, SSP_CR1_MASK_TXIFLSEL_ST, 10) \
+)
+
+/*
+ * The PL023 variant has further differences: no loopback mode, no microwire
+ * support, and a new clock feedback delay setting.
+ */
+#define DEFAULT_SSP_REG_CR1_ST_PL023 ( \
+	GEN_MASK_BITS(SSP_DISABLED, SSP_CR1_MASK_SSE, 1) | \
+	GEN_MASK_BITS(SSP_MASTER, SSP_CR1_MASK_MS, 2) | \
 	GEN_MASK_BITS(DO_NOT_DRIVE_TX, SSP_CR1_MASK_SOD, 3) | \
-	GEN_MASK_BITS(SSP_RX_MSB, SSP_CR1_MASK_RENDN, 4) | \
-	GEN_MASK_BITS(SSP_TX_MSB, SSP_CR1_MASK_TENDN, 5) | \
-	GEN_MASK_BITS(SSP_MWIRE_WAIT_ZERO, SSP_CR1_MASK_MWAIT, 6) |\
-	GEN_MASK_BITS(SSP_RX_1_OR_MORE_ELEM, SSP_CR1_MASK_RXIFLSEL, 7) | \
-	GEN_MASK_BITS(SSP_TX_1_OR_MORE_EMPTY_LOC, SSP_CR1_MASK_TXIFLSEL, 10) \
+	GEN_MASK_BITS(SSP_RX_MSB, SSP_CR1_MASK_RENDN_ST, 4) | \
+	GEN_MASK_BITS(SSP_TX_MSB, SSP_CR1_MASK_TENDN_ST, 5) | \
+	GEN_MASK_BITS(SSP_RX_1_OR_MORE_ELEM, SSP_CR1_MASK_RXIFLSEL_ST, 7) | \
+	GEN_MASK_BITS(SSP_TX_1_OR_MORE_EMPTY_LOC, SSP_CR1_MASK_TXIFLSEL_ST, 10) | \
+	GEN_MASK_BITS(SSP_FEEDBACK_CLK_DELAY_NONE, SSP_CR1_MASK_FBCLKDEL_ST, 13) \
 )
 
 #define DEFAULT_SSP_REG_CPSR ( \
-	GEN_MASK_BITS(NMDK_SSP_DEFAULT_PRESCALE, SSP_CPSR_MASK_CPSDVSR, 0) \
+	GEN_MASK_BITS(SSP_DEFAULT_PRESCALE, SSP_CPSR_MASK_CPSDVSR, 0) \
 )
 
 #define DEFAULT_SSP_REG_DMACR (\
@@ -564,11 +616,22 @@
 	GEN_MASK_BITS(SSP_DMA_DISABLED, SSP_DMACR_MASK_TXDMAE, 1) \
 )
 
-
+/**
+ * load_ssp_default_config - Load default configuration for SSP
+ * @pl022: SSP driver private data structure
+ */
 static void load_ssp_default_config(struct pl022 *pl022)
 {
-	writew(DEFAULT_SSP_REG_CR0, SSP_CR0(pl022->virtbase));
-	writew(DEFAULT_SSP_REG_CR1, SSP_CR1(pl022->virtbase));
+	if (pl022->vendor->pl023) {
+		writel(DEFAULT_SSP_REG_CR0_ST_PL023, SSP_CR0(pl022->virtbase));
+		writew(DEFAULT_SSP_REG_CR1_ST_PL023, SSP_CR1(pl022->virtbase));
+	} else if (pl022->vendor->extended_cr) {
+		writel(DEFAULT_SSP_REG_CR0_ST, SSP_CR0(pl022->virtbase));
+		writew(DEFAULT_SSP_REG_CR1_ST, SSP_CR1(pl022->virtbase));
+	} else {
+		writew(DEFAULT_SSP_REG_CR0, SSP_CR0(pl022->virtbase));
+		writew(DEFAULT_SSP_REG_CR1, SSP_CR1(pl022->virtbase));
+	}
 	writew(DEFAULT_SSP_REG_DMACR, SSP_DMACR(pl022->virtbase));
 	writew(DEFAULT_SSP_REG_CPSR, SSP_CPSR(pl022->virtbase));
 	writew(DISABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
@@ -1008,7 +1071,7 @@
 		writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE),
 		       SSP_CR1(pl022->virtbase));
 
-		dev_dbg(&pl022->adev->dev, "POLLING TRANSFER ONGOING ... \n");
+		dev_dbg(&pl022->adev->dev, "polling transfer ongoing ...\n");
 		/* FIXME: insert a timeout so we don't hang here indefinately */
 		while (pl022->tx < pl022->tx_end || pl022->rx < pl022->rx_end)
 			readwriter(pl022);
@@ -1148,7 +1211,6 @@
 	 * A wait_queue on the pl022->busy could be used, but then the common
 	 * execution path (pump_messages) would be required to call wake_up or
 	 * friends on every SPI message. Do this instead */
-	pl022->run = QUEUE_STOPPED;
 	while (!list_empty(&pl022->queue) && pl022->busy && limit--) {
 		spin_unlock_irqrestore(&pl022->queue_lock, flags);
 		msleep(10);
@@ -1157,6 +1219,7 @@
 
 	if (!list_empty(&pl022->queue) || pl022->busy)
 		status = -EBUSY;
+	else pl022->run = QUEUE_STOPPED;
 
 	spin_unlock_irqrestore(&pl022->queue_lock, flags);
 
@@ -1280,11 +1343,21 @@
 				"Wait State is configured incorrectly\n");
 			return -EINVAL;
 		}
-		if ((chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX)
-		    && (chip_info->duplex !=
-			SSP_MICROWIRE_CHANNEL_HALF_DUPLEX)) {
-			dev_err(chip_info->dev,
-				"DUPLEX is configured incorrectly\n");
+		/* Half duplex is only available in the ST Micro version */
+		if (pl022->vendor->extended_cr) {
+			if ((chip_info->duplex !=
+			     SSP_MICROWIRE_CHANNEL_FULL_DUPLEX)
+			    && (chip_info->duplex !=
+				SSP_MICROWIRE_CHANNEL_HALF_DUPLEX))
+				dev_err(chip_info->dev,
+					"Microwire duplex mode is configured incorrectly\n");
+				return -EINVAL;
+		} else {
+			if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX)
+				dev_err(chip_info->dev,
+					"Microwire half duplex mode requested,"
+					" but this is only available in the"
+					" ST version of PL022\n");
 			return -EINVAL;
 		}
 	}
@@ -1581,22 +1654,49 @@
 
 	chip->cpsr = chip_info->clk_freq.cpsdvsr;
 
-	SSP_WRITE_BITS(chip->cr0, chip_info->data_size, SSP_CR0_MASK_DSS, 0);
-	SSP_WRITE_BITS(chip->cr0, chip_info->duplex, SSP_CR0_MASK_HALFDUP, 5);
+	/* Special setup for the ST micro extended control registers */
+	if (pl022->vendor->extended_cr) {
+		if (pl022->vendor->pl023) {
+			/* These bits are only in the PL023 */
+			SSP_WRITE_BITS(chip->cr1, chip_info->clkdelay,
+				       SSP_CR1_MASK_FBCLKDEL_ST, 13);
+		} else {
+			/* These bits are in the PL022 but not PL023 */
+			SSP_WRITE_BITS(chip->cr0, chip_info->duplex,
+				       SSP_CR0_MASK_HALFDUP_ST, 5);
+			SSP_WRITE_BITS(chip->cr0, chip_info->ctrl_len,
+				       SSP_CR0_MASK_CSS_ST, 16);
+			SSP_WRITE_BITS(chip->cr0, chip_info->iface,
+				       SSP_CR0_MASK_FRF_ST, 21);
+			SSP_WRITE_BITS(chip->cr1, chip_info->wait_state,
+				       SSP_CR1_MASK_MWAIT_ST, 6);
+		}
+		SSP_WRITE_BITS(chip->cr0, chip_info->data_size,
+			       SSP_CR0_MASK_DSS_ST, 0);
+		SSP_WRITE_BITS(chip->cr1, chip_info->endian_rx,
+			       SSP_CR1_MASK_RENDN_ST, 4);
+		SSP_WRITE_BITS(chip->cr1, chip_info->endian_tx,
+			       SSP_CR1_MASK_TENDN_ST, 5);
+		SSP_WRITE_BITS(chip->cr1, chip_info->rx_lev_trig,
+			       SSP_CR1_MASK_RXIFLSEL_ST, 7);
+		SSP_WRITE_BITS(chip->cr1, chip_info->tx_lev_trig,
+			       SSP_CR1_MASK_TXIFLSEL_ST, 10);
+	} else {
+		SSP_WRITE_BITS(chip->cr0, chip_info->data_size,
+			       SSP_CR0_MASK_DSS, 0);
+		SSP_WRITE_BITS(chip->cr0, chip_info->iface,
+			       SSP_CR0_MASK_FRF, 4);
+	}
+	/* Stuff that is common for all versions */
 	SSP_WRITE_BITS(chip->cr0, chip_info->clk_pol, SSP_CR0_MASK_SPO, 6);
 	SSP_WRITE_BITS(chip->cr0, chip_info->clk_phase, SSP_CR0_MASK_SPH, 7);
 	SSP_WRITE_BITS(chip->cr0, chip_info->clk_freq.scr, SSP_CR0_MASK_SCR, 8);
-	SSP_WRITE_BITS(chip->cr0, chip_info->ctrl_len, SSP_CR0_MASK_CSS, 16);
-	SSP_WRITE_BITS(chip->cr0, chip_info->iface, SSP_CR0_MASK_FRF, 21);
-	SSP_WRITE_BITS(chip->cr1, chip_info->lbm, SSP_CR1_MASK_LBM, 0);
+	/* Loopback is available on all versions except PL023 */
+	if (!pl022->vendor->pl023)
+		SSP_WRITE_BITS(chip->cr1, chip_info->lbm, SSP_CR1_MASK_LBM, 0);
 	SSP_WRITE_BITS(chip->cr1, SSP_DISABLED, SSP_CR1_MASK_SSE, 1);
 	SSP_WRITE_BITS(chip->cr1, chip_info->hierarchy, SSP_CR1_MASK_MS, 2);
 	SSP_WRITE_BITS(chip->cr1, chip_info->slave_tx_disable, SSP_CR1_MASK_SOD, 3);
-	SSP_WRITE_BITS(chip->cr1, chip_info->endian_rx, SSP_CR1_MASK_RENDN, 4);
-	SSP_WRITE_BITS(chip->cr1, chip_info->endian_tx, SSP_CR1_MASK_TENDN, 5);
-	SSP_WRITE_BITS(chip->cr1, chip_info->wait_state, SSP_CR1_MASK_MWAIT, 6);
-	SSP_WRITE_BITS(chip->cr1, chip_info->rx_lev_trig, SSP_CR1_MASK_RXIFLSEL, 7);
-	SSP_WRITE_BITS(chip->cr1, chip_info->tx_lev_trig, SSP_CR1_MASK_TXIFLSEL, 10);
 
 	/* Save controller_state */
 	spi_set_ctldata(spi, chip);
@@ -1809,6 +1909,8 @@
 	.fifodepth = 8,
 	.max_bpw = 16,
 	.unidir = false,
+	.extended_cr = false,
+	.pl023 = false,
 };
 
 
@@ -1816,6 +1918,16 @@
 	.fifodepth = 32,
 	.max_bpw = 32,
 	.unidir = false,
+	.extended_cr = true,
+	.pl023 = false,
+};
+
+static struct vendor_data vendor_st_pl023 = {
+	.fifodepth = 32,
+	.max_bpw = 32,
+	.unidir = false,
+	.extended_cr = true,
+	.pl023 = true,
 };
 
 static struct amba_id pl022_ids[] = {
@@ -1837,6 +1949,18 @@
 		.mask	= 0xffffffff,
 		.data	= &vendor_st,
 	},
+	{
+		/*
+		 * ST-Ericsson derivative "PL023" (this is not
+		 * an official ARM number), this is a PL022 SSP block
+		 * stripped to SPI mode only, it has 32bit wide
+		 * and 32 locations deep TX/RX FIFO but no extended
+		 * CR0/CR1 register
+		 */
+		.id     = 0x00080023,
+		.mask   = 0xffffffff,
+		.data   = &vendor_st_pl023,
+	},
 	{ 0, 0 },
 };
 
diff --git a/drivers/spi/davinci_spi.c b/drivers/spi/davinci_spi.c
index 95afb6b7..b85090c 100644
--- a/drivers/spi/davinci_spi.c
+++ b/drivers/spi/davinci_spi.c
@@ -301,7 +301,7 @@
 	struct davinci_spi *davinci_spi;
 	struct davinci_spi_platform_data *pdata;
 	u8 bits_per_word = 0;
-	u32 hz = 0, prescale;
+	u32 hz = 0, prescale = 0, clkspeed;
 
 	davinci_spi = spi_master_get_devdata(spi->master);
 	pdata = davinci_spi->pdata;
@@ -338,10 +338,16 @@
 	set_fmt_bits(davinci_spi->base, bits_per_word & 0x1f,
 			spi->chip_select);
 
-	prescale = ((clk_get_rate(davinci_spi->clk) / hz) - 1) & 0xff;
+	clkspeed = clk_get_rate(davinci_spi->clk);
+	if (hz > clkspeed / 2)
+		prescale = 1 << 8;
+	if (hz < clkspeed / 256)
+		prescale = 255 << 8;
+	if (!prescale)
+		prescale = ((clkspeed / hz - 1) << 8) & 0x0000ff00;
 
 	clear_fmt_bits(davinci_spi->base, 0x0000ff00, spi->chip_select);
-	set_fmt_bits(davinci_spi->base, prescale << 8, spi->chip_select);
+	set_fmt_bits(davinci_spi->base, prescale, spi->chip_select);
 
 	return 0;
 }
diff --git a/drivers/spi/ep93xx_spi.c b/drivers/spi/ep93xx_spi.c
new file mode 100644
index 0000000..0ba35df
--- /dev/null
+++ b/drivers/spi/ep93xx_spi.c
@@ -0,0 +1,938 @@
+/*
+ * Driver for Cirrus Logic EP93xx SPI controller.
+ *
+ * Copyright (c) 2010 Mika Westerberg
+ *
+ * Explicit FIFO handling code was inspired by amba-pl022 driver.
+ *
+ * Chip select support using other than built-in GPIOs by H. Hartley Sweeten.
+ *
+ * For more information about the SPI controller see documentation on Cirrus
+ * Logic web site:
+ *     http://www.cirrus.com/en/pubs/manual/EP93xx_Users_Guide_UM1.pdf
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/spi/spi.h>
+
+#include <mach/ep93xx_spi.h>
+
+#define SSPCR0			0x0000
+#define SSPCR0_MODE_SHIFT	6
+#define SSPCR0_SCR_SHIFT	8
+
+#define SSPCR1			0x0004
+#define SSPCR1_RIE		BIT(0)
+#define SSPCR1_TIE		BIT(1)
+#define SSPCR1_RORIE		BIT(2)
+#define SSPCR1_LBM		BIT(3)
+#define SSPCR1_SSE		BIT(4)
+#define SSPCR1_MS		BIT(5)
+#define SSPCR1_SOD		BIT(6)
+
+#define SSPDR			0x0008
+
+#define SSPSR			0x000c
+#define SSPSR_TFE		BIT(0)
+#define SSPSR_TNF		BIT(1)
+#define SSPSR_RNE		BIT(2)
+#define SSPSR_RFF		BIT(3)
+#define SSPSR_BSY		BIT(4)
+#define SSPCPSR			0x0010
+
+#define SSPIIR			0x0014
+#define SSPIIR_RIS		BIT(0)
+#define SSPIIR_TIS		BIT(1)
+#define SSPIIR_RORIS		BIT(2)
+#define SSPICR			SSPIIR
+
+/* timeout in milliseconds */
+#define SPI_TIMEOUT		5
+/* maximum depth of RX/TX FIFO */
+#define SPI_FIFO_SIZE		8
+
+/**
+ * struct ep93xx_spi - EP93xx SPI controller structure
+ * @lock: spinlock that protects concurrent accesses to fields @running,
+ *        @current_msg and @msg_queue
+ * @pdev: pointer to platform device
+ * @clk: clock for the controller
+ * @regs_base: pointer to ioremap()'d registers
+ * @irq: IRQ number used by the driver
+ * @min_rate: minimum clock rate (in Hz) supported by the controller
+ * @max_rate: maximum clock rate (in Hz) supported by the controller
+ * @running: is the queue running
+ * @wq: workqueue used by the driver
+ * @msg_work: work that is queued for the driver
+ * @wait: wait here until given transfer is completed
+ * @msg_queue: queue for the messages
+ * @current_msg: message that is currently processed (or %NULL if none)
+ * @tx: current byte in transfer to transmit
+ * @rx: current byte in transfer to receive
+ * @fifo_level: how full is FIFO (%0..%SPI_FIFO_SIZE - %1). Receiving one
+ *              frame decreases this level and sending one frame increases it.
+ *
+ * This structure holds EP93xx SPI controller specific information. When
+ * @running is %true, driver accepts transfer requests from protocol drivers.
+ * @current_msg is used to hold pointer to the message that is currently
+ * processed. If @current_msg is %NULL, it means that no processing is going
+ * on.
+ *
+ * Most of the fields are only written once and they can be accessed without
+ * taking the @lock. Fields that are accessed concurrently are: @current_msg,
+ * @running, and @msg_queue.
+ */
+struct ep93xx_spi {
+	spinlock_t			lock;
+	const struct platform_device	*pdev;
+	struct clk			*clk;
+	void __iomem			*regs_base;
+	int				irq;
+	unsigned long			min_rate;
+	unsigned long			max_rate;
+	bool				running;
+	struct workqueue_struct		*wq;
+	struct work_struct		msg_work;
+	struct completion		wait;
+	struct list_head		msg_queue;
+	struct spi_message		*current_msg;
+	size_t				tx;
+	size_t				rx;
+	size_t				fifo_level;
+};
+
+/**
+ * struct ep93xx_spi_chip - SPI device hardware settings
+ * @spi: back pointer to the SPI device
+ * @rate: max rate in hz this chip supports
+ * @div_cpsr: cpsr (pre-scaler) divider
+ * @div_scr: scr divider
+ * @dss: bits per word (4 - 16 bits)
+ * @ops: private chip operations
+ *
+ * This structure is used to store hardware register specific settings for each
+ * SPI device. Settings are written to hardware by function
+ * ep93xx_spi_chip_setup().
+ */
+struct ep93xx_spi_chip {
+	const struct spi_device		*spi;
+	unsigned long			rate;
+	u8				div_cpsr;
+	u8				div_scr;
+	u8				dss;
+	struct ep93xx_spi_chip_ops	*ops;
+};
+
+/* converts bits per word to CR0.DSS value */
+#define bits_per_word_to_dss(bpw)	((bpw) - 1)
+
+static inline void
+ep93xx_spi_write_u8(const struct ep93xx_spi *espi, u16 reg, u8 value)
+{
+	__raw_writeb(value, espi->regs_base + reg);
+}
+
+static inline u8
+ep93xx_spi_read_u8(const struct ep93xx_spi *spi, u16 reg)
+{
+	return __raw_readb(spi->regs_base + reg);
+}
+
+static inline void
+ep93xx_spi_write_u16(const struct ep93xx_spi *espi, u16 reg, u16 value)
+{
+	__raw_writew(value, espi->regs_base + reg);
+}
+
+static inline u16
+ep93xx_spi_read_u16(const struct ep93xx_spi *spi, u16 reg)
+{
+	return __raw_readw(spi->regs_base + reg);
+}
+
+static int ep93xx_spi_enable(const struct ep93xx_spi *espi)
+{
+	u8 regval;
+	int err;
+
+	err = clk_enable(espi->clk);
+	if (err)
+		return err;
+
+	regval = ep93xx_spi_read_u8(espi, SSPCR1);
+	regval |= SSPCR1_SSE;
+	ep93xx_spi_write_u8(espi, SSPCR1, regval);
+
+	return 0;
+}
+
+static void ep93xx_spi_disable(const struct ep93xx_spi *espi)
+{
+	u8 regval;
+
+	regval = ep93xx_spi_read_u8(espi, SSPCR1);
+	regval &= ~SSPCR1_SSE;
+	ep93xx_spi_write_u8(espi, SSPCR1, regval);
+
+	clk_disable(espi->clk);
+}
+
+static void ep93xx_spi_enable_interrupts(const struct ep93xx_spi *espi)
+{
+	u8 regval;
+
+	regval = ep93xx_spi_read_u8(espi, SSPCR1);
+	regval |= (SSPCR1_RORIE | SSPCR1_TIE | SSPCR1_RIE);
+	ep93xx_spi_write_u8(espi, SSPCR1, regval);
+}
+
+static void ep93xx_spi_disable_interrupts(const struct ep93xx_spi *espi)
+{
+	u8 regval;
+
+	regval = ep93xx_spi_read_u8(espi, SSPCR1);
+	regval &= ~(SSPCR1_RORIE | SSPCR1_TIE | SSPCR1_RIE);
+	ep93xx_spi_write_u8(espi, SSPCR1, regval);
+}
+
+/**
+ * ep93xx_spi_calc_divisors() - calculates SPI clock divisors
+ * @espi: ep93xx SPI controller struct
+ * @chip: divisors are calculated for this chip
+ * @rate: desired SPI output clock rate
+ *
+ * Function calculates cpsr (clock pre-scaler) and scr divisors based on
+ * given @rate and places them to @chip->div_cpsr and @chip->div_scr. If,
+ * for some reason, divisors cannot be calculated nothing is stored and
+ * %-EINVAL is returned.
+ */
+static int ep93xx_spi_calc_divisors(const struct ep93xx_spi *espi,
+				    struct ep93xx_spi_chip *chip,
+				    unsigned long rate)
+{
+	unsigned long spi_clk_rate = clk_get_rate(espi->clk);
+	int cpsr, scr;
+
+	/*
+	 * Make sure that max value is between values supported by the
+	 * controller. Note that minimum value is already checked in
+	 * ep93xx_spi_transfer().
+	 */
+	rate = clamp(rate, espi->min_rate, espi->max_rate);
+
+	/*
+	 * Calculate divisors so that we can get speed according the
+	 * following formula:
+	 *	rate = spi_clock_rate / (cpsr * (1 + scr))
+	 *
+	 * cpsr must be even number and starts from 2, scr can be any number
+	 * between 0 and 255.
+	 */
+	for (cpsr = 2; cpsr <= 254; cpsr += 2) {
+		for (scr = 0; scr <= 255; scr++) {
+			if ((spi_clk_rate / (cpsr * (scr + 1))) <= rate) {
+				chip->div_scr = (u8)scr;
+				chip->div_cpsr = (u8)cpsr;
+				return 0;
+			}
+		}
+	}
+
+	return -EINVAL;
+}
+
+static void ep93xx_spi_cs_control(struct spi_device *spi, bool control)
+{
+	struct ep93xx_spi_chip *chip = spi_get_ctldata(spi);
+	int value = (spi->mode & SPI_CS_HIGH) ? control : !control;
+
+	if (chip->ops && chip->ops->cs_control)
+		chip->ops->cs_control(spi, value);
+}
+
+/**
+ * ep93xx_spi_setup() - setup an SPI device
+ * @spi: SPI device to setup
+ *
+ * This function sets up SPI device mode, speed etc. Can be called multiple
+ * times for a single device. Returns %0 in case of success, negative error in
+ * case of failure. When this function returns success, the device is
+ * deselected.
+ */
+static int ep93xx_spi_setup(struct spi_device *spi)
+{
+	struct ep93xx_spi *espi = spi_master_get_devdata(spi->master);
+	struct ep93xx_spi_chip *chip;
+
+	if (spi->bits_per_word < 4 || spi->bits_per_word > 16) {
+		dev_err(&espi->pdev->dev, "invalid bits per word %d\n",
+			spi->bits_per_word);
+		return -EINVAL;
+	}
+
+	chip = spi_get_ctldata(spi);
+	if (!chip) {
+		dev_dbg(&espi->pdev->dev, "initial setup for %s\n",
+			spi->modalias);
+
+		chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+		if (!chip)
+			return -ENOMEM;
+
+		chip->spi = spi;
+		chip->ops = spi->controller_data;
+
+		if (chip->ops && chip->ops->setup) {
+			int ret = chip->ops->setup(spi);
+			if (ret) {
+				kfree(chip);
+				return ret;
+			}
+		}
+
+		spi_set_ctldata(spi, chip);
+	}
+
+	if (spi->max_speed_hz != chip->rate) {
+		int err;
+
+		err = ep93xx_spi_calc_divisors(espi, chip, spi->max_speed_hz);
+		if (err != 0) {
+			spi_set_ctldata(spi, NULL);
+			kfree(chip);
+			return err;
+		}
+		chip->rate = spi->max_speed_hz;
+	}
+
+	chip->dss = bits_per_word_to_dss(spi->bits_per_word);
+
+	ep93xx_spi_cs_control(spi, false);
+	return 0;
+}
+
+/**
+ * ep93xx_spi_transfer() - queue message to be transferred
+ * @spi: target SPI device
+ * @msg: message to be transferred
+ *
+ * This function is called by SPI device drivers when they are going to transfer
+ * a new message. It simply puts the message in the queue and schedules
+ * workqueue to perform the actual transfer later on.
+ *
+ * Returns %0 on success and negative error in case of failure.
+ */
+static int ep93xx_spi_transfer(struct spi_device *spi, struct spi_message *msg)
+{
+	struct ep93xx_spi *espi = spi_master_get_devdata(spi->master);
+	struct spi_transfer *t;
+	unsigned long flags;
+
+	if (!msg || !msg->complete)
+		return -EINVAL;
+
+	/* first validate each transfer */
+	list_for_each_entry(t, &msg->transfers, transfer_list) {
+		if (t->bits_per_word) {
+			if (t->bits_per_word < 4 || t->bits_per_word > 16)
+				return -EINVAL;
+		}
+		if (t->speed_hz && t->speed_hz < espi->min_rate)
+				return -EINVAL;
+	}
+
+	/*
+	 * Now that we own the message, let's initialize it so that it is
+	 * suitable for us. We use @msg->status to signal whether there was
+	 * error in transfer and @msg->state is used to hold pointer to the
+	 * current transfer (or %NULL if no active current transfer).
+	 */
+	msg->state = NULL;
+	msg->status = 0;
+	msg->actual_length = 0;
+
+	spin_lock_irqsave(&espi->lock, flags);
+	if (!espi->running) {
+		spin_unlock_irqrestore(&espi->lock, flags);
+		return -ESHUTDOWN;
+	}
+	list_add_tail(&msg->queue, &espi->msg_queue);
+	queue_work(espi->wq, &espi->msg_work);
+	spin_unlock_irqrestore(&espi->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ep93xx_spi_cleanup() - cleans up master controller specific state
+ * @spi: SPI device to cleanup
+ *
+ * This function releases master controller specific state for given @spi
+ * device.
+ */
+static void ep93xx_spi_cleanup(struct spi_device *spi)
+{
+	struct ep93xx_spi_chip *chip;
+
+	chip = spi_get_ctldata(spi);
+	if (chip) {
+		if (chip->ops && chip->ops->cleanup)
+			chip->ops->cleanup(spi);
+		spi_set_ctldata(spi, NULL);
+		kfree(chip);
+	}
+}
+
+/**
+ * ep93xx_spi_chip_setup() - configures hardware according to given @chip
+ * @espi: ep93xx SPI controller struct
+ * @chip: chip specific settings
+ *
+ * This function sets up the actual hardware registers with settings given in
+ * @chip. Note that no validation is done so make sure that callers validate
+ * settings before calling this.
+ */
+static void ep93xx_spi_chip_setup(const struct ep93xx_spi *espi,
+				  const struct ep93xx_spi_chip *chip)
+{
+	u16 cr0;
+
+	cr0 = chip->div_scr << SSPCR0_SCR_SHIFT;
+	cr0 |= (chip->spi->mode & (SPI_CPHA|SPI_CPOL)) << SSPCR0_MODE_SHIFT;
+	cr0 |= chip->dss;
+
+	dev_dbg(&espi->pdev->dev, "setup: mode %d, cpsr %d, scr %d, dss %d\n",
+		chip->spi->mode, chip->div_cpsr, chip->div_scr, chip->dss);
+	dev_dbg(&espi->pdev->dev, "setup: cr0 %#x", cr0);
+
+	ep93xx_spi_write_u8(espi, SSPCPSR, chip->div_cpsr);
+	ep93xx_spi_write_u16(espi, SSPCR0, cr0);
+}
+
+static inline int bits_per_word(const struct ep93xx_spi *espi)
+{
+	struct spi_message *msg = espi->current_msg;
+	struct spi_transfer *t = msg->state;
+
+	return t->bits_per_word ? t->bits_per_word : msg->spi->bits_per_word;
+}
+
+static void ep93xx_do_write(struct ep93xx_spi *espi, struct spi_transfer *t)
+{
+	if (bits_per_word(espi) > 8) {
+		u16 tx_val = 0;
+
+		if (t->tx_buf)
+			tx_val = ((u16 *)t->tx_buf)[espi->tx];
+		ep93xx_spi_write_u16(espi, SSPDR, tx_val);
+		espi->tx += sizeof(tx_val);
+	} else {
+		u8 tx_val = 0;
+
+		if (t->tx_buf)
+			tx_val = ((u8 *)t->tx_buf)[espi->tx];
+		ep93xx_spi_write_u8(espi, SSPDR, tx_val);
+		espi->tx += sizeof(tx_val);
+	}
+}
+
+static void ep93xx_do_read(struct ep93xx_spi *espi, struct spi_transfer *t)
+{
+	if (bits_per_word(espi) > 8) {
+		u16 rx_val;
+
+		rx_val = ep93xx_spi_read_u16(espi, SSPDR);
+		if (t->rx_buf)
+			((u16 *)t->rx_buf)[espi->rx] = rx_val;
+		espi->rx += sizeof(rx_val);
+	} else {
+		u8 rx_val;
+
+		rx_val = ep93xx_spi_read_u8(espi, SSPDR);
+		if (t->rx_buf)
+			((u8 *)t->rx_buf)[espi->rx] = rx_val;
+		espi->rx += sizeof(rx_val);
+	}
+}
+
+/**
+ * ep93xx_spi_read_write() - perform next RX/TX transfer
+ * @espi: ep93xx SPI controller struct
+ *
+ * This function transfers next bytes (or half-words) to/from RX/TX FIFOs. If
+ * called several times, the whole transfer will be completed. Returns
+ * %-EINPROGRESS when current transfer was not yet completed otherwise %0.
+ *
+ * When this function is finished, RX FIFO should be empty and TX FIFO should be
+ * full.
+ */
+static int ep93xx_spi_read_write(struct ep93xx_spi *espi)
+{
+	struct spi_message *msg = espi->current_msg;
+	struct spi_transfer *t = msg->state;
+
+	/* read as long as RX FIFO has frames in it */
+	while ((ep93xx_spi_read_u8(espi, SSPSR) & SSPSR_RNE)) {
+		ep93xx_do_read(espi, t);
+		espi->fifo_level--;
+	}
+
+	/* write as long as TX FIFO has room */
+	while (espi->fifo_level < SPI_FIFO_SIZE && espi->tx < t->len) {
+		ep93xx_do_write(espi, t);
+		espi->fifo_level++;
+	}
+
+	if (espi->rx == t->len) {
+		msg->actual_length += t->len;
+		return 0;
+	}
+
+	return -EINPROGRESS;
+}
+
+/**
+ * ep93xx_spi_process_transfer() - processes one SPI transfer
+ * @espi: ep93xx SPI controller struct
+ * @msg: current message
+ * @t: transfer to process
+ *
+ * This function processes one SPI transfer given in @t. Function waits until
+ * transfer is complete (may sleep) and updates @msg->status based on whether
+ * transfer was succesfully processed or not.
+ */
+static void ep93xx_spi_process_transfer(struct ep93xx_spi *espi,
+					struct spi_message *msg,
+					struct spi_transfer *t)
+{
+	struct ep93xx_spi_chip *chip = spi_get_ctldata(msg->spi);
+
+	msg->state = t;
+
+	/*
+	 * Handle any transfer specific settings if needed. We use
+	 * temporary chip settings here and restore original later when
+	 * the transfer is finished.
+	 */
+	if (t->speed_hz || t->bits_per_word) {
+		struct ep93xx_spi_chip tmp_chip = *chip;
+
+		if (t->speed_hz) {
+			int err;
+
+			err = ep93xx_spi_calc_divisors(espi, &tmp_chip,
+						       t->speed_hz);
+			if (err) {
+				dev_err(&espi->pdev->dev,
+					"failed to adjust speed\n");
+				msg->status = err;
+				return;
+			}
+		}
+
+		if (t->bits_per_word)
+			tmp_chip.dss = bits_per_word_to_dss(t->bits_per_word);
+
+		/*
+		 * Set up temporary new hw settings for this transfer.
+		 */
+		ep93xx_spi_chip_setup(espi, &tmp_chip);
+	}
+
+	espi->rx = 0;
+	espi->tx = 0;
+
+	/*
+	 * Now everything is set up for the current transfer. We prime the TX
+	 * FIFO, enable interrupts, and wait for the transfer to complete.
+	 */
+	if (ep93xx_spi_read_write(espi)) {
+		ep93xx_spi_enable_interrupts(espi);
+		wait_for_completion(&espi->wait);
+	}
+
+	/*
+	 * In case of error during transmit, we bail out from processing
+	 * the message.
+	 */
+	if (msg->status)
+		return;
+
+	/*
+	 * After this transfer is finished, perform any possible
+	 * post-transfer actions requested by the protocol driver.
+	 */
+	if (t->delay_usecs) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(usecs_to_jiffies(t->delay_usecs));
+	}
+	if (t->cs_change) {
+		if (!list_is_last(&t->transfer_list, &msg->transfers)) {
+			/*
+			 * In case protocol driver is asking us to drop the
+			 * chipselect briefly, we let the scheduler to handle
+			 * any "delay" here.
+			 */
+			ep93xx_spi_cs_control(msg->spi, false);
+			cond_resched();
+			ep93xx_spi_cs_control(msg->spi, true);
+		}
+	}
+
+	if (t->speed_hz || t->bits_per_word)
+		ep93xx_spi_chip_setup(espi, chip);
+}
+
+/*
+ * ep93xx_spi_process_message() - process one SPI message
+ * @espi: ep93xx SPI controller struct
+ * @msg: message to process
+ *
+ * This function processes a single SPI message. We go through all transfers in
+ * the message and pass them to ep93xx_spi_process_transfer(). Chipselect is
+ * asserted during the whole message (unless per transfer cs_change is set).
+ *
+ * @msg->status contains %0 in case of success or negative error code in case of
+ * failure.
+ */
+static void ep93xx_spi_process_message(struct ep93xx_spi *espi,
+				       struct spi_message *msg)
+{
+	unsigned long timeout;
+	struct spi_transfer *t;
+	int err;
+
+	/*
+	 * Enable the SPI controller and its clock.
+	 */
+	err = ep93xx_spi_enable(espi);
+	if (err) {
+		dev_err(&espi->pdev->dev, "failed to enable SPI controller\n");
+		msg->status = err;
+		return;
+	}
+
+	/*
+	 * Just to be sure: flush any data from RX FIFO.
+	 */
+	timeout = jiffies + msecs_to_jiffies(SPI_TIMEOUT);
+	while (ep93xx_spi_read_u16(espi, SSPSR) & SSPSR_RNE) {
+		if (time_after(jiffies, timeout)) {
+			dev_warn(&espi->pdev->dev,
+				 "timeout while flushing RX FIFO\n");
+			msg->status = -ETIMEDOUT;
+			return;
+		}
+		ep93xx_spi_read_u16(espi, SSPDR);
+	}
+
+	/*
+	 * We explicitly handle FIFO level. This way we don't have to check TX
+	 * FIFO status using %SSPSR_TNF bit which may cause RX FIFO overruns.
+	 */
+	espi->fifo_level = 0;
+
+	/*
+	 * Update SPI controller registers according to spi device and assert
+	 * the chipselect.
+	 */
+	ep93xx_spi_chip_setup(espi, spi_get_ctldata(msg->spi));
+	ep93xx_spi_cs_control(msg->spi, true);
+
+	list_for_each_entry(t, &msg->transfers, transfer_list) {
+		ep93xx_spi_process_transfer(espi, msg, t);
+		if (msg->status)
+			break;
+	}
+
+	/*
+	 * Now the whole message is transferred (or failed for some reason). We
+	 * deselect the device and disable the SPI controller.
+	 */
+	ep93xx_spi_cs_control(msg->spi, false);
+	ep93xx_spi_disable(espi);
+}
+
+#define work_to_espi(work) (container_of((work), struct ep93xx_spi, msg_work))
+
+/**
+ * ep93xx_spi_work() - EP93xx SPI workqueue worker function
+ * @work: work struct
+ *
+ * Workqueue worker function. This function is called when there are new
+ * SPI messages to be processed. Message is taken out from the queue and then
+ * passed to ep93xx_spi_process_message().
+ *
+ * After message is transferred, protocol driver is notified by calling
+ * @msg->complete(). In case of error, @msg->status is set to negative error
+ * number, otherwise it contains zero (and @msg->actual_length is updated).
+ */
+static void ep93xx_spi_work(struct work_struct *work)
+{
+	struct ep93xx_spi *espi = work_to_espi(work);
+	struct spi_message *msg;
+
+	spin_lock_irq(&espi->lock);
+	if (!espi->running || espi->current_msg ||
+		list_empty(&espi->msg_queue)) {
+		spin_unlock_irq(&espi->lock);
+		return;
+	}
+	msg = list_first_entry(&espi->msg_queue, struct spi_message, queue);
+	list_del_init(&msg->queue);
+	espi->current_msg = msg;
+	spin_unlock_irq(&espi->lock);
+
+	ep93xx_spi_process_message(espi, msg);
+
+	/*
+	 * Update the current message and re-schedule ourselves if there are
+	 * more messages in the queue.
+	 */
+	spin_lock_irq(&espi->lock);
+	espi->current_msg = NULL;
+	if (espi->running && !list_empty(&espi->msg_queue))
+		queue_work(espi->wq, &espi->msg_work);
+	spin_unlock_irq(&espi->lock);
+
+	/* notify the protocol driver that we are done with this message */
+	msg->complete(msg->context);
+}
+
+static irqreturn_t ep93xx_spi_interrupt(int irq, void *dev_id)
+{
+	struct ep93xx_spi *espi = dev_id;
+	u8 irq_status = ep93xx_spi_read_u8(espi, SSPIIR);
+
+	/*
+	 * If we got ROR (receive overrun) interrupt we know that something is
+	 * wrong. Just abort the message.
+	 */
+	if (unlikely(irq_status & SSPIIR_RORIS)) {
+		/* clear the overrun interrupt */
+		ep93xx_spi_write_u8(espi, SSPICR, 0);
+		dev_warn(&espi->pdev->dev,
+			 "receive overrun, aborting the message\n");
+		espi->current_msg->status = -EIO;
+	} else {
+		/*
+		 * Interrupt is either RX (RIS) or TX (TIS). For both cases we
+		 * simply execute next data transfer.
+		 */
+		if (ep93xx_spi_read_write(espi)) {
+			/*
+			 * In normal case, there still is some processing left
+			 * for current transfer. Let's wait for the next
+			 * interrupt then.
+			 */
+			return IRQ_HANDLED;
+		}
+	}
+
+	/*
+	 * Current transfer is finished, either with error or with success. In
+	 * any case we disable interrupts and notify the worker to handle
+	 * any post-processing of the message.
+	 */
+	ep93xx_spi_disable_interrupts(espi);
+	complete(&espi->wait);
+	return IRQ_HANDLED;
+}
+
+static int __init ep93xx_spi_probe(struct platform_device *pdev)
+{
+	struct spi_master *master;
+	struct ep93xx_spi_info *info;
+	struct ep93xx_spi *espi;
+	struct resource *res;
+	int error;
+
+	info = pdev->dev.platform_data;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*espi));
+	if (!master) {
+		dev_err(&pdev->dev, "failed to allocate spi master\n");
+		return -ENOMEM;
+	}
+
+	master->setup = ep93xx_spi_setup;
+	master->transfer = ep93xx_spi_transfer;
+	master->cleanup = ep93xx_spi_cleanup;
+	master->bus_num = pdev->id;
+	master->num_chipselect = info->num_chipselect;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
+	platform_set_drvdata(pdev, master);
+
+	espi = spi_master_get_devdata(master);
+
+	espi->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(espi->clk)) {
+		dev_err(&pdev->dev, "unable to get spi clock\n");
+		error = PTR_ERR(espi->clk);
+		goto fail_release_master;
+	}
+
+	spin_lock_init(&espi->lock);
+	init_completion(&espi->wait);
+
+	/*
+	 * Calculate maximum and minimum supported clock rates
+	 * for the controller.
+	 */
+	espi->max_rate = clk_get_rate(espi->clk) / 2;
+	espi->min_rate = clk_get_rate(espi->clk) / (254 * 256);
+	espi->pdev = pdev;
+
+	espi->irq = platform_get_irq(pdev, 0);
+	if (espi->irq < 0) {
+		error = -EBUSY;
+		dev_err(&pdev->dev, "failed to get irq resources\n");
+		goto fail_put_clock;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "unable to get iomem resource\n");
+		error = -ENODEV;
+		goto fail_put_clock;
+	}
+
+	res = request_mem_region(res->start, resource_size(res), pdev->name);
+	if (!res) {
+		dev_err(&pdev->dev, "unable to request iomem resources\n");
+		error = -EBUSY;
+		goto fail_put_clock;
+	}
+
+	espi->regs_base = ioremap(res->start, resource_size(res));
+	if (!espi->regs_base) {
+		dev_err(&pdev->dev, "failed to map resources\n");
+		error = -ENODEV;
+		goto fail_free_mem;
+	}
+
+	error = request_irq(espi->irq, ep93xx_spi_interrupt, 0,
+			    "ep93xx-spi", espi);
+	if (error) {
+		dev_err(&pdev->dev, "failed to request irq\n");
+		goto fail_unmap_regs;
+	}
+
+	espi->wq = create_singlethread_workqueue("ep93xx_spid");
+	if (!espi->wq) {
+		dev_err(&pdev->dev, "unable to create workqueue\n");
+		goto fail_free_irq;
+	}
+	INIT_WORK(&espi->msg_work, ep93xx_spi_work);
+	INIT_LIST_HEAD(&espi->msg_queue);
+	espi->running = true;
+
+	/* make sure that the hardware is disabled */
+	ep93xx_spi_write_u8(espi, SSPCR1, 0);
+
+	error = spi_register_master(master);
+	if (error) {
+		dev_err(&pdev->dev, "failed to register SPI master\n");
+		goto fail_free_queue;
+	}
+
+	dev_info(&pdev->dev, "EP93xx SPI Controller at 0x%08lx irq %d\n",
+		 (unsigned long)res->start, espi->irq);
+
+	return 0;
+
+fail_free_queue:
+	destroy_workqueue(espi->wq);
+fail_free_irq:
+	free_irq(espi->irq, espi);
+fail_unmap_regs:
+	iounmap(espi->regs_base);
+fail_free_mem:
+	release_mem_region(res->start, resource_size(res));
+fail_put_clock:
+	clk_put(espi->clk);
+fail_release_master:
+	spi_master_put(master);
+	platform_set_drvdata(pdev, NULL);
+
+	return error;
+}
+
+static int __exit ep93xx_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct resource *res;
+
+	spin_lock_irq(&espi->lock);
+	espi->running = false;
+	spin_unlock_irq(&espi->lock);
+
+	destroy_workqueue(espi->wq);
+
+	/*
+	 * Complete remaining messages with %-ESHUTDOWN status.
+	 */
+	spin_lock_irq(&espi->lock);
+	while (!list_empty(&espi->msg_queue)) {
+		struct spi_message *msg;
+
+		msg = list_first_entry(&espi->msg_queue,
+				       struct spi_message, queue);
+		list_del_init(&msg->queue);
+		msg->status = -ESHUTDOWN;
+		spin_unlock_irq(&espi->lock);
+		msg->complete(msg->context);
+		spin_lock_irq(&espi->lock);
+	}
+	spin_unlock_irq(&espi->lock);
+
+	free_irq(espi->irq, espi);
+	iounmap(espi->regs_base);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(res->start, resource_size(res));
+	clk_put(espi->clk);
+	platform_set_drvdata(pdev, NULL);
+
+	spi_unregister_master(master);
+	return 0;
+}
+
+static struct platform_driver ep93xx_spi_driver = {
+	.driver		= {
+		.name	= "ep93xx-spi",
+		.owner	= THIS_MODULE,
+	},
+	.remove		= __exit_p(ep93xx_spi_remove),
+};
+
+static int __init ep93xx_spi_init(void)
+{
+	return platform_driver_probe(&ep93xx_spi_driver, ep93xx_spi_probe);
+}
+module_init(ep93xx_spi_init);
+
+static void __exit ep93xx_spi_exit(void)
+{
+	platform_driver_unregister(&ep93xx_spi_driver);
+}
+module_exit(ep93xx_spi_exit);
+
+MODULE_DESCRIPTION("EP93xx SPI Controller driver");
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@iki.fi>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:ep93xx-spi");
diff --git a/drivers/spi/mpc512x_psc_spi.c b/drivers/spi/mpc512x_psc_spi.c
new file mode 100644
index 0000000..28a126d
--- /dev/null
+++ b/drivers/spi/mpc512x_psc_spi.c
@@ -0,0 +1,576 @@
+/*
+ * MPC512x PSC in SPI mode driver.
+ *
+ * Copyright (C) 2007,2008 Freescale Semiconductor Inc.
+ * Original port from 52xx driver:
+ *	Hongjun Chen <hong-jun.chen@freescale.com>
+ *
+ * Fork of mpc52xx_psc_spi.c:
+ *	Copyright (C) 2006 TOPTICA Photonics AG., Dragos Carp
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/of_platform.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/spi/spi.h>
+#include <linux/fsl_devices.h>
+#include <asm/mpc52xx_psc.h>
+
+struct mpc512x_psc_spi {
+	void (*cs_control)(struct spi_device *spi, bool on);
+	u32 sysclk;
+
+	/* driver internal data */
+	struct mpc52xx_psc __iomem *psc;
+	struct mpc512x_psc_fifo __iomem *fifo;
+	unsigned int irq;
+	u8 bits_per_word;
+	u8 busy;
+	u32 mclk;
+	u8 eofbyte;
+
+	struct workqueue_struct *workqueue;
+	struct work_struct work;
+
+	struct list_head queue;
+	spinlock_t lock;	/* Message queue lock */
+
+	struct completion done;
+};
+
+/* controller state */
+struct mpc512x_psc_spi_cs {
+	int bits_per_word;
+	int speed_hz;
+};
+
+/* set clock freq, clock ramp, bits per work
+ * if t is NULL then reset the values to the default values
+ */
+static int mpc512x_psc_spi_transfer_setup(struct spi_device *spi,
+					  struct spi_transfer *t)
+{
+	struct mpc512x_psc_spi_cs *cs = spi->controller_state;
+
+	cs->speed_hz = (t && t->speed_hz)
+	    ? t->speed_hz : spi->max_speed_hz;
+	cs->bits_per_word = (t && t->bits_per_word)
+	    ? t->bits_per_word : spi->bits_per_word;
+	cs->bits_per_word = ((cs->bits_per_word + 7) / 8) * 8;
+	return 0;
+}
+
+static void mpc512x_psc_spi_activate_cs(struct spi_device *spi)
+{
+	struct mpc512x_psc_spi_cs *cs = spi->controller_state;
+	struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
+	struct mpc52xx_psc __iomem *psc = mps->psc;
+	u32 sicr;
+	u32 ccr;
+	u16 bclkdiv;
+
+	sicr = in_be32(&psc->sicr);
+
+	/* Set clock phase and polarity */
+	if (spi->mode & SPI_CPHA)
+		sicr |= 0x00001000;
+	else
+		sicr &= ~0x00001000;
+
+	if (spi->mode & SPI_CPOL)
+		sicr |= 0x00002000;
+	else
+		sicr &= ~0x00002000;
+
+	if (spi->mode & SPI_LSB_FIRST)
+		sicr |= 0x10000000;
+	else
+		sicr &= ~0x10000000;
+	out_be32(&psc->sicr, sicr);
+
+	ccr = in_be32(&psc->ccr);
+	ccr &= 0xFF000000;
+	if (cs->speed_hz)
+		bclkdiv = (mps->mclk / cs->speed_hz) - 1;
+	else
+		bclkdiv = (mps->mclk / 1000000) - 1;	/* default 1MHz */
+
+	ccr |= (((bclkdiv & 0xff) << 16) | (((bclkdiv >> 8) & 0xff) << 8));
+	out_be32(&psc->ccr, ccr);
+	mps->bits_per_word = cs->bits_per_word;
+
+	if (mps->cs_control)
+		mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
+}
+
+static void mpc512x_psc_spi_deactivate_cs(struct spi_device *spi)
+{
+	struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
+
+	if (mps->cs_control)
+		mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
+
+}
+
+/* extract and scale size field in txsz or rxsz */
+#define MPC512x_PSC_FIFO_SZ(sz) ((sz & 0x7ff) << 2);
+
+#define EOFBYTE 1
+
+static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi,
+					 struct spi_transfer *t)
+{
+	struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
+	struct mpc52xx_psc __iomem *psc = mps->psc;
+	struct mpc512x_psc_fifo __iomem *fifo = mps->fifo;
+	size_t len = t->len;
+	u8 *tx_buf = (u8 *)t->tx_buf;
+	u8 *rx_buf = (u8 *)t->rx_buf;
+
+	if (!tx_buf && !rx_buf && t->len)
+		return -EINVAL;
+
+	/* Zero MR2 */
+	in_8(&psc->mode);
+	out_8(&psc->mode, 0x0);
+
+	while (len) {
+		int count;
+		int i;
+		u8 data;
+		size_t fifosz;
+		int rxcount;
+
+		/*
+		 * The number of bytes that can be sent at a time
+		 * depends on the fifo size.
+		 */
+		fifosz = MPC512x_PSC_FIFO_SZ(in_be32(&fifo->txsz));
+		count = min(fifosz, len);
+
+		for (i = count; i > 0; i--) {
+			data = tx_buf ? *tx_buf++ : 0;
+			if (len == EOFBYTE)
+				setbits32(&fifo->txcmd, MPC512x_PSC_FIFO_EOF);
+			out_8(&fifo->txdata_8, data);
+			len--;
+		}
+
+		INIT_COMPLETION(mps->done);
+
+		/* interrupt on tx fifo empty */
+		out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY);
+		out_be32(&fifo->tximr, MPC512x_PSC_FIFO_EMPTY);
+
+		/* enable transmiter/receiver */
+		out_8(&psc->command,
+		      MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE);
+
+		wait_for_completion(&mps->done);
+
+		mdelay(1);
+
+		/* rx fifo should have count bytes in it */
+		rxcount = in_be32(&fifo->rxcnt);
+		if (rxcount != count)
+			mdelay(1);
+
+		rxcount = in_be32(&fifo->rxcnt);
+		if (rxcount != count) {
+			dev_warn(&spi->dev, "expected %d bytes in rx fifo "
+				 "but got %d\n", count, rxcount);
+		}
+
+		rxcount = min(rxcount, count);
+		for (i = rxcount; i > 0; i--) {
+			data = in_8(&fifo->rxdata_8);
+			if (rx_buf)
+				*rx_buf++ = data;
+		}
+		while (in_be32(&fifo->rxcnt)) {
+			in_8(&fifo->rxdata_8);
+		}
+
+		out_8(&psc->command,
+		      MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE);
+	}
+	/* disable transmiter/receiver and fifo interrupt */
+	out_8(&psc->command, MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE);
+	out_be32(&fifo->tximr, 0);
+	return 0;
+}
+
+static void mpc512x_psc_spi_work(struct work_struct *work)
+{
+	struct mpc512x_psc_spi *mps = container_of(work,
+						   struct mpc512x_psc_spi,
+						   work);
+
+	spin_lock_irq(&mps->lock);
+	mps->busy = 1;
+	while (!list_empty(&mps->queue)) {
+		struct spi_message *m;
+		struct spi_device *spi;
+		struct spi_transfer *t = NULL;
+		unsigned cs_change;
+		int status;
+
+		m = container_of(mps->queue.next, struct spi_message, queue);
+		list_del_init(&m->queue);
+		spin_unlock_irq(&mps->lock);
+
+		spi = m->spi;
+		cs_change = 1;
+		status = 0;
+		list_for_each_entry(t, &m->transfers, transfer_list) {
+			if (t->bits_per_word || t->speed_hz) {
+				status = mpc512x_psc_spi_transfer_setup(spi, t);
+				if (status < 0)
+					break;
+			}
+
+			if (cs_change)
+				mpc512x_psc_spi_activate_cs(spi);
+			cs_change = t->cs_change;
+
+			status = mpc512x_psc_spi_transfer_rxtx(spi, t);
+			if (status)
+				break;
+			m->actual_length += t->len;
+
+			if (t->delay_usecs)
+				udelay(t->delay_usecs);
+
+			if (cs_change)
+				mpc512x_psc_spi_deactivate_cs(spi);
+		}
+
+		m->status = status;
+		m->complete(m->context);
+
+		if (status || !cs_change)
+			mpc512x_psc_spi_deactivate_cs(spi);
+
+		mpc512x_psc_spi_transfer_setup(spi, NULL);
+
+		spin_lock_irq(&mps->lock);
+	}
+	mps->busy = 0;
+	spin_unlock_irq(&mps->lock);
+}
+
+static int mpc512x_psc_spi_setup(struct spi_device *spi)
+{
+	struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
+	struct mpc512x_psc_spi_cs *cs = spi->controller_state;
+	unsigned long flags;
+
+	if (spi->bits_per_word % 8)
+		return -EINVAL;
+
+	if (!cs) {
+		cs = kzalloc(sizeof *cs, GFP_KERNEL);
+		if (!cs)
+			return -ENOMEM;
+		spi->controller_state = cs;
+	}
+
+	cs->bits_per_word = spi->bits_per_word;
+	cs->speed_hz = spi->max_speed_hz;
+
+	spin_lock_irqsave(&mps->lock, flags);
+	if (!mps->busy)
+		mpc512x_psc_spi_deactivate_cs(spi);
+	spin_unlock_irqrestore(&mps->lock, flags);
+
+	return 0;
+}
+
+static int mpc512x_psc_spi_transfer(struct spi_device *spi,
+				    struct spi_message *m)
+{
+	struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	m->actual_length = 0;
+	m->status = -EINPROGRESS;
+
+	spin_lock_irqsave(&mps->lock, flags);
+	list_add_tail(&m->queue, &mps->queue);
+	queue_work(mps->workqueue, &mps->work);
+	spin_unlock_irqrestore(&mps->lock, flags);
+
+	return 0;
+}
+
+static void mpc512x_psc_spi_cleanup(struct spi_device *spi)
+{
+	kfree(spi->controller_state);
+}
+
+static int mpc512x_psc_spi_port_config(struct spi_master *master,
+				       struct mpc512x_psc_spi *mps)
+{
+	struct mpc52xx_psc __iomem *psc = mps->psc;
+	struct mpc512x_psc_fifo __iomem *fifo = mps->fifo;
+	struct clk *spiclk;
+	int ret = 0;
+	char name[32];
+	u32 sicr;
+	u32 ccr;
+	u16 bclkdiv;
+
+	sprintf(name, "psc%d_mclk", master->bus_num);
+	spiclk = clk_get(&master->dev, name);
+	clk_enable(spiclk);
+	mps->mclk = clk_get_rate(spiclk);
+	clk_put(spiclk);
+
+	/* Reset the PSC into a known state */
+	out_8(&psc->command, MPC52xx_PSC_RST_RX);
+	out_8(&psc->command, MPC52xx_PSC_RST_TX);
+	out_8(&psc->command, MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE);
+
+	/* Disable psc interrupts all useful interrupts are in fifo */
+	out_be16(&psc->isr_imr.imr, 0);
+
+	/* Disable fifo interrupts, will be enabled later */
+	out_be32(&fifo->tximr, 0);
+	out_be32(&fifo->rximr, 0);
+
+	/* Setup fifo slice address and size */
+	/*out_be32(&fifo->txsz, 0x0fe00004);*/
+	/*out_be32(&fifo->rxsz, 0x0ff00004);*/
+
+	sicr =	0x01000000 |	/* SIM = 0001 -- 8 bit */
+		0x00800000 |	/* GenClk = 1 -- internal clk */
+		0x00008000 |	/* SPI = 1 */
+		0x00004000 |	/* MSTR = 1   -- SPI master */
+		0x00000800;	/* UseEOF = 1 -- SS low until EOF */
+
+	out_be32(&psc->sicr, sicr);
+
+	ccr = in_be32(&psc->ccr);
+	ccr &= 0xFF000000;
+	bclkdiv = (mps->mclk / 1000000) - 1;	/* default 1MHz */
+	ccr |= (((bclkdiv & 0xff) << 16) | (((bclkdiv >> 8) & 0xff) << 8));
+	out_be32(&psc->ccr, ccr);
+
+	/* Set 2ms DTL delay */
+	out_8(&psc->ctur, 0x00);
+	out_8(&psc->ctlr, 0x82);
+
+	/* we don't use the alarms */
+	out_be32(&fifo->rxalarm, 0xfff);
+	out_be32(&fifo->txalarm, 0);
+
+	/* Enable FIFO slices for Rx/Tx */
+	out_be32(&fifo->rxcmd,
+		 MPC512x_PSC_FIFO_ENABLE_SLICE | MPC512x_PSC_FIFO_ENABLE_DMA);
+	out_be32(&fifo->txcmd,
+		 MPC512x_PSC_FIFO_ENABLE_SLICE | MPC512x_PSC_FIFO_ENABLE_DMA);
+
+	mps->bits_per_word = 8;
+
+	return ret;
+}
+
+static irqreturn_t mpc512x_psc_spi_isr(int irq, void *dev_id)
+{
+	struct mpc512x_psc_spi *mps = (struct mpc512x_psc_spi *)dev_id;
+	struct mpc512x_psc_fifo __iomem *fifo = mps->fifo;
+
+	/* clear interrupt and wake up the work queue */
+	if (in_be32(&fifo->txisr) &
+	    in_be32(&fifo->tximr) & MPC512x_PSC_FIFO_EMPTY) {
+		out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY);
+		out_be32(&fifo->tximr, 0);
+		complete(&mps->done);
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+/* bus_num is used only for the case dev->platform_data == NULL */
+static int __init mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
+					   u32 size, unsigned int irq,
+					   s16 bus_num)
+{
+	struct fsl_spi_platform_data *pdata = dev->platform_data;
+	struct mpc512x_psc_spi *mps;
+	struct spi_master *master;
+	int ret;
+	void *tempp;
+
+	master = spi_alloc_master(dev, sizeof *mps);
+	if (master == NULL)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, master);
+	mps = spi_master_get_devdata(master);
+	mps->irq = irq;
+
+	if (pdata == NULL) {
+		dev_err(dev, "probe called without platform data, no "
+			"cs_control function will be called\n");
+		mps->cs_control = NULL;
+		mps->sysclk = 0;
+		master->bus_num = bus_num;
+		master->num_chipselect = 255;
+	} else {
+		mps->cs_control = pdata->cs_control;
+		mps->sysclk = pdata->sysclk;
+		master->bus_num = pdata->bus_num;
+		master->num_chipselect = pdata->max_chipselect;
+	}
+
+	master->setup = mpc512x_psc_spi_setup;
+	master->transfer = mpc512x_psc_spi_transfer;
+	master->cleanup = mpc512x_psc_spi_cleanup;
+
+	tempp = ioremap(regaddr, size);
+	if (!tempp) {
+		dev_err(dev, "could not ioremap I/O port range\n");
+		ret = -EFAULT;
+		goto free_master;
+	}
+	mps->psc = tempp;
+	mps->fifo =
+		(struct mpc512x_psc_fifo *)(tempp + sizeof(struct mpc52xx_psc));
+
+	ret = request_irq(mps->irq, mpc512x_psc_spi_isr, IRQF_SHARED,
+			  "mpc512x-psc-spi", mps);
+	if (ret)
+		goto free_master;
+
+	ret = mpc512x_psc_spi_port_config(master, mps);
+	if (ret < 0)
+		goto free_irq;
+
+	spin_lock_init(&mps->lock);
+	init_completion(&mps->done);
+	INIT_WORK(&mps->work, mpc512x_psc_spi_work);
+	INIT_LIST_HEAD(&mps->queue);
+
+	mps->workqueue =
+		create_singlethread_workqueue(dev_name(master->dev.parent));
+	if (mps->workqueue == NULL) {
+		ret = -EBUSY;
+		goto free_irq;
+	}
+
+	ret = spi_register_master(master);
+	if (ret < 0)
+		goto unreg_master;
+
+	return ret;
+
+unreg_master:
+	destroy_workqueue(mps->workqueue);
+free_irq:
+	free_irq(mps->irq, mps);
+free_master:
+	if (mps->psc)
+		iounmap(mps->psc);
+	spi_master_put(master);
+
+	return ret;
+}
+
+static int __exit mpc512x_psc_spi_do_remove(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct mpc512x_psc_spi *mps = spi_master_get_devdata(master);
+
+	flush_workqueue(mps->workqueue);
+	destroy_workqueue(mps->workqueue);
+	spi_unregister_master(master);
+	free_irq(mps->irq, mps);
+	if (mps->psc)
+		iounmap(mps->psc);
+
+	return 0;
+}
+
+static int __init mpc512x_psc_spi_of_probe(struct of_device *op,
+					   const struct of_device_id *match)
+{
+	const u32 *regaddr_p;
+	u64 regaddr64, size64;
+	s16 id = -1;
+
+	regaddr_p = of_get_address(op->node, 0, &size64, NULL);
+	if (!regaddr_p) {
+		dev_err(&op->dev, "Invalid PSC address\n");
+		return -EINVAL;
+	}
+	regaddr64 = of_translate_address(op->node, regaddr_p);
+
+	/* get PSC id (0..11, used by port_config) */
+	if (op->dev.platform_data == NULL) {
+		const u32 *psc_nump;
+
+		psc_nump = of_get_property(op->node, "cell-index", NULL);
+		if (!psc_nump || *psc_nump > 11) {
+			dev_err(&op->dev, "mpc512x_psc_spi: Device node %s "
+				"has invalid cell-index property\n",
+				op->node->full_name);
+			return -EINVAL;
+		}
+		id = *psc_nump;
+	}
+
+	return mpc512x_psc_spi_do_probe(&op->dev, (u32) regaddr64, (u32) size64,
+					irq_of_parse_and_map(op->node, 0), id);
+}
+
+static int __exit mpc512x_psc_spi_of_remove(struct of_device *op)
+{
+	return mpc512x_psc_spi_do_remove(&op->dev);
+}
+
+static struct of_device_id mpc512x_psc_spi_of_match[] = {
+	{ .compatible = "fsl,mpc5121-psc-spi", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, mpc512x_psc_spi_of_match);
+
+static struct of_platform_driver mpc512x_psc_spi_of_driver = {
+	.match_table = mpc512x_psc_spi_of_match,
+	.probe = mpc512x_psc_spi_of_probe,
+	.remove = __exit_p(mpc512x_psc_spi_of_remove),
+	.driver = {
+		.name = "mpc512x-psc-spi",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init mpc512x_psc_spi_init(void)
+{
+	return of_register_platform_driver(&mpc512x_psc_spi_of_driver);
+}
+module_init(mpc512x_psc_spi_init);
+
+static void __exit mpc512x_psc_spi_exit(void)
+{
+	of_unregister_platform_driver(&mpc512x_psc_spi_of_driver);
+}
+module_exit(mpc512x_psc_spi_exit);
+
+MODULE_AUTHOR("John Rigby");
+MODULE_DESCRIPTION("MPC512x PSC SPI Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index e0de0d0..b3a94ca 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -38,7 +38,7 @@
 
 #include <plat/dma.h>
 #include <plat/clock.h>
-
+#include <plat/mcspi.h>
 
 #define OMAP2_MCSPI_MAX_FREQ		48000000
 
@@ -113,7 +113,7 @@
 /* use PIO for small transfers, avoiding DMA setup/teardown overhead and
  * cache operations; better heuristics consider wordsize and bitrate.
  */
-#define DMA_MIN_BYTES			8
+#define DMA_MIN_BYTES			160
 
 
 struct omap2_mcspi {
@@ -229,6 +229,8 @@
 
 	l = enable ? OMAP2_MCSPI_CHCTRL_EN : 0;
 	mcspi_write_cs_reg(spi, OMAP2_MCSPI_CHCTRL0, l);
+	/* Flash post-writes */
+	mcspi_read_cs_reg(spi, OMAP2_MCSPI_CHCTRL0);
 }
 
 static void omap2_mcspi_force_cs(struct spi_device *spi, int cs_active)
@@ -303,11 +305,14 @@
 	unsigned int		count, c;
 	unsigned long		base, tx_reg, rx_reg;
 	int			word_len, data_type, element_count;
+	int			elements;
+	u32			l;
 	u8			* rx;
 	const u8		* tx;
 
 	mcspi = spi_master_get_devdata(spi->master);
 	mcspi_dma = &mcspi->dma_channels[spi->chip_select];
+	l = mcspi_cached_chconf0(spi);
 
 	count = xfer->len;
 	c = count;
@@ -346,8 +351,12 @@
 	}
 
 	if (rx != NULL) {
+		elements = element_count - 1;
+		if (l & OMAP2_MCSPI_CHCONF_TURBO)
+			elements--;
+
 		omap_set_dma_transfer_params(mcspi_dma->dma_rx_channel,
-				data_type, element_count - 1, 1,
+				data_type, elements, 1,
 				OMAP_DMA_SYNC_ELEMENT,
 				mcspi_dma->dma_rx_sync_dev, 1);
 
@@ -379,17 +388,42 @@
 		wait_for_completion(&mcspi_dma->dma_rx_completion);
 		dma_unmap_single(NULL, xfer->rx_dma, count, DMA_FROM_DEVICE);
 		omap2_mcspi_set_enable(spi, 0);
+
+		if (l & OMAP2_MCSPI_CHCONF_TURBO) {
+
+			if (likely(mcspi_read_cs_reg(spi, OMAP2_MCSPI_CHSTAT0)
+				   & OMAP2_MCSPI_CHSTAT_RXS)) {
+				u32 w;
+
+				w = mcspi_read_cs_reg(spi, OMAP2_MCSPI_RX0);
+				if (word_len <= 8)
+					((u8 *)xfer->rx_buf)[elements++] = w;
+				else if (word_len <= 16)
+					((u16 *)xfer->rx_buf)[elements++] = w;
+				else /* word_len <= 32 */
+					((u32 *)xfer->rx_buf)[elements++] = w;
+			} else {
+				dev_err(&spi->dev,
+					"DMA RX penultimate word empty");
+				count -= (word_len <= 8)  ? 2 :
+					(word_len <= 16) ? 4 :
+					/* word_len <= 32 */ 8;
+				omap2_mcspi_set_enable(spi, 1);
+				return count;
+			}
+		}
+
 		if (likely(mcspi_read_cs_reg(spi, OMAP2_MCSPI_CHSTAT0)
 				& OMAP2_MCSPI_CHSTAT_RXS)) {
 			u32 w;
 
 			w = mcspi_read_cs_reg(spi, OMAP2_MCSPI_RX0);
 			if (word_len <= 8)
-				((u8 *)xfer->rx_buf)[element_count - 1] = w;
+				((u8 *)xfer->rx_buf)[elements] = w;
 			else if (word_len <= 16)
-				((u16 *)xfer->rx_buf)[element_count - 1] = w;
+				((u16 *)xfer->rx_buf)[elements] = w;
 			else /* word_len <= 32 */
-				((u32 *)xfer->rx_buf)[element_count - 1] = w;
+				((u32 *)xfer->rx_buf)[elements] = w;
 		} else {
 			dev_err(&spi->dev, "DMA RX last word empty");
 			count -= (word_len <= 8)  ? 1 :
@@ -433,7 +467,6 @@
 	word_len = cs->word_len;
 
 	l = mcspi_cached_chconf0(spi);
-	l &= ~OMAP2_MCSPI_CHCONF_TRM_MASK;
 
 	/* We store the pre-calculated register addresses on stack to speed
 	 * up the transfer loop. */
@@ -468,11 +501,26 @@
 					dev_err(&spi->dev, "RXS timed out\n");
 					goto out;
 				}
-				/* prevent last RX_ONLY read from triggering
-				 * more word i/o: switch to rx+tx
-				 */
-				if (c == 0 && tx == NULL)
-					mcspi_write_chconf0(spi, l);
+
+				if (c == 1 && tx == NULL &&
+				    (l & OMAP2_MCSPI_CHCONF_TURBO)) {
+					omap2_mcspi_set_enable(spi, 0);
+					*rx++ = __raw_readl(rx_reg);
+#ifdef VERBOSE
+					dev_dbg(&spi->dev, "read-%d %02x\n",
+						    word_len, *(rx - 1));
+#endif
+					if (mcspi_wait_for_reg_bit(chstat_reg,
+						OMAP2_MCSPI_CHSTAT_RXS) < 0) {
+						dev_err(&spi->dev,
+							"RXS timed out\n");
+						goto out;
+					}
+					c = 0;
+				} else if (c == 0 && tx == NULL) {
+					omap2_mcspi_set_enable(spi, 0);
+				}
+
 				*rx++ = __raw_readl(rx_reg);
 #ifdef VERBOSE
 				dev_dbg(&spi->dev, "read-%d %02x\n",
@@ -506,11 +554,26 @@
 					dev_err(&spi->dev, "RXS timed out\n");
 					goto out;
 				}
-				/* prevent last RX_ONLY read from triggering
-				 * more word i/o: switch to rx+tx
-				 */
-				if (c == 0 && tx == NULL)
-					mcspi_write_chconf0(spi, l);
+
+				if (c == 2 && tx == NULL &&
+				    (l & OMAP2_MCSPI_CHCONF_TURBO)) {
+					omap2_mcspi_set_enable(spi, 0);
+					*rx++ = __raw_readl(rx_reg);
+#ifdef VERBOSE
+					dev_dbg(&spi->dev, "read-%d %04x\n",
+						    word_len, *(rx - 1));
+#endif
+					if (mcspi_wait_for_reg_bit(chstat_reg,
+						OMAP2_MCSPI_CHSTAT_RXS) < 0) {
+						dev_err(&spi->dev,
+							"RXS timed out\n");
+						goto out;
+					}
+					c = 0;
+				} else if (c == 0 && tx == NULL) {
+					omap2_mcspi_set_enable(spi, 0);
+				}
+
 				*rx++ = __raw_readl(rx_reg);
 #ifdef VERBOSE
 				dev_dbg(&spi->dev, "read-%d %04x\n",
@@ -544,11 +607,26 @@
 					dev_err(&spi->dev, "RXS timed out\n");
 					goto out;
 				}
-				/* prevent last RX_ONLY read from triggering
-				 * more word i/o: switch to rx+tx
-				 */
-				if (c == 0 && tx == NULL)
-					mcspi_write_chconf0(spi, l);
+
+				if (c == 4 && tx == NULL &&
+				    (l & OMAP2_MCSPI_CHCONF_TURBO)) {
+					omap2_mcspi_set_enable(spi, 0);
+					*rx++ = __raw_readl(rx_reg);
+#ifdef VERBOSE
+					dev_dbg(&spi->dev, "read-%d %08x\n",
+						    word_len, *(rx - 1));
+#endif
+					if (mcspi_wait_for_reg_bit(chstat_reg,
+						OMAP2_MCSPI_CHSTAT_RXS) < 0) {
+						dev_err(&spi->dev,
+							"RXS timed out\n");
+						goto out;
+					}
+					c = 0;
+				} else if (c == 0 && tx == NULL) {
+					omap2_mcspi_set_enable(spi, 0);
+				}
+
 				*rx++ = __raw_readl(rx_reg);
 #ifdef VERBOSE
 				dev_dbg(&spi->dev, "read-%d %08x\n",
@@ -568,6 +646,7 @@
 			dev_err(&spi->dev, "EOT timed out\n");
 	}
 out:
+	omap2_mcspi_set_enable(spi, 1);
 	return count - c;
 }
 
@@ -755,7 +834,6 @@
 	struct omap2_mcspi_cs	*cs;
 
 	mcspi = spi_master_get_devdata(spi->master);
-	mcspi_dma = &mcspi->dma_channels[spi->chip_select];
 
 	if (spi->controller_state) {
 		/* Unlink controller state from context save list */
@@ -765,13 +843,17 @@
 		kfree(spi->controller_state);
 	}
 
-	if (mcspi_dma->dma_rx_channel != -1) {
-		omap_free_dma(mcspi_dma->dma_rx_channel);
-		mcspi_dma->dma_rx_channel = -1;
-	}
-	if (mcspi_dma->dma_tx_channel != -1) {
-		omap_free_dma(mcspi_dma->dma_tx_channel);
-		mcspi_dma->dma_tx_channel = -1;
+	if (spi->chip_select < spi->master->num_chipselect) {
+		mcspi_dma = &mcspi->dma_channels[spi->chip_select];
+
+		if (mcspi_dma->dma_rx_channel != -1) {
+			omap_free_dma(mcspi_dma->dma_rx_channel);
+			mcspi_dma->dma_rx_channel = -1;
+		}
+		if (mcspi_dma->dma_tx_channel != -1) {
+			omap_free_dma(mcspi_dma->dma_tx_channel);
+			mcspi_dma->dma_tx_channel = -1;
+		}
 	}
 }
 
@@ -797,6 +879,7 @@
 		struct spi_transfer		*t = NULL;
 		int				cs_active = 0;
 		struct omap2_mcspi_cs		*cs;
+		struct omap2_mcspi_device_config *cd;
 		int				par_override = 0;
 		int				status = 0;
 		u32				chconf;
@@ -809,6 +892,7 @@
 
 		spi = m->spi;
 		cs = spi->controller_state;
+		cd = spi->controller_data;
 
 		omap2_mcspi_set_enable(spi, 1);
 		list_for_each_entry(t, &m->transfers, transfer_list) {
@@ -832,10 +916,19 @@
 
 			chconf = mcspi_cached_chconf0(spi);
 			chconf &= ~OMAP2_MCSPI_CHCONF_TRM_MASK;
+			chconf &= ~OMAP2_MCSPI_CHCONF_TURBO;
+
 			if (t->tx_buf == NULL)
 				chconf |= OMAP2_MCSPI_CHCONF_TRM_RX_ONLY;
 			else if (t->rx_buf == NULL)
 				chconf |= OMAP2_MCSPI_CHCONF_TRM_TX_ONLY;
+
+			if (cd && cd->turbo_mode && t->tx_buf == NULL) {
+				/* Turbo mode is for more than one word */
+				if (t->len > ((cs->word_len + 7) >> 3))
+					chconf |= OMAP2_MCSPI_CHCONF_TURBO;
+			}
+
 			mcspi_write_chconf0(spi, chconf);
 
 			if (t->len) {
diff --git a/drivers/spi/spi_bitbang_txrx.h b/drivers/spi/spi_bitbang_txrx.h
new file mode 100644
index 0000000..fc033bb
--- /dev/null
+++ b/drivers/spi/spi_bitbang_txrx.h
@@ -0,0 +1,93 @@
+/*
+ * Mix this utility code with some glue code to get one of several types of
+ * simple SPI master driver.  Two do polled word-at-a-time I/O:
+ *
+ *   -	GPIO/parport bitbangers.  Provide chipselect() and txrx_word[](),
+ *	expanding the per-word routines from the inline templates below.
+ *
+ *   -	Drivers for controllers resembling bare shift registers.  Provide
+ *	chipselect() and txrx_word[](), with custom setup()/cleanup() methods
+ *	that use your controller's clock and chipselect registers.
+ *
+ * Some hardware works well with requests at spi_transfer scope:
+ *
+ *   -	Drivers leveraging smarter hardware, with fifos or DMA; or for half
+ *	duplex (MicroWire) controllers.  Provide chipselect() and txrx_bufs(),
+ *	and custom setup()/cleanup() methods.
+ */
+
+/*
+ * The code that knows what GPIO pins do what should have declared four
+ * functions, ideally as inlines, before including this header:
+ *
+ *  void setsck(struct spi_device *, int is_on);
+ *  void setmosi(struct spi_device *, int is_on);
+ *  int getmiso(struct spi_device *);
+ *  void spidelay(unsigned);
+ *
+ * setsck()'s is_on parameter is a zero/nonzero boolean.
+ *
+ * setmosi()'s is_on parameter is a zero/nonzero boolean.
+ *
+ * getmiso() is required to return 0 or 1 only. Any other value is invalid
+ * and will result in improper operation.
+ *
+ * A non-inlined routine would call bitbang_txrx_*() routines.  The
+ * main loop could easily compile down to a handful of instructions,
+ * especially if the delay is a NOP (to run at peak speed).
+ *
+ * Since this is software, the timings may not be exactly what your board's
+ * chips need ... there may be several reasons you'd need to tweak timings
+ * in these routines, not just make to make it faster or slower to match a
+ * particular CPU clock rate.
+ */
+
+static inline u32
+bitbang_txrx_be_cpha0(struct spi_device *spi,
+		unsigned nsecs, unsigned cpol,
+		u32 word, u8 bits)
+{
+	/* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */
+
+	/* clock starts at inactive polarity */
+	for (word <<= (32 - bits); likely(bits); bits--) {
+
+		/* setup MSB (to slave) on trailing edge */
+		setmosi(spi, word & (1 << 31));
+		spidelay(nsecs);	/* T(setup) */
+
+		setsck(spi, !cpol);
+		spidelay(nsecs);
+
+		/* sample MSB (from slave) on leading edge */
+		word <<= 1;
+		word |= getmiso(spi);
+		setsck(spi, cpol);
+	}
+	return word;
+}
+
+static inline u32
+bitbang_txrx_be_cpha1(struct spi_device *spi,
+		unsigned nsecs, unsigned cpol,
+		u32 word, u8 bits)
+{
+	/* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */
+
+	/* clock starts at inactive polarity */
+	for (word <<= (32 - bits); likely(bits); bits--) {
+
+		/* setup MSB (to slave) on leading edge */
+		setsck(spi, !cpol);
+		setmosi(spi, word & (1 << 31));
+		spidelay(nsecs); /* T(setup) */
+
+		setsck(spi, cpol);
+		spidelay(nsecs);
+
+		/* sample MSB (from slave) on trailing edge */
+		word <<= 1;
+		word |= getmiso(spi);
+	}
+	return word;
+}
diff --git a/drivers/spi/spi_butterfly.c b/drivers/spi/spi_butterfly.c
index c218486..8b52812 100644
--- a/drivers/spi/spi_butterfly.c
+++ b/drivers/spi/spi_butterfly.c
@@ -149,8 +149,7 @@
 #define	spidelay(X)	do{}while(0)
 //#define	spidelay	ndelay
 
-#define	EXPAND_BITBANG_TXRX
-#include <linux/spi/spi_bitbang.h>
+#include "spi_bitbang_txrx.h"
 
 static u32
 butterfly_txrx_word_mode0(struct spi_device *spi,
diff --git a/drivers/spi/spi_gpio.c b/drivers/spi/spi_gpio.c
index 26bd03e..7edbd58 100644
--- a/drivers/spi/spi_gpio.c
+++ b/drivers/spi/spi_gpio.c
@@ -127,8 +127,7 @@
  */
 #define spidelay(nsecs)	do {} while (0)
 
-#define	EXPAND_BITBANG_TXRX
-#include <linux/spi/spi_bitbang.h>
+#include "spi_bitbang_txrx.h"
 
 /*
  * These functions can leverage inline expansion of GPIO calls to shrink
diff --git a/drivers/spi/spi_lm70llp.c b/drivers/spi/spi_lm70llp.c
index 568c781..86fb7b5 100644
--- a/drivers/spi/spi_lm70llp.c
+++ b/drivers/spi/spi_lm70llp.c
@@ -174,8 +174,7 @@
 }
 /*--------------------------------------------------------------------*/
 
-#define EXPAND_BITBANG_TXRX 1
-#include <linux/spi/spi_bitbang.h>
+#include "spi_bitbang_txrx.h"
 
 static void lm70_chipselect(struct spi_device *spi, int value)
 {
diff --git a/drivers/spi/spi_mpc8xxx.c b/drivers/spi/spi_mpc8xxx.c
index 75b7f8c..ffa111a 100644
--- a/drivers/spi/spi_mpc8xxx.c
+++ b/drivers/spi/spi_mpc8xxx.c
@@ -241,7 +241,6 @@
 
 	/* Turn off SPI unit prior changing mode */
 	mpc8xxx_spi_write_reg(mode, cs->hw_mode & ~SPMODE_ENABLE);
-	mpc8xxx_spi_write_reg(mode, cs->hw_mode);
 
 	/* When in CPM mode, we need to reinit tx and rx. */
 	if (mspi->flags & SPI_CPM_MODE) {
@@ -258,7 +257,7 @@
 			}
 		}
 	}
-
+	mpc8xxx_spi_write_reg(mode, cs->hw_mode);
 	local_irq_restore(flags);
 }
 
@@ -287,11 +286,75 @@
 	}
 }
 
+static int
+mspi_apply_cpu_mode_quirks(struct spi_mpc8xxx_cs *cs,
+			   struct spi_device *spi,
+			   struct mpc8xxx_spi *mpc8xxx_spi,
+			   int bits_per_word)
+{
+	cs->rx_shift = 0;
+	cs->tx_shift = 0;
+	if (bits_per_word <= 8) {
+		cs->get_rx = mpc8xxx_spi_rx_buf_u8;
+		cs->get_tx = mpc8xxx_spi_tx_buf_u8;
+		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
+			cs->rx_shift = 16;
+			cs->tx_shift = 24;
+		}
+	} else if (bits_per_word <= 16) {
+		cs->get_rx = mpc8xxx_spi_rx_buf_u16;
+		cs->get_tx = mpc8xxx_spi_tx_buf_u16;
+		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
+			cs->rx_shift = 16;
+			cs->tx_shift = 16;
+		}
+	} else if (bits_per_word <= 32) {
+		cs->get_rx = mpc8xxx_spi_rx_buf_u32;
+		cs->get_tx = mpc8xxx_spi_tx_buf_u32;
+	} else
+		return -EINVAL;
+
+	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE &&
+	    spi->mode & SPI_LSB_FIRST) {
+		cs->tx_shift = 0;
+		if (bits_per_word <= 8)
+			cs->rx_shift = 8;
+		else
+			cs->rx_shift = 0;
+	}
+	mpc8xxx_spi->rx_shift = cs->rx_shift;
+	mpc8xxx_spi->tx_shift = cs->tx_shift;
+	mpc8xxx_spi->get_rx = cs->get_rx;
+	mpc8xxx_spi->get_tx = cs->get_tx;
+
+	return bits_per_word;
+}
+
+static int
+mspi_apply_qe_mode_quirks(struct spi_mpc8xxx_cs *cs,
+			  struct spi_device *spi,
+			  int bits_per_word)
+{
+	/* QE uses Little Endian for words > 8
+	 * so transform all words > 8 into 8 bits
+	 * Unfortnatly that doesn't work for LSB so
+	 * reject these for now */
+	/* Note: 32 bits word, LSB works iff
+	 * tfcr/rfcr is set to CPMFCR_GBL */
+	if (spi->mode & SPI_LSB_FIRST &&
+	    bits_per_word > 8)
+		return -EINVAL;
+	if (bits_per_word > 8)
+		return 8; /* pretend its 8 bits */
+	return bits_per_word;
+}
+
 static
 int mpc8xxx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi;
-	u8 bits_per_word, pm;
+	int bits_per_word;
+	u8 pm;
 	u32 hz;
 	struct spi_mpc8xxx_cs	*cs = spi->controller_state;
 
@@ -317,41 +380,16 @@
 	if (!hz)
 		hz = spi->max_speed_hz;
 
-	cs->rx_shift = 0;
-	cs->tx_shift = 0;
-	if (bits_per_word <= 8) {
-		cs->get_rx = mpc8xxx_spi_rx_buf_u8;
-		cs->get_tx = mpc8xxx_spi_tx_buf_u8;
-		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
-			cs->rx_shift = 16;
-			cs->tx_shift = 24;
-		}
-	} else if (bits_per_word <= 16) {
-		cs->get_rx = mpc8xxx_spi_rx_buf_u16;
-		cs->get_tx = mpc8xxx_spi_tx_buf_u16;
-		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
-			cs->rx_shift = 16;
-			cs->tx_shift = 16;
-		}
-	} else if (bits_per_word <= 32) {
-		cs->get_rx = mpc8xxx_spi_rx_buf_u32;
-		cs->get_tx = mpc8xxx_spi_tx_buf_u32;
-	} else
-		return -EINVAL;
+	if (!(mpc8xxx_spi->flags & SPI_CPM_MODE))
+		bits_per_word = mspi_apply_cpu_mode_quirks(cs, spi,
+							   mpc8xxx_spi,
+							   bits_per_word);
+	else if (mpc8xxx_spi->flags & SPI_QE)
+		bits_per_word = mspi_apply_qe_mode_quirks(cs, spi,
+							  bits_per_word);
 
-	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE &&
-			spi->mode & SPI_LSB_FIRST) {
-		cs->tx_shift = 0;
-		if (bits_per_word <= 8)
-			cs->rx_shift = 8;
-		else
-			cs->rx_shift = 0;
-	}
-
-	mpc8xxx_spi->rx_shift = cs->rx_shift;
-	mpc8xxx_spi->tx_shift = cs->tx_shift;
-	mpc8xxx_spi->get_rx = cs->get_rx;
-	mpc8xxx_spi->get_tx = cs->get_tx;
+	if (bits_per_word < 0)
+		return bits_per_word;
 
 	if (bits_per_word == 32)
 		bits_per_word = 0;
@@ -438,7 +476,7 @@
 			dev_err(dev, "unable to map tx dma\n");
 			return -ENOMEM;
 		}
-	} else {
+	} else if (t->tx_buf) {
 		mspi->tx_dma = t->tx_dma;
 	}
 
@@ -449,7 +487,7 @@
 			dev_err(dev, "unable to map rx dma\n");
 			goto err_rx_dma;
 		}
-	} else {
+	} else if (t->rx_buf) {
 		mspi->rx_dma = t->rx_dma;
 	}
 
@@ -477,7 +515,7 @@
 
 	if (mspi->map_tx_dma)
 		dma_unmap_single(dev, mspi->tx_dma, t->len, DMA_TO_DEVICE);
-	if (mspi->map_tx_dma)
+	if (mspi->map_rx_dma)
 		dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE);
 	mspi->xfer_in_progress = NULL;
 }
diff --git a/drivers/spi/spi_s3c24xx_gpio.c b/drivers/spi/spi_s3c24xx_gpio.c
index bbf9371..8979a75 100644
--- a/drivers/spi/spi_s3c24xx_gpio.c
+++ b/drivers/spi/spi_s3c24xx_gpio.c
@@ -58,8 +58,7 @@
 
 #define spidelay(x) ndelay(x)
 
-#define	EXPAND_BITBANG_TXRX
-#include <linux/spi/spi_bitbang.h>
+#include "spi_bitbang_txrx.h"
 
 
 static u32 s3c2410_spigpio_txrx_mode0(struct spi_device *spi,
diff --git a/drivers/spi/spi_sh_sci.c b/drivers/spi/spi_sh_sci.c
index a65c12f..a511be7 100644
--- a/drivers/spi/spi_sh_sci.c
+++ b/drivers/spi/spi_sh_sci.c
@@ -78,8 +78,7 @@
 
 #define spidelay(x) ndelay(x)
 
-#define EXPAND_BITBANG_TXRX
-#include <linux/spi/spi_bitbang.h>
+#include "spi_bitbang_txrx.h"
 
 static u32 sh_sci_spi_txrx_mode0(struct spi_device *spi,
 				      unsigned nsecs, u32 word, u8 bits)
diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h
index e4836c6b..abf26cc 100644
--- a/include/linux/amba/pl022.h
+++ b/include/linux/amba/pl022.h
@@ -71,6 +71,7 @@
 
 /**
  * enum ssp_rx_endian - endianess of Rx FIFO Data
+ * this feature is only available in ST versionf of PL022
  */
 enum ssp_rx_endian {
 	SSP_RX_MSB,
@@ -181,7 +182,8 @@
 };
 
 /**
- * enum Microwire - whether Full/Half Duplex
+ * enum ssp_duplex - whether Full/Half Duplex on microwire, only
+ * available in the ST Micro variant.
  * @SSP_MICROWIRE_CHANNEL_FULL_DUPLEX: SSPTXD becomes bi-directional,
  *     SSPRXD not used
  * @SSP_MICROWIRE_CHANNEL_HALF_DUPLEX: SSPTXD is an output, SSPRXD is
@@ -193,6 +195,31 @@
 };
 
 /**
+ * enum ssp_clkdelay - an optional clock delay on the feedback clock
+ * only available in the ST Micro PL023 variant.
+ * @SSP_FEEDBACK_CLK_DELAY_NONE: no delay, the data coming in from the
+ * slave is sampled directly
+ * @SSP_FEEDBACK_CLK_DELAY_1T: the incoming slave data is sampled with
+ * a delay of T-dt
+ * @SSP_FEEDBACK_CLK_DELAY_2T: dito with a delay if 2T-dt
+ * @SSP_FEEDBACK_CLK_DELAY_3T: dito with a delay if 3T-dt
+ * @SSP_FEEDBACK_CLK_DELAY_4T: dito with a delay if 4T-dt
+ * @SSP_FEEDBACK_CLK_DELAY_5T: dito with a delay if 5T-dt
+ * @SSP_FEEDBACK_CLK_DELAY_6T: dito with a delay if 6T-dt
+ * @SSP_FEEDBACK_CLK_DELAY_7T: dito with a delay if 7T-dt
+ */
+enum ssp_clkdelay {
+	SSP_FEEDBACK_CLK_DELAY_NONE,
+	SSP_FEEDBACK_CLK_DELAY_1T,
+	SSP_FEEDBACK_CLK_DELAY_2T,
+	SSP_FEEDBACK_CLK_DELAY_3T,
+	SSP_FEEDBACK_CLK_DELAY_4T,
+	SSP_FEEDBACK_CLK_DELAY_5T,
+	SSP_FEEDBACK_CLK_DELAY_6T,
+	SSP_FEEDBACK_CLK_DELAY_7T
+};
+
+/**
  * CHIP select/deselect commands
  */
 enum ssp_chip_select {
@@ -235,6 +262,8 @@
  * @ctrl_len: Microwire interface: Control length
  * @wait_state: Microwire interface: Wait state
  * @duplex: Microwire interface: Full/Half duplex
+ * @clkdelay: on the PL023 variant, the delay in feeback clock cycles
+ * before sampling the incoming line
  * @cs_control: function pointer to board-specific function to
  * assert/deassert I/O port to control HW generation of devices chip-select.
  * @dma_xfer_type: Type of DMA xfer (Mem-to-periph or Periph-to-Periph)
@@ -258,6 +287,7 @@
 	enum ssp_microwire_ctrl_len ctrl_len;
 	enum ssp_microwire_wait_state wait_state;
 	enum ssp_duplex duplex;
+	enum ssp_clkdelay clkdelay;
 	void (*cs_control) (u32 control);
 };
 
diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 3274c50..f987a2b 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -1,24 +1,6 @@
 #ifndef	__SPI_BITBANG_H
 #define	__SPI_BITBANG_H
 
-/*
- * Mix this utility code with some glue code to get one of several types of
- * simple SPI master driver.  Two do polled word-at-a-time I/O:
- *
- *   -	GPIO/parport bitbangers.  Provide chipselect() and txrx_word[](),
- *	expanding the per-word routines from the inline templates below.
- *
- *   -	Drivers for controllers resembling bare shift registers.  Provide
- *	chipselect() and txrx_word[](), with custom setup()/cleanup() methods
- *	that use your controller's clock and chipselect registers.
- *
- * Some hardware works well with requests at spi_transfer scope:
- *
- *   -	Drivers leveraging smarter hardware, with fifos or DMA; or for half
- *	duplex (MicroWire) controllers.  Provide chipselect() and txrx_bufs(),
- *	and custom setup()/cleanup() methods.
- */
-
 #include <linux/workqueue.h>
 
 struct spi_bitbang {
@@ -68,86 +50,3 @@
 extern int spi_bitbang_stop(struct spi_bitbang *spi);
 
 #endif	/* __SPI_BITBANG_H */
-
-/*-------------------------------------------------------------------------*/
-
-#ifdef	EXPAND_BITBANG_TXRX
-
-/*
- * The code that knows what GPIO pins do what should have declared four
- * functions, ideally as inlines, before #defining EXPAND_BITBANG_TXRX
- * and including this header:
- *
- *  void setsck(struct spi_device *, int is_on);
- *  void setmosi(struct spi_device *, int is_on);
- *  int getmiso(struct spi_device *);
- *  void spidelay(unsigned);
- *
- * setsck()'s is_on parameter is a zero/nonzero boolean.
- *
- * setmosi()'s is_on parameter is a zero/nonzero boolean.
- *
- * getmiso() is required to return 0 or 1 only. Any other value is invalid
- * and will result in improper operation.
- *
- * A non-inlined routine would call bitbang_txrx_*() routines.  The
- * main loop could easily compile down to a handful of instructions,
- * especially if the delay is a NOP (to run at peak speed).
- *
- * Since this is software, the timings may not be exactly what your board's
- * chips need ... there may be several reasons you'd need to tweak timings
- * in these routines, not just make to make it faster or slower to match a
- * particular CPU clock rate.
- */
-
-static inline u32
-bitbang_txrx_be_cpha0(struct spi_device *spi,
-		unsigned nsecs, unsigned cpol,
-		u32 word, u8 bits)
-{
-	/* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */
-
-	/* clock starts at inactive polarity */
-	for (word <<= (32 - bits); likely(bits); bits--) {
-
-		/* setup MSB (to slave) on trailing edge */
-		setmosi(spi, word & (1 << 31));
-		spidelay(nsecs);	/* T(setup) */
-
-		setsck(spi, !cpol);
-		spidelay(nsecs);
-
-		/* sample MSB (from slave) on leading edge */
-		word <<= 1;
-		word |= getmiso(spi);
-		setsck(spi, cpol);
-	}
-	return word;
-}
-
-static inline u32
-bitbang_txrx_be_cpha1(struct spi_device *spi,
-		unsigned nsecs, unsigned cpol,
-		u32 word, u8 bits)
-{
-	/* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */
-
-	/* clock starts at inactive polarity */
-	for (word <<= (32 - bits); likely(bits); bits--) {
-
-		/* setup MSB (to slave) on leading edge */
-		setsck(spi, !cpol);
-		setmosi(spi, word & (1 << 31));
-		spidelay(nsecs); /* T(setup) */
-
-		setsck(spi, cpol);
-		spidelay(nsecs);
-
-		/* sample MSB (from slave) on trailing edge */
-		word <<= 1;
-		word |= getmiso(spi);
-	}
-	return word;
-}
-
-#endif	/* EXPAND_BITBANG_TXRX */