Merge branch 'spi-4.21' into spi-next
diff --git a/Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt b/Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt
new file mode 100644
index 0000000..1fd9a44
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt
@@ -0,0 +1,43 @@
+Nuvoton NPCM Peripheral Serial Peripheral Interface(PSPI) controller driver
+
+Nuvoton NPCM7xx SOC support two PSPI channels.
+
+Required properties:
+ - compatible : "nuvoton,npcm750-pspi" for NPCM7XX BMC
+ - #address-cells : should be 1. see spi-bus.txt
+ - #size-cells : should be 0. see spi-bus.txt
+ - specifies physical base address and size of the register.
+ - interrupts : contain PSPI interrupt.
+ - clocks : phandle of PSPI reference clock.
+ - clock-names: Should be "clk_apb5".
+ - pinctrl-names : a pinctrl state named "default" must be defined.
+ - pinctrl-0 : phandle referencing pin configuration of the device.
+ - cs-gpios: Specifies the gpio pins to be used for chipselects.
+            See: Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Optional properties:
+- clock-frequency : Input clock frequency to the PSPI block in Hz.
+		    Default is 25000000 Hz.
+
+Aliases:
+- All the SPI controller nodes should be represented in the aliases node using
+  the following format 'spi{n}' withe the correct numbered in "aliases" node.
+
+Example:
+
+aliases {
+	spi0 = &spi0;
+};
+
+spi0: spi@f0200000 {
+	compatible = "nuvoton,npcm750-pspi";
+	reg = <0xf0200000 0x1000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pspi1_pins>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+	interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&clk NPCM7XX_CLK_APB5>;
+	clock-names = "clk_apb5";
+	cs-gpios = <&gpio6 11 GPIO_ACTIVE_LOW>;
+};
diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt
index 2ba5f9c..487208c 100644
--- a/Documentation/devicetree/bindings/spi/omap-spi.txt
+++ b/Documentation/devicetree/bindings/spi/omap-spi.txt
@@ -2,6 +2,7 @@
 
 Required properties:
 - compatible :
+  - "ti,am654-mcspi" for AM654.
   - "ti,omap2-mcspi" for OMAP2 & OMAP3.
   - "ti,omap4-mcspi" for OMAP4+.
 - ti,spi-num-cs : Number of chipselect supported  by the instance.
diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt
index 4b836ad..37cf695 100644
--- a/Documentation/devicetree/bindings/spi/sh-msiof.txt
+++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt
@@ -5,6 +5,7 @@
 			 "renesas,msiof-r8a7744" (RZ/G1N)
 			 "renesas,msiof-r8a7745" (RZ/G1E)
 			 "renesas,msiof-r8a774a1" (RZ/G2M)
+			 "renesas,msiof-r8a774c0" (RZ/G2E)
 			 "renesas,msiof-r8a7790" (R-Car H2)
 			 "renesas,msiof-r8a7791" (R-Car M2-W)
 			 "renesas,msiof-r8a7792" (R-Car V2H)
diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.txt b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.txt
index 8d178a4..6cc3c6f 100644
--- a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.txt
+++ b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.txt
@@ -5,8 +5,11 @@
   - "fsl,imx7ulp-spi" for LPSPI compatible with the one integrated on i.MX7ULP soc
   - "fsl,imx8qxp-spi" for LPSPI compatible with the one integrated on i.MX8QXP soc
 - reg : address and length of the lpspi master registers
+- interrupt-parent : core interrupt controller
 - interrupts : lpspi interrupt
 - clocks : lpspi clock specifier
+- spi-slave : spi slave mode support. In slave mode, add this attribute without
+	      value. In master mode, remove it.
 
 Examples:
 
@@ -16,4 +19,5 @@
 	interrupt-parent = <&intc>;
 	interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
 	clocks = <&clks IMX7ULP_CLK_LPSPI2>;
+	spi-slave;
 };
diff --git a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
index 236dcb0..69c3567 100644
--- a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
+++ b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt
@@ -6,8 +6,10 @@
     - mediatek,mt2712-spi: for mt2712 platforms
     - mediatek,mt6589-spi: for mt6589 platforms
     - mediatek,mt7622-spi: for mt7622 platforms
+    - "mediatek,mt7629-spi", "mediatek,mt7622-spi": for mt7629 platforms
     - mediatek,mt8135-spi: for mt8135 platforms
     - mediatek,mt8173-spi: for mt8173 platforms
+    - mediatek,mt8183-spi: for mt8183 platforms
 
 - #address-cells: should be 1.
 
diff --git a/Documentation/devicetree/bindings/spi/spi-mxic.txt b/Documentation/devicetree/bindings/spi/spi-mxic.txt
new file mode 100644
index 0000000..529f2da
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-mxic.txt
@@ -0,0 +1,34 @@
+Macronix SPI controller Device Tree Bindings
+--------------------------------------------
+
+Required properties:
+- compatible: should be "mxicy,mx25f0a-spi"
+- #address-cells: should be 1
+- #size-cells: should be 0
+- reg: should contain 2 entries, one for the registers and one for the direct
+       mapping area
+- reg-names: should contain "regs" and "dirmap"
+- interrupts: interrupt line connected to the SPI controller
+- clock-names: should contain "ps_clk", "send_clk" and "send_dly_clk"
+- clocks: should contain 3 entries for the "ps_clk", "send_clk" and
+	  "send_dly_clk" clocks
+
+Example:
+
+	spi@43c30000 {
+		compatible = "mxicy,mx25f0a-spi";
+		reg = <0x43c30000 0x10000>, <0xa0000000 0x20000000>;
+		reg-names = "regs", "dirmap";
+		clocks = <&clkwizard 0>, <&clkwizard 1>, <&clkc 18>;
+		clock-names = "send_clk", "send_dly_clk", "ps_clk";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		flash@0 {
+			compatible = "jedec,spi-nor";
+			reg = <0>;
+			spi-max-frequency = <25000000>;
+			spi-tx-bus-width = <4>;
+			spi-rx-bus-width = <4>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/spi/spi-pxa2xx.txt b/Documentation/devicetree/bindings/spi/spi-pxa2xx.txt
index 0335a9b..e30e0c2 100644
--- a/Documentation/devicetree/bindings/spi/spi-pxa2xx.txt
+++ b/Documentation/devicetree/bindings/spi/spi-pxa2xx.txt
@@ -11,6 +11,9 @@
 Optional properties:
 - cs-gpios: list of GPIO chip selects. See the SPI bus bindings,
   Documentation/devicetree/bindings/spi/spi-bus.txt
+- spi-slave: Empty property indicating the SPI controller is used in slave mode.
+- ready-gpios: GPIO used to signal a SPI master that the FIFO is filled
+  and we're ready to service a transfer. Only useful in slave mode.
 
 Child nodes represent devices on the SPI bus
   See ../spi/spi-bus.txt
diff --git a/Documentation/devicetree/bindings/spi/spi-rspi.txt b/Documentation/devicetree/bindings/spi/spi-rspi.txt
index fc97ad6..421722b 100644
--- a/Documentation/devicetree/bindings/spi/spi-rspi.txt
+++ b/Documentation/devicetree/bindings/spi/spi-rspi.txt
@@ -15,6 +15,7 @@
 			- "renesas,qspi-r8a7743" (RZ/G1M)
 			- "renesas,qspi-r8a7744" (RZ/G1N)
 			- "renesas,qspi-r8a7745" (RZ/G1E)
+			- "renesas,qspi-r8a77470" (RZ/G1C)
 			- "renesas,qspi-r8a7790" (R-Car H2)
 			- "renesas,qspi-r8a7791" (R-Car M2-W)
 			- "renesas,qspi-r8a7792" (R-Car V2H)
diff --git a/Documentation/devicetree/bindings/spi/spi-uniphier.txt b/Documentation/devicetree/bindings/spi/spi-uniphier.txt
index b04e66a..e120157 100644
--- a/Documentation/devicetree/bindings/spi/spi-uniphier.txt
+++ b/Documentation/devicetree/bindings/spi/spi-uniphier.txt
@@ -5,6 +5,8 @@
 Required properties:
  - compatible: should be "socionext,uniphier-scssi"
  - reg: address and length of the spi master registers
+ - #address-cells: must be <1>, see spi-bus.txt
+ - #size-cells: must be <0>, see spi-bus.txt
  - interrupts: a single interrupt specifier
  - pinctrl-names: should be "default"
  - pinctrl-0: pin control state for the default mode
@@ -16,6 +18,8 @@
 spi0: spi@54006000 {
 	compatible = "socionext,uniphier-scssi";
 	reg = <0x54006000 0x100>;
+	#address-cells = <1>;
+	#size-cells = <0>;
 	interrupts = <0 39 4>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_spi0>;
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 7d3a5c9..a5b0761 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -397,6 +397,13 @@
 	  say Y or M here.If you are not sure, say N.
 	  SPI drivers for Mediatek MT65XX and MT81XX series ARM SoCs.
 
+config SPI_NPCM_PSPI
+	tristate "Nuvoton NPCM PSPI Controller"
+	depends on ARCH_NPCM || COMPILE_TEST
+	help
+	  This driver provides support for Nuvoton NPCM BMC
+	  Peripheral SPI controller in master mode.
+
 config SPI_NUC900
 	tristate "Nuvoton NUC900 series SPI"
 	depends on ARCH_W90X900
@@ -435,7 +442,7 @@
 
 config SPI_OMAP24XX
 	tristate "McSPI driver for OMAP"
-	depends on ARCH_OMAP2PLUS || COMPILE_TEST
+	depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST
 	select SG_SPLIT
 	help
 	  SPI master controller for OMAP24XX and later Multichannel SPI
@@ -684,6 +691,12 @@
 	help
 	  This enables using the SPI controller on the Allwinner A31 SoCs.
 
+config SPI_MXIC
+        tristate "Macronix MX25F0A SPI controller"
+        depends on SPI_MASTER
+        help
+          This selects the Macronix MX25F0A SPI controller driver.
+
 config SPI_MXS
 	tristate "Freescale MXS SPI controller"
 	depends on ARCH_MXS
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 3575205..0c4b7a8 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -58,7 +58,9 @@
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= spi-mpc52xx-psc.o
 obj-$(CONFIG_SPI_MPC52xx)		+= spi-mpc52xx.o
 obj-$(CONFIG_SPI_MT65XX)                += spi-mt65xx.o
+obj-$(CONFIG_SPI_MXIC)			+= spi-mxic.o
 obj-$(CONFIG_SPI_MXS)			+= spi-mxs.o
+obj-$(CONFIG_SPI_NPCM_PSPI)		+= spi-npcm-pspi.o
 obj-$(CONFIG_SPI_NUC900)		+= spi-nuc900.o
 obj-$(CONFIG_SPI_OC_TINY)		+= spi-oc-tiny.o
 spi-octeon-objs				:= spi-cavium.o spi-cavium-octeon.o
diff --git a/drivers/spi/spi-at91-usart.c b/drivers/spi/spi-at91-usart.c
index a924657..a694d70 100644
--- a/drivers/spi/spi-at91-usart.c
+++ b/drivers/spi/spi-at91-usart.c
@@ -12,7 +12,9 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_gpio.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/spi/spi.h>
 
@@ -399,6 +401,59 @@ static int at91_usart_spi_probe(struct platform_device *pdev)
 	return ret;
 }
 
+__maybe_unused static int at91_usart_spi_runtime_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctlr);
+
+	clk_disable_unprepare(aus->clk);
+	pinctrl_pm_select_sleep_state(dev);
+
+	return 0;
+}
+
+__maybe_unused static int at91_usart_spi_runtime_resume(struct device *dev)
+{
+	struct spi_controller *ctrl = dev_get_drvdata(dev);
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctrl);
+
+	pinctrl_pm_select_default_state(dev);
+
+	return clk_prepare_enable(aus->clk);
+}
+
+__maybe_unused static int at91_usart_spi_suspend(struct device *dev)
+{
+	struct spi_controller *ctrl = dev_get_drvdata(dev);
+	int ret;
+
+	ret = spi_controller_suspend(ctrl);
+	if (ret)
+		return ret;
+
+	if (!pm_runtime_suspended(dev))
+		at91_usart_spi_runtime_suspend(dev);
+
+	return 0;
+}
+
+__maybe_unused static int at91_usart_spi_resume(struct device *dev)
+{
+	struct spi_controller *ctrl = dev_get_drvdata(dev);
+	struct at91_usart_spi *aus = spi_master_get_devdata(ctrl);
+	int ret;
+
+	if (!pm_runtime_suspended(dev)) {
+		ret = at91_usart_spi_runtime_resume(dev);
+		if (ret)
+			return ret;
+	}
+
+	at91_usart_spi_init(aus);
+
+	return spi_controller_resume(ctrl);
+}
+
 static int at91_usart_spi_remove(struct platform_device *pdev)
 {
 	struct spi_controller *ctlr = platform_get_drvdata(pdev);
@@ -409,6 +464,12 @@ static int at91_usart_spi_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct dev_pm_ops at91_usart_spi_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(at91_usart_spi_suspend, at91_usart_spi_resume)
+	SET_RUNTIME_PM_OPS(at91_usart_spi_runtime_suspend,
+			   at91_usart_spi_runtime_resume, NULL)
+};
+
 static const struct of_device_id at91_usart_spi_dt_ids[] = {
 	{ .compatible = "microchip,at91sam9g45-usart-spi"},
 	{ /* sentinel */}
@@ -419,6 +480,7 @@ MODULE_DEVICE_TABLE(of, at91_usart_spi_dt_ids);
 static struct platform_driver at91_usart_spi_driver = {
 	.driver = {
 		.name = "at91_usart_spi",
+		.pm = &at91_usart_spi_pm_ops,
 	},
 	.probe = at91_usart_spi_probe,
 	.remove = at91_usart_spi_remove,
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 25abf2d..35aebdf 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -20,7 +20,6 @@
  * GNU General Public License for more details.
  */
 
-#include <asm/page.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
@@ -72,6 +71,8 @@
 #define BCM2835_SPI_CS_CS_10		0x00000002
 #define BCM2835_SPI_CS_CS_01		0x00000001
 
+#define BCM2835_SPI_FIFO_SIZE		64
+#define BCM2835_SPI_FIFO_SIZE_3_4	48
 #define BCM2835_SPI_POLLING_LIMIT_US	30
 #define BCM2835_SPI_POLLING_JIFFIES	2
 #define BCM2835_SPI_DMA_MIN_LENGTH	96
@@ -80,14 +81,35 @@
 
 #define DRV_NAME	"spi-bcm2835"
 
+/**
+ * struct bcm2835_spi - BCM2835 SPI controller
+ * @regs: base address of register map
+ * @clk: core clock, divided to calculate serial clock
+ * @irq: interrupt, signals TX FIFO empty or RX FIFO ¾ full
+ * @tfr: SPI transfer currently processed
+ * @tx_buf: pointer whence next transmitted byte is read
+ * @rx_buf: pointer where next received byte is written
+ * @tx_len: remaining bytes to transmit
+ * @rx_len: remaining bytes to receive
+ * @tx_prologue: bytes transmitted without DMA if first TX sglist entry's
+ *	length is not a multiple of 4 (to overcome hardware limitation)
+ * @rx_prologue: bytes received without DMA if first RX sglist entry's
+ *	length is not a multiple of 4 (to overcome hardware limitation)
+ * @tx_spillover: whether @tx_prologue spills over to second TX sglist entry
+ * @dma_pending: whether a DMA transfer is in progress
+ */
 struct bcm2835_spi {
 	void __iomem *regs;
 	struct clk *clk;
 	int irq;
+	struct spi_transfer *tfr;
 	const u8 *tx_buf;
 	u8 *rx_buf;
 	int tx_len;
 	int rx_len;
+	int tx_prologue;
+	int rx_prologue;
+	unsigned int tx_spillover;
 	unsigned int dma_pending;
 };
 
@@ -126,6 +148,115 @@ static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs)
 	}
 }
 
+/**
+ * bcm2835_rd_fifo_count() - blindly read exactly @count bytes from RX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes to read from RX FIFO
+ *
+ * The caller must ensure that @bs->rx_len is greater than or equal to @count,
+ * that the RX FIFO contains at least @count bytes and that the DMA Enable flag
+ * in the CS register is set (such that a read from the FIFO register receives
+ * 32-bit instead of just 8-bit).  Moreover @bs->rx_buf must not be %NULL.
+ */
+static inline void bcm2835_rd_fifo_count(struct bcm2835_spi *bs, int count)
+{
+	u32 val;
+	int len;
+
+	bs->rx_len -= count;
+
+	while (count > 0) {
+		val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
+		len = min(count, 4);
+		memcpy(bs->rx_buf, &val, len);
+		bs->rx_buf += len;
+		count -= 4;
+	}
+}
+
+/**
+ * bcm2835_wr_fifo_count() - blindly write exactly @count bytes to TX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes to write to TX FIFO
+ *
+ * The caller must ensure that @bs->tx_len is greater than or equal to @count,
+ * that the TX FIFO can accommodate @count bytes and that the DMA Enable flag
+ * in the CS register is set (such that a write to the FIFO register transmits
+ * 32-bit instead of just 8-bit).
+ */
+static inline void bcm2835_wr_fifo_count(struct bcm2835_spi *bs, int count)
+{
+	u32 val;
+	int len;
+
+	bs->tx_len -= count;
+
+	while (count > 0) {
+		if (bs->tx_buf) {
+			len = min(count, 4);
+			memcpy(&val, bs->tx_buf, len);
+			bs->tx_buf += len;
+		} else {
+			val = 0;
+		}
+		bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
+		count -= 4;
+	}
+}
+
+/**
+ * bcm2835_wait_tx_fifo_empty() - busy-wait for TX FIFO to empty
+ * @bs: BCM2835 SPI controller
+ *
+ * The caller must ensure that the RX FIFO can accommodate as many bytes
+ * as have been written to the TX FIFO:  Transmission is halted once the
+ * RX FIFO is full, causing this function to spin forever.
+ */
+static inline void bcm2835_wait_tx_fifo_empty(struct bcm2835_spi *bs)
+{
+	while (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE))
+		cpu_relax();
+}
+
+/**
+ * bcm2835_rd_fifo_blind() - blindly read up to @count bytes from RX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes available for reading in RX FIFO
+ */
+static inline void bcm2835_rd_fifo_blind(struct bcm2835_spi *bs, int count)
+{
+	u8 val;
+
+	count = min(count, bs->rx_len);
+	bs->rx_len -= count;
+
+	while (count) {
+		val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
+		if (bs->rx_buf)
+			*bs->rx_buf++ = val;
+		count--;
+	}
+}
+
+/**
+ * bcm2835_wr_fifo_blind() - blindly write up to @count bytes to TX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes available for writing in TX FIFO
+ */
+static inline void bcm2835_wr_fifo_blind(struct bcm2835_spi *bs, int count)
+{
+	u8 val;
+
+	count = min(count, bs->tx_len);
+	bs->tx_len -= count;
+
+	while (count) {
+		val = bs->tx_buf ? *bs->tx_buf++ : 0;
+		bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
+		count--;
+	}
+}
+
 static void bcm2835_spi_reset_hw(struct spi_master *master)
 {
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
@@ -149,6 +280,19 @@ static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
 {
 	struct spi_master *master = dev_id;
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+
+	/*
+	 * An interrupt is signaled either if DONE is set (TX FIFO empty)
+	 * or if RXR is set (RX FIFO >= ¾ full).
+	 */
+	if (cs & BCM2835_SPI_CS_RXF)
+		bcm2835_rd_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
+	else if (cs & BCM2835_SPI_CS_RXR)
+		bcm2835_rd_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE_3_4);
+
+	if (bs->tx_len && cs & BCM2835_SPI_CS_DONE)
+		bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
 
 	/* Read as many bytes as possible from FIFO */
 	bcm2835_rd_fifo(bs);
@@ -168,32 +312,22 @@ static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
 static int bcm2835_spi_transfer_one_irq(struct spi_master *master,
 					struct spi_device *spi,
 					struct spi_transfer *tfr,
-					u32 cs)
+					u32 cs, bool fifo_empty)
 {
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
 
-	/* fill in fifo if we have gpio-cs
-	 * note that there have been rare events where the native-CS
-	 * flapped for <1us which may change the behaviour
-	 * with gpio-cs this does not happen, so it is implemented
-	 * only for this case
-	 */
-	if (gpio_is_valid(spi->cs_gpio)) {
-		/* enable HW block, but without interrupts enabled
-		 * this would triggern an immediate interrupt
-		 */
-		bcm2835_wr(bs, BCM2835_SPI_CS,
-			   cs | BCM2835_SPI_CS_TA);
-		/* fill in tx fifo as much as possible */
-		bcm2835_wr_fifo(bs);
-	}
-
 	/*
-	 * Enable the HW block. This will immediately trigger a DONE (TX
-	 * empty) interrupt, upon which we will fill the TX FIFO with the
-	 * first TX bytes. Pre-filling the TX FIFO here to avoid the
-	 * interrupt doesn't work:-(
+	 * Enable HW block, but with interrupts still disabled.
+	 * Otherwise the empty TX FIFO would immediately trigger an interrupt.
 	 */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
+
+	/* fill TX FIFO as much as possible */
+	if (fifo_empty)
+		bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
+	bcm2835_wr_fifo(bs);
+
+	/* enable interrupts */
 	cs |= BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
 	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
 
@@ -210,15 +344,162 @@ static int bcm2835_spi_transfer_one_irq(struct spi_master *master,
  * the main one being that DMA transfers are limited to 16 bit
  * (so 0 to 65535 bytes) by the SPI HW due to BCM2835_SPI_DLEN
  *
- * also we currently assume that the scatter-gather fragments are
- * all multiple of 4 (except the last) - otherwise we would need
- * to reset the FIFO before subsequent transfers...
- * this also means that tx/rx transfers sg's need to be of equal size!
- *
  * there may be a few more border-cases we may need to address as well
  * but unfortunately this would mean splitting up the scatter-gather
  * list making it slightly unpractical...
  */
+
+/**
+ * bcm2835_spi_transfer_prologue() - transfer first few bytes without DMA
+ * @master: SPI master
+ * @tfr: SPI transfer
+ * @bs: BCM2835 SPI controller
+ * @cs: CS register
+ *
+ * A limitation in DMA mode is that the FIFO must be accessed in 4 byte chunks.
+ * Only the final write access is permitted to transmit less than 4 bytes, the
+ * SPI controller deduces its intended size from the DLEN register.
+ *
+ * If a TX or RX sglist contains multiple entries, one per page, and the first
+ * entry starts in the middle of a page, that first entry's length may not be
+ * a multiple of 4.  Subsequent entries are fine because they span an entire
+ * page, hence do have a length that's a multiple of 4.
+ *
+ * This cannot happen with kmalloc'ed buffers (which is what most clients use)
+ * because they are contiguous in physical memory and therefore not split on
+ * page boundaries by spi_map_buf().  But it *can* happen with vmalloc'ed
+ * buffers.
+ *
+ * The DMA engine is incapable of combining sglist entries into a continuous
+ * stream of 4 byte chunks, it treats every entry separately:  A TX entry is
+ * rounded up a to a multiple of 4 bytes by transmitting surplus bytes, an RX
+ * entry is rounded up by throwing away received bytes.
+ *
+ * Overcome this limitation by transferring the first few bytes without DMA:
+ * E.g. if the first TX sglist entry's length is 23 and the first RX's is 42,
+ * write 3 bytes to the TX FIFO but read only 2 bytes from the RX FIFO.
+ * The residue of 1 byte in the RX FIFO is picked up by DMA.  Together with
+ * the rest of the first RX sglist entry it makes up a multiple of 4 bytes.
+ *
+ * Should the RX prologue be larger, say, 3 vis-à-vis a TX prologue of 1,
+ * write 1 + 4 = 5 bytes to the TX FIFO and read 3 bytes from the RX FIFO.
+ * Caution, the additional 4 bytes spill over to the second TX sglist entry
+ * if the length of the first is *exactly* 1.
+ *
+ * At most 6 bytes are written and at most 3 bytes read.  Do we know the
+ * transfer has this many bytes?  Yes, see BCM2835_SPI_DMA_MIN_LENGTH.
+ *
+ * The FIFO is normally accessed with 8-bit width by the CPU and 32-bit width
+ * by the DMA engine.  Toggling the DMA Enable flag in the CS register switches
+ * the width but also garbles the FIFO's contents.  The prologue must therefore
+ * be transmitted in 32-bit width to ensure that the following DMA transfer can
+ * pick up the residue in the RX FIFO in ungarbled form.
+ */
+static void bcm2835_spi_transfer_prologue(struct spi_master *master,
+					  struct spi_transfer *tfr,
+					  struct bcm2835_spi *bs,
+					  u32 cs)
+{
+	int tx_remaining;
+
+	bs->tfr		 = tfr;
+	bs->tx_prologue  = 0;
+	bs->rx_prologue  = 0;
+	bs->tx_spillover = false;
+
+	if (!sg_is_last(&tfr->tx_sg.sgl[0]))
+		bs->tx_prologue = sg_dma_len(&tfr->tx_sg.sgl[0]) & 3;
+
+	if (!sg_is_last(&tfr->rx_sg.sgl[0])) {
+		bs->rx_prologue = sg_dma_len(&tfr->rx_sg.sgl[0]) & 3;
+
+		if (bs->rx_prologue > bs->tx_prologue) {
+			if (sg_is_last(&tfr->tx_sg.sgl[0])) {
+				bs->tx_prologue  = bs->rx_prologue;
+			} else {
+				bs->tx_prologue += 4;
+				bs->tx_spillover =
+					!(sg_dma_len(&tfr->tx_sg.sgl[0]) & ~3);
+			}
+		}
+	}
+
+	/* rx_prologue > 0 implies tx_prologue > 0, so check only the latter */
+	if (!bs->tx_prologue)
+		return;
+
+	/* Write and read RX prologue.  Adjust first entry in RX sglist. */
+	if (bs->rx_prologue) {
+		bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->rx_prologue);
+		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA
+						  | BCM2835_SPI_CS_DMAEN);
+		bcm2835_wr_fifo_count(bs, bs->rx_prologue);
+		bcm2835_wait_tx_fifo_empty(bs);
+		bcm2835_rd_fifo_count(bs, bs->rx_prologue);
+		bcm2835_spi_reset_hw(master);
+
+		dma_sync_single_for_device(master->dma_rx->device->dev,
+					   sg_dma_address(&tfr->rx_sg.sgl[0]),
+					   bs->rx_prologue, DMA_FROM_DEVICE);
+
+		sg_dma_address(&tfr->rx_sg.sgl[0]) += bs->rx_prologue;
+		sg_dma_len(&tfr->rx_sg.sgl[0])     -= bs->rx_prologue;
+	}
+
+	/*
+	 * Write remaining TX prologue.  Adjust first entry in TX sglist.
+	 * Also adjust second entry if prologue spills over to it.
+	 */
+	tx_remaining = bs->tx_prologue - bs->rx_prologue;
+	if (tx_remaining) {
+		bcm2835_wr(bs, BCM2835_SPI_DLEN, tx_remaining);
+		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA
+						  | BCM2835_SPI_CS_DMAEN);
+		bcm2835_wr_fifo_count(bs, tx_remaining);
+		bcm2835_wait_tx_fifo_empty(bs);
+		bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_CLEAR_TX);
+	}
+
+	if (likely(!bs->tx_spillover)) {
+		sg_dma_address(&tfr->tx_sg.sgl[0]) += bs->tx_prologue;
+		sg_dma_len(&tfr->tx_sg.sgl[0])     -= bs->tx_prologue;
+	} else {
+		sg_dma_len(&tfr->tx_sg.sgl[0])      = 0;
+		sg_dma_address(&tfr->tx_sg.sgl[1]) += 4;
+		sg_dma_len(&tfr->tx_sg.sgl[1])     -= 4;
+	}
+}
+
+/**
+ * bcm2835_spi_undo_prologue() - reconstruct original sglist state
+ * @bs: BCM2835 SPI controller
+ *
+ * Undo changes which were made to an SPI transfer's sglist when transmitting
+ * the prologue.  This is necessary to ensure the same memory ranges are
+ * unmapped that were originally mapped.
+ */
+static void bcm2835_spi_undo_prologue(struct bcm2835_spi *bs)
+{
+	struct spi_transfer *tfr = bs->tfr;
+
+	if (!bs->tx_prologue)
+		return;
+
+	if (bs->rx_prologue) {
+		sg_dma_address(&tfr->rx_sg.sgl[0]) -= bs->rx_prologue;
+		sg_dma_len(&tfr->rx_sg.sgl[0])     += bs->rx_prologue;
+	}
+
+	if (likely(!bs->tx_spillover)) {
+		sg_dma_address(&tfr->tx_sg.sgl[0]) -= bs->tx_prologue;
+		sg_dma_len(&tfr->tx_sg.sgl[0])     += bs->tx_prologue;
+	} else {
+		sg_dma_len(&tfr->tx_sg.sgl[0])      = bs->tx_prologue - 4;
+		sg_dma_address(&tfr->tx_sg.sgl[1]) -= 4;
+		sg_dma_len(&tfr->tx_sg.sgl[1])     += 4;
+	}
+}
+
 static void bcm2835_spi_dma_done(void *data)
 {
 	struct spi_master *master = data;
@@ -233,7 +514,8 @@ static void bcm2835_spi_dma_done(void *data)
 	 * situation otherwise...
 	 */
 	if (cmpxchg(&bs->dma_pending, true, false)) {
-		dmaengine_terminate_all(master->dma_tx);
+		dmaengine_terminate_async(master->dma_tx);
+		bcm2835_spi_undo_prologue(bs);
 	}
 
 	/* and mark as completed */;
@@ -284,20 +566,6 @@ static int bcm2835_spi_prepare_sg(struct spi_master *master,
 	return dma_submit_error(cookie);
 }
 
-static inline int bcm2835_check_sg_length(struct sg_table *sgt)
-{
-	int i;
-	struct scatterlist *sgl;
-
-	/* check that the sg entries are word-sized (except for last) */
-	for_each_sg(sgt->sgl, sgl, (int)sgt->nents - 1, i) {
-		if (sg_dma_len(sgl) % 4)
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
 static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
 					struct spi_device *spi,
 					struct spi_transfer *tfr,
@@ -306,18 +574,16 @@ static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
 	int ret;
 
-	/* check that the scatter gather segments are all a multiple of 4 */
-	if (bcm2835_check_sg_length(&tfr->tx_sg) ||
-	    bcm2835_check_sg_length(&tfr->rx_sg)) {
-		dev_warn_once(&spi->dev,
-			      "scatter gather segment length is not a multiple of 4 - falling back to interrupt mode\n");
-		return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
-	}
+	/*
+	 * Transfer first few bytes without DMA if length of first TX or RX
+	 * sglist entry is not a multiple of 4 bytes (hardware limitation).
+	 */
+	bcm2835_spi_transfer_prologue(master, tfr, bs, cs);
 
 	/* setup tx-DMA */
 	ret = bcm2835_spi_prepare_sg(master, tfr, true);
 	if (ret)
-		return ret;
+		goto err_reset_hw;
 
 	/* start TX early */
 	dma_async_issue_pending(master->dma_tx);
@@ -326,7 +592,7 @@ static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
 	bs->dma_pending = 1;
 
 	/* set the DMA length */
-	bcm2835_wr(bs, BCM2835_SPI_DLEN, tfr->len);
+	bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->tx_len);
 
 	/* start the HW */
 	bcm2835_wr(bs, BCM2835_SPI_CS,
@@ -339,10 +605,9 @@ static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
 	ret = bcm2835_spi_prepare_sg(master, tfr, false);
 	if (ret) {
 		/* need to reset on errors */
-		dmaengine_terminate_all(master->dma_tx);
+		dmaengine_terminate_sync(master->dma_tx);
 		bs->dma_pending = false;
-		bcm2835_spi_reset_hw(master);
-		return ret;
+		goto err_reset_hw;
 	}
 
 	/* start rx dma late */
@@ -350,16 +615,17 @@ static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
 
 	/* wait for wakeup in framework */
 	return 1;
+
+err_reset_hw:
+	bcm2835_spi_reset_hw(master);
+	bcm2835_spi_undo_prologue(bs);
+	return ret;
 }
 
 static bool bcm2835_spi_can_dma(struct spi_master *master,
 				struct spi_device *spi,
 				struct spi_transfer *tfr)
 {
-	/* only run for gpio_cs */
-	if (!gpio_is_valid(spi->cs_gpio))
-		return false;
-
 	/* we start DMA efforts only on bigger transfers */
 	if (tfr->len < BCM2835_SPI_DMA_MIN_LENGTH)
 		return false;
@@ -377,25 +643,6 @@ static bool bcm2835_spi_can_dma(struct spi_master *master,
 		return false;
 	}
 
-	/* if we run rx/tx_buf with word aligned addresses then we are OK */
-	if ((((size_t)tfr->rx_buf & 3) == 0) &&
-	    (((size_t)tfr->tx_buf & 3) == 0))
-		return true;
-
-	/* otherwise we only allow transfers within the same page
-	 * to avoid wasting time on dma_mapping when it is not practical
-	 */
-	if (((size_t)tfr->tx_buf & (PAGE_SIZE - 1)) + tfr->len > PAGE_SIZE) {
-		dev_warn_once(&spi->dev,
-			      "Unaligned spi tx-transfer bridging page\n");
-		return false;
-	}
-	if (((size_t)tfr->rx_buf & (PAGE_SIZE - 1)) + tfr->len > PAGE_SIZE) {
-		dev_warn_once(&spi->dev,
-			      "Unaligned spi rx-transfer bridging page\n");
-		return false;
-	}
-
 	/* return OK */
 	return true;
 }
@@ -403,12 +650,12 @@ static bool bcm2835_spi_can_dma(struct spi_master *master,
 static void bcm2835_dma_release(struct spi_master *master)
 {
 	if (master->dma_tx) {
-		dmaengine_terminate_all(master->dma_tx);
+		dmaengine_terminate_sync(master->dma_tx);
 		dma_release_channel(master->dma_tx);
 		master->dma_tx = NULL;
 	}
 	if (master->dma_rx) {
-		dmaengine_terminate_all(master->dma_rx);
+		dmaengine_terminate_sync(master->dma_rx);
 		dma_release_channel(master->dma_rx);
 		master->dma_rx = NULL;
 	}
@@ -491,7 +738,7 @@ static int bcm2835_spi_transfer_one_poll(struct spi_master *master,
 	 * if we are interrupted here, then the data is
 	 * getting transferred by the HW while we are interrupted
 	 */
-	bcm2835_wr_fifo(bs);
+	bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
 
 	/* set the timeout */
 	timeout = jiffies + BCM2835_SPI_POLLING_JIFFIES;
@@ -514,7 +761,7 @@ static int bcm2835_spi_transfer_one_poll(struct spi_master *master,
 					    bs->tx_len, bs->rx_len);
 			/* fall back to interrupt mode */
 			return bcm2835_spi_transfer_one_irq(master, spi,
-							    tfr, cs);
+							    tfr, cs, false);
 		}
 	}
 
@@ -559,12 +806,12 @@ static int bcm2835_spi_transfer_one(struct spi_master *master,
 	else
 		cs &= ~BCM2835_SPI_CS_REN;
 
-	/* for gpio_cs set dummy CS so that no HW-CS get changed
-	 * we can not run this in bcm2835_spi_set_cs, as it does
-	 * not get called for cs_gpio cases, so we need to do it here
+	/*
+	 * The driver always uses software-controlled GPIO Chip Select.
+	 * Set the hardware-controlled native Chip Select to an invalid
+	 * value to prevent it from interfering.
 	 */
-	if (gpio_is_valid(spi->cs_gpio) || (spi->mode & SPI_NO_CS))
-		cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+	cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
 
 	/* set transmit buffers and length */
 	bs->tx_buf = tfr->tx_buf;
@@ -588,7 +835,7 @@ static int bcm2835_spi_transfer_one(struct spi_master *master,
 		return bcm2835_spi_transfer_one_dma(master, spi, tfr, cs);
 
 	/* run in interrupt-mode */
-	return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
+	return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs, true);
 }
 
 static int bcm2835_spi_prepare_message(struct spi_master *master,
@@ -617,66 +864,14 @@ static void bcm2835_spi_handle_err(struct spi_master *master,
 
 	/* if an error occurred and we have an active dma, then terminate */
 	if (cmpxchg(&bs->dma_pending, true, false)) {
-		dmaengine_terminate_all(master->dma_tx);
-		dmaengine_terminate_all(master->dma_rx);
+		dmaengine_terminate_sync(master->dma_tx);
+		dmaengine_terminate_sync(master->dma_rx);
+		bcm2835_spi_undo_prologue(bs);
 	}
 	/* and reset */
 	bcm2835_spi_reset_hw(master);
 }
 
-static void bcm2835_spi_set_cs(struct spi_device *spi, bool gpio_level)
-{
-	/*
-	 * we can assume that we are "native" as per spi_set_cs
-	 *   calling us ONLY when cs_gpio is not set
-	 * we can also assume that we are CS < 3 as per bcm2835_spi_setup
-	 *   we would not get called because of error handling there.
-	 * the level passed is the electrical level not enabled/disabled
-	 *   so it has to get translated back to enable/disable
-	 *   see spi_set_cs in spi.c for the implementation
-	 */
-
-	struct spi_master *master = spi->master;
-	struct bcm2835_spi *bs = spi_master_get_devdata(master);
-	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
-	bool enable;
-
-	/* calculate the enable flag from the passed gpio_level */
-	enable = (spi->mode & SPI_CS_HIGH) ? gpio_level : !gpio_level;
-
-	/* set flags for "reverse" polarity in the registers */
-	if (spi->mode & SPI_CS_HIGH) {
-		/* set the correct CS-bits */
-		cs |= BCM2835_SPI_CS_CSPOL;
-		cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
-	} else {
-		/* clean the CS-bits */
-		cs &= ~BCM2835_SPI_CS_CSPOL;
-		cs &= ~(BCM2835_SPI_CS_CSPOL0 << spi->chip_select);
-	}
-
-	/* select the correct chip_select depending on disabled/enabled */
-	if (enable) {
-		/* set cs correctly */
-		if (spi->mode & SPI_NO_CS) {
-			/* use the "undefined" chip-select */
-			cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
-		} else {
-			/* set the chip select */
-			cs &= ~(BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01);
-			cs |= spi->chip_select;
-		}
-	} else {
-		/* disable CSPOL which puts HW-CS into deselected state */
-		cs &= ~BCM2835_SPI_CS_CSPOL;
-		/* use the "undefined" chip-select as precaution */
-		cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
-	}
-
-	/* finally set the calculated flags in SPI_CS */
-	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
-}
-
 static int chip_match_name(struct gpio_chip *chip, void *data)
 {
 	return !strcmp(chip->label, data);
@@ -748,7 +943,6 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->num_chipselect = 3;
 	master->setup = bcm2835_spi_setup;
-	master->set_cs = bcm2835_spi_set_cs;
 	master->transfer_one = bcm2835_spi_transfer_one;
 	master->handle_err = bcm2835_spi_handle_err;
 	master->prepare_message = bcm2835_spi_prepare_message;
@@ -841,4 +1035,4 @@ module_platform_driver(bcm2835_spi_driver);
 
 MODULE_DESCRIPTION("SPI controller driver for Broadcom BCM2835");
 MODULE_AUTHOR("Chris Boot <bootc@bootc.net>");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 3094d81..671e374 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -542,4 +542,4 @@ module_platform_driver(bcm2835aux_spi_driver);
 
 MODULE_DESCRIPTION("SPI controller driver for Broadcom BCM2835 aux");
 MODULE_AUTHOR("Martin Sperl <kernel@martin.sperl.org>");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index 3ffb6a40..d0dd781 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -20,6 +20,7 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/of_platform.h>
+#include <linux/acpi.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 
@@ -243,12 +244,19 @@ static const struct of_device_id dw_spi_mmio_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, dw_spi_mmio_of_match);
 
+static const struct acpi_device_id dw_spi_mmio_acpi_match[] = {
+	{"HISI0173", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, dw_spi_mmio_acpi_match);
+
 static struct platform_driver dw_spi_mmio_driver = {
 	.probe		= dw_spi_mmio_probe,
 	.remove		= dw_spi_mmio_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.of_match_table = dw_spi_mmio_of_match,
+		.acpi_match_table = ACPI_PTR(dw_spi_mmio_acpi_match),
 	},
 };
 module_platform_driver(dw_spi_mmio_driver);
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
index b705f2b..2e822a5 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw.c
@@ -507,6 +507,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
 	master->handle_err = dw_spi_handle_err;
 	master->max_speed_hz = dws->max_freq;
 	master->dev.of_node = dev->of_node;
+	master->dev.fwnode = dev->fwnode;
 	master->flags = SPI_MASTER_GPIO_SS;
 
 	if (dws->set_cs)
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 3082e72..5e10dc5 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -1090,8 +1090,8 @@ static int dspi_probe(struct platform_device *pdev)
 		goto out_clk_put;
 	}
 
-	ret = devm_request_irq(&pdev->dev, dspi->irq, dspi_interrupt, 0,
-			pdev->name, dspi);
+	ret = devm_request_irq(&pdev->dev, dspi->irq, dspi_interrupt,
+			       IRQF_SHARED, pdev->name, dspi);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Unable to attach DSPI interrupt\n");
 		goto out_clk_put;
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 5167097..08dcc3c 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -3,6 +3,7 @@
 // Freescale i.MX7ULP LPSPI driver
 //
 // Copyright 2016 Freescale Semiconductor, Inc.
+// Copyright 2018 NXP Semiconductors
 
 #include <linux/clk.h>
 #include <linux/completion.h>
@@ -54,6 +55,7 @@
 #define IER_RDIE	BIT(1)
 #define IER_TDIE	BIT(0)
 #define CFGR1_PCSCFG	BIT(27)
+#define CFGR1_PINCFG	(BIT(24)|BIT(25))
 #define CFGR1_PCSPOL	BIT(8)
 #define CFGR1_NOSTALL	BIT(3)
 #define CFGR1_MASTER	BIT(0)
@@ -79,6 +81,7 @@ struct fsl_lpspi_data {
 	struct device *dev;
 	void __iomem *base;
 	struct clk *clk;
+	bool is_slave;
 
 	void *rx_buf;
 	const void *tx_buf;
@@ -86,11 +89,14 @@ struct fsl_lpspi_data {
 	void (*rx)(struct fsl_lpspi_data *);
 
 	u32 remain;
+	u8 watermark;
 	u8 txfifosize;
 	u8 rxfifosize;
 
 	struct lpspi_config config;
 	struct completion xfer_done;
+
+	bool slave_aborted;
 };
 
 static const struct of_device_id fsl_lpspi_dt_ids[] = {
@@ -137,16 +143,18 @@ static void fsl_lpspi_intctrl(struct fsl_lpspi_data *fsl_lpspi,
 	writel(enable, fsl_lpspi->base + IMX7ULP_IER);
 }
 
-static int lpspi_prepare_xfer_hardware(struct spi_master *master)
+static int lpspi_prepare_xfer_hardware(struct spi_controller *controller)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
 
 	return clk_prepare_enable(fsl_lpspi->clk);
 }
 
-static int lpspi_unprepare_xfer_hardware(struct spi_master *master)
+static int lpspi_unprepare_xfer_hardware(struct spi_controller *controller)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
 
 	clk_disable_unprepare(fsl_lpspi->clk);
 
@@ -203,21 +211,22 @@ static void fsl_lpspi_set_cmd(struct fsl_lpspi_data *fsl_lpspi,
 	u32 temp = 0;
 
 	temp |= fsl_lpspi->config.bpw - 1;
-	temp |= fsl_lpspi->config.prescale << 27;
 	temp |= (fsl_lpspi->config.mode & 0x3) << 30;
-	temp |= (fsl_lpspi->config.chip_select & 0x3) << 24;
+	if (!fsl_lpspi->is_slave) {
+		temp |= fsl_lpspi->config.prescale << 27;
+		temp |= (fsl_lpspi->config.chip_select & 0x3) << 24;
 
-	/*
-	 * Set TCR_CONT will keep SS asserted after current transfer.
-	 * For the first transfer, clear TCR_CONTC to assert SS.
-	 * For subsequent transfer, set TCR_CONTC to keep SS asserted.
-	 */
-	temp |= TCR_CONT;
-	if (is_first_xfer)
-		temp &= ~TCR_CONTC;
-	else
-		temp |= TCR_CONTC;
-
+		/*
+		 * Set TCR_CONT will keep SS asserted after current transfer.
+		 * For the first transfer, clear TCR_CONTC to assert SS.
+		 * For subsequent transfer, set TCR_CONTC to keep SS asserted.
+		 */
+		temp |= TCR_CONT;
+		if (is_first_xfer)
+			temp &= ~TCR_CONTC;
+		else
+			temp |= TCR_CONTC;
+	}
 	writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
 
 	dev_dbg(fsl_lpspi->dev, "TCR=0x%x\n", temp);
@@ -227,7 +236,7 @@ static void fsl_lpspi_set_watermark(struct fsl_lpspi_data *fsl_lpspi)
 {
 	u32 temp;
 
-	temp = fsl_lpspi->txfifosize >> 1 | (fsl_lpspi->rxfifosize >> 1) << 16;
+	temp = fsl_lpspi->watermark >> 1 | (fsl_lpspi->watermark >> 1) << 16;
 
 	writel(temp, fsl_lpspi->base + IMX7ULP_FCR);
 
@@ -253,7 +262,8 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi)
 	if (prescale == 8 && scldiv >= 256)
 		return -EINVAL;
 
-	writel(scldiv, fsl_lpspi->base + IMX7ULP_CCR);
+	writel(scldiv | (scldiv << 8) | ((scldiv >> 1) << 16),
+					fsl_lpspi->base + IMX7ULP_CCR);
 
 	dev_dbg(fsl_lpspi->dev, "perclk=%d, speed=%d, prescale =%d, scldiv=%d\n",
 		perclk_rate, config.speed_hz, prescale, scldiv);
@@ -270,13 +280,18 @@ static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi)
 	writel(temp, fsl_lpspi->base + IMX7ULP_CR);
 	writel(0, fsl_lpspi->base + IMX7ULP_CR);
 
-	ret = fsl_lpspi_set_bitrate(fsl_lpspi);
-	if (ret)
-		return ret;
+	if (!fsl_lpspi->is_slave) {
+		ret = fsl_lpspi_set_bitrate(fsl_lpspi);
+		if (ret)
+			return ret;
+	}
 
 	fsl_lpspi_set_watermark(fsl_lpspi);
 
-	temp = CFGR1_PCSCFG | CFGR1_MASTER;
+	if (!fsl_lpspi->is_slave)
+		temp = CFGR1_MASTER;
+	else
+		temp = CFGR1_PINCFG;
 	if (fsl_lpspi->config.mode & SPI_CS_HIGH)
 		temp |= CFGR1_PCSPOL;
 	writel(temp, fsl_lpspi->base + IMX7ULP_CFGR1);
@@ -291,7 +306,8 @@ static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi)
 static void fsl_lpspi_setup_transfer(struct spi_device *spi,
 				     struct spi_transfer *t)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(spi->master);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(spi->controller);
 
 	fsl_lpspi->config.mode = spi->mode;
 	fsl_lpspi->config.bpw = t ? t->bits_per_word : spi->bits_per_word;
@@ -315,14 +331,51 @@ static void fsl_lpspi_setup_transfer(struct spi_device *spi,
 		fsl_lpspi->tx = fsl_lpspi_buf_tx_u32;
 	}
 
+	if (t->len <= fsl_lpspi->txfifosize)
+		fsl_lpspi->watermark = t->len;
+	else
+		fsl_lpspi->watermark = fsl_lpspi->txfifosize;
+
 	fsl_lpspi_config(fsl_lpspi);
 }
 
-static int fsl_lpspi_transfer_one(struct spi_master *master,
+static int fsl_lpspi_slave_abort(struct spi_controller *controller)
+{
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
+
+	fsl_lpspi->slave_aborted = true;
+	complete(&fsl_lpspi->xfer_done);
+	return 0;
+}
+
+static int fsl_lpspi_wait_for_completion(struct spi_controller *controller)
+{
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
+
+	if (fsl_lpspi->is_slave) {
+		if (wait_for_completion_interruptible(&fsl_lpspi->xfer_done) ||
+			fsl_lpspi->slave_aborted) {
+			dev_dbg(fsl_lpspi->dev, "interrupted\n");
+			return -EINTR;
+		}
+	} else {
+		if (!wait_for_completion_timeout(&fsl_lpspi->xfer_done, HZ)) {
+			dev_dbg(fsl_lpspi->dev, "wait for completion timeout\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+static int fsl_lpspi_transfer_one(struct spi_controller *controller,
 				  struct spi_device *spi,
 				  struct spi_transfer *t)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
 	int ret;
 
 	fsl_lpspi->tx_buf = t->tx_buf;
@@ -330,13 +383,13 @@ static int fsl_lpspi_transfer_one(struct spi_master *master,
 	fsl_lpspi->remain = t->len;
 
 	reinit_completion(&fsl_lpspi->xfer_done);
+	fsl_lpspi->slave_aborted = false;
+
 	fsl_lpspi_write_tx_fifo(fsl_lpspi);
 
-	ret = wait_for_completion_timeout(&fsl_lpspi->xfer_done, HZ);
-	if (!ret) {
-		dev_dbg(fsl_lpspi->dev, "wait for completion timeout\n");
-		return -ETIMEDOUT;
-	}
+	ret = fsl_lpspi_wait_for_completion(controller);
+	if (ret)
+		return ret;
 
 	ret = fsl_lpspi_txfifo_empty(fsl_lpspi);
 	if (ret)
@@ -347,10 +400,11 @@ static int fsl_lpspi_transfer_one(struct spi_master *master,
 	return 0;
 }
 
-static int fsl_lpspi_transfer_one_msg(struct spi_master *master,
+static int fsl_lpspi_transfer_one_msg(struct spi_controller *controller,
 				      struct spi_message *msg)
 {
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
 	struct spi_device *spi = msg->spi;
 	struct spi_transfer *xfer;
 	bool is_first_xfer = true;
@@ -366,7 +420,7 @@ static int fsl_lpspi_transfer_one_msg(struct spi_master *master,
 
 		is_first_xfer = false;
 
-		ret = fsl_lpspi_transfer_one(master, spi, xfer);
+		ret = fsl_lpspi_transfer_one(controller, spi, xfer);
 		if (ret < 0)
 			goto complete;
 
@@ -374,13 +428,15 @@ static int fsl_lpspi_transfer_one_msg(struct spi_master *master,
 	}
 
 complete:
-	/* de-assert SS, then finalize current message */
-	temp = readl(fsl_lpspi->base + IMX7ULP_TCR);
-	temp &= ~TCR_CONTC;
-	writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
+	if (!fsl_lpspi->is_slave) {
+		/* de-assert SS, then finalize current message */
+		temp = readl(fsl_lpspi->base + IMX7ULP_TCR);
+		temp &= ~TCR_CONTC;
+		writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
+	}
 
 	msg->status = ret;
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(controller);
 
 	return ret;
 }
@@ -410,30 +466,39 @@ static irqreturn_t fsl_lpspi_isr(int irq, void *dev_id)
 static int fsl_lpspi_probe(struct platform_device *pdev)
 {
 	struct fsl_lpspi_data *fsl_lpspi;
-	struct spi_master *master;
+	struct spi_controller *controller;
 	struct resource *res;
 	int ret, irq;
 	u32 temp;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct fsl_lpspi_data));
-	if (!master)
+	if (of_property_read_bool((&pdev->dev)->of_node, "spi-slave"))
+		controller = spi_alloc_slave(&pdev->dev,
+					sizeof(struct fsl_lpspi_data));
+	else
+		controller = spi_alloc_master(&pdev->dev,
+					sizeof(struct fsl_lpspi_data));
+
+	if (!controller)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, controller);
 
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
-	master->bus_num = pdev->id;
+	controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
+	controller->bus_num = pdev->id;
 
-	fsl_lpspi = spi_master_get_devdata(master);
+	fsl_lpspi = spi_controller_get_devdata(controller);
 	fsl_lpspi->dev = &pdev->dev;
+	fsl_lpspi->is_slave = of_property_read_bool((&pdev->dev)->of_node,
+						    "spi-slave");
 
-	master->transfer_one_message = fsl_lpspi_transfer_one_msg;
-	master->prepare_transfer_hardware = lpspi_prepare_xfer_hardware;
-	master->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	master->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
-	master->dev.of_node = pdev->dev.of_node;
-	master->bus_num = pdev->id;
+	controller->transfer_one_message = fsl_lpspi_transfer_one_msg;
+	controller->prepare_transfer_hardware = lpspi_prepare_xfer_hardware;
+	controller->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware;
+	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	controller->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
+	controller->dev.of_node = pdev->dev.of_node;
+	controller->bus_num = pdev->id;
+	controller->slave_abort = fsl_lpspi_slave_abort;
 
 	init_completion(&fsl_lpspi->xfer_done);
 
@@ -441,32 +506,32 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	fsl_lpspi->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(fsl_lpspi->base)) {
 		ret = PTR_ERR(fsl_lpspi->base);
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
 		ret = irq;
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	ret = devm_request_irq(&pdev->dev, irq, fsl_lpspi_isr, 0,
 			       dev_name(&pdev->dev), fsl_lpspi);
 	if (ret) {
 		dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret);
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	fsl_lpspi->clk = devm_clk_get(&pdev->dev, "ipg");
 	if (IS_ERR(fsl_lpspi->clk)) {
 		ret = PTR_ERR(fsl_lpspi->clk);
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	ret = clk_prepare_enable(fsl_lpspi->clk);
 	if (ret) {
 		dev_err(&pdev->dev, "can't enable lpspi clock, ret=%d\n", ret);
-		goto out_master_put;
+		goto out_controller_put;
 	}
 
 	temp = readl(fsl_lpspi->base + IMX7ULP_PARAM);
@@ -475,24 +540,25 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 
 	clk_disable_unprepare(fsl_lpspi->clk);
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, controller);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "spi_register_master error.\n");
-		goto out_master_put;
+		dev_err(&pdev->dev, "spi_register_controller error.\n");
+		goto out_controller_put;
 	}
 
 	return 0;
 
-out_master_put:
-	spi_master_put(master);
+out_controller_put:
+	spi_controller_put(controller);
 
 	return ret;
 }
 
 static int fsl_lpspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	struct spi_controller *controller = platform_get_drvdata(pdev);
+	struct fsl_lpspi_data *fsl_lpspi =
+				spi_controller_get_devdata(controller);
 
 	clk_disable_unprepare(fsl_lpspi->clk);
 
@@ -509,6 +575,6 @@ static struct platform_driver fsl_lpspi_driver = {
 };
 module_platform_driver(fsl_lpspi_driver);
 
-MODULE_DESCRIPTION("LPSPI Master Controller driver");
+MODULE_DESCRIPTION("LPSPI Controller driver");
 MODULE_AUTHOR("Gao Pan <pandy.gao@nxp.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 6432ecc..fdb7cb88 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -64,15 +64,13 @@
 #define TIMESTAMP_AFTER		BIT(3)
 #define POST_CMD_DELAY		BIT(4)
 
-/* SPI M_COMMAND OPCODE */
-enum spi_mcmd_code {
+enum spi_m_cmd_opcode {
 	CMD_NONE,
 	CMD_XFER,
 	CMD_CS,
 	CMD_CANCEL,
 };
 
-
 struct spi_geni_master {
 	struct geni_se se;
 	struct device *dev;
@@ -87,7 +85,7 @@ struct spi_geni_master {
 	struct completion xfer_done;
 	unsigned int oversampling;
 	spinlock_t lock;
-	unsigned int cur_mcmd;
+	enum spi_m_cmd_opcode cur_mcmd;
 	int irq;
 };
 
@@ -129,7 +127,7 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
 	struct spi_geni_master *mas = spi_master_get_devdata(slv->master);
 	struct spi_master *spi = dev_get_drvdata(mas->dev);
 	struct geni_se *se = &mas->se;
-	unsigned long timeout;
+	unsigned long time_left;
 
 	reinit_completion(&mas->xfer_done);
 	pm_runtime_get_sync(mas->dev);
@@ -142,8 +140,8 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
 	else
 		geni_se_setup_m_cmd(se, SPI_CS_DEASSERT, 0);
 
-	timeout = wait_for_completion_timeout(&mas->xfer_done, HZ);
-	if (!timeout)
+	time_left = wait_for_completion_timeout(&mas->xfer_done, HZ);
+	if (!time_left)
 		handle_fifo_timeout(spi, NULL);
 
 	pm_runtime_put(mas->dev);
@@ -485,7 +483,6 @@ static irqreturn_t geni_spi_isr(int irq, void *data)
 	struct geni_se *se = &mas->se;
 	u32 m_irq;
 	unsigned long flags;
-	irqreturn_t ret = IRQ_HANDLED;
 
 	if (mas->cur_mcmd == CMD_NONE)
 		return IRQ_NONE;
@@ -533,16 +530,35 @@ static irqreturn_t geni_spi_isr(int irq, void *data)
 
 	writel(m_irq, se->base + SE_GENI_M_IRQ_CLEAR);
 	spin_unlock_irqrestore(&mas->lock, flags);
-	return ret;
+	return IRQ_HANDLED;
 }
 
 static int spi_geni_probe(struct platform_device *pdev)
 {
-	int ret;
+	int ret, irq;
 	struct spi_master *spi;
 	struct spi_geni_master *mas;
 	struct resource *res;
-	struct geni_se *se;
+	void __iomem *base;
+	struct clk *clk;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "Err getting IRQ %d\n", irq);
+		return irq;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	clk = devm_clk_get(&pdev->dev, "se");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "Err getting SE Core clk %ld\n",
+						PTR_ERR(clk));
+		return PTR_ERR(clk);
+	}
 
 	spi = spi_alloc_master(&pdev->dev, sizeof(*mas));
 	if (!spi)
@@ -550,27 +566,15 @@ static int spi_geni_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, spi);
 	mas = spi_master_get_devdata(spi);
+	mas->irq = irq;
 	mas->dev = &pdev->dev;
 	mas->se.dev = &pdev->dev;
 	mas->se.wrapper = dev_get_drvdata(pdev->dev.parent);
-	se = &mas->se;
+	mas->se.base = base;
+	mas->se.clk = clk;
 
 	spi->bus_num = -1;
 	spi->dev.of_node = pdev->dev.of_node;
-	mas->se.clk = devm_clk_get(&pdev->dev, "se");
-	if (IS_ERR(mas->se.clk)) {
-		ret = PTR_ERR(mas->se.clk);
-		dev_err(&pdev->dev, "Err getting SE Core clk %d\n", ret);
-		goto spi_geni_probe_err;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	se->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(se->base)) {
-		ret = PTR_ERR(se->base);
-		goto spi_geni_probe_err;
-	}
-
 	spi->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_CS_HIGH;
 	spi->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 	spi->num_chipselect = 4;
@@ -589,13 +593,6 @@ static int spi_geni_probe(struct platform_device *pdev)
 	if (ret)
 		goto spi_geni_probe_runtime_disable;
 
-	mas->irq = platform_get_irq(pdev, 0);
-	if (mas->irq < 0) {
-		ret = mas->irq;
-		dev_err(&pdev->dev, "Err getting IRQ %d\n", ret);
-		goto spi_geni_probe_runtime_disable;
-	}
-
 	ret = request_irq(mas->irq, geni_spi_isr,
 			IRQF_TRIGGER_HIGH, "spi_geni", spi);
 	if (ret)
@@ -610,7 +607,6 @@ static int spi_geni_probe(struct platform_device *pdev)
 	free_irq(mas->irq, spi);
 spi_geni_probe_runtime_disable:
 	pm_runtime_disable(&pdev->dev);
-spi_geni_probe_err:
 	spi_master_put(spi);
 	return ret;
 }
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 45973ee..a4aee26 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -256,11 +256,29 @@ static int spi_gpio_setup(struct spi_device *spi)
 static int spi_gpio_set_direction(struct spi_device *spi, bool output)
 {
 	struct spi_gpio *spi_gpio = spi_to_spi_gpio(spi);
+	int ret;
 
 	if (output)
 		return gpiod_direction_output(spi_gpio->mosi, 1);
-	else
-		return gpiod_direction_input(spi_gpio->mosi);
+
+	ret = gpiod_direction_input(spi_gpio->mosi);
+	if (ret)
+		return ret;
+	/*
+	 * Send a turnaround high impedance cycle when switching
+	 * from output to input. Theoretically there should be
+	 * a clock delay here, but as has been noted above, the
+	 * nsec delay function for bit-banged GPIO is simply
+	 * {} because bit-banging just doesn't get fast enough
+	 * anyway.
+	 */
+	if (spi->mode & SPI_3WIRE_HIZ) {
+		gpiod_set_value_cansleep(spi_gpio->sck,
+					 !(spi->mode & SPI_CPOL));
+		gpiod_set_value_cansleep(spi_gpio->sck,
+					 !!(spi->mode & SPI_CPOL));
+	}
+	return 0;
 }
 
 static void spi_gpio_cleanup(struct spi_device *spi)
@@ -410,7 +428,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 		return status;
 
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
-	master->mode_bits = SPI_3WIRE | SPI_CPHA | SPI_CPOL;
+	master->mode_bits = SPI_3WIRE | SPI_3WIRE_HIZ | SPI_CPHA | SPI_CPOL;
 	master->flags = master_flags;
 	master->bus_num = pdev->id;
 	/* The master needs to think there is a chipselect even if not connected */
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index dd1ce12..6ec647b 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -39,8 +39,8 @@
 #define MXC_INT_TE	(1 << 1) /* Transmit FIFO empty interrupt */
 #define MXC_INT_RDR	BIT(4) /* Receive date threshold interrupt */
 
-/* The maximum  bytes that a sdma BD can transfer.*/
-#define MAX_SDMA_BD_BYTES  (1 << 15)
+/* The maximum bytes that a sdma BD can transfer. */
+#define MAX_SDMA_BD_BYTES (1 << 15)
 #define MX51_ECSPI_CTRL_MAX_BURST	512
 /* The maximum bytes that IMX53_ECSPI can transfer in slave mode.*/
 #define MX53_MAX_TRANSFER_BYTES		512
@@ -59,7 +59,9 @@ struct spi_imx_data;
 
 struct spi_imx_devtype_data {
 	void (*intctrl)(struct spi_imx_data *, int);
-	int (*config)(struct spi_device *);
+	int (*prepare_message)(struct spi_imx_data *, struct spi_message *);
+	int (*prepare_transfer)(struct spi_imx_data *, struct spi_device *,
+				struct spi_transfer *);
 	void (*trigger)(struct spi_imx_data *);
 	int (*rx_available)(struct spi_imx_data *);
 	void (*reset)(struct spi_imx_data *);
@@ -85,7 +87,6 @@ struct spi_imx_data {
 	unsigned long spi_clk;
 	unsigned int spi_bus_clk;
 
-	unsigned int speed_hz;
 	unsigned int bits_per_word;
 	unsigned int spi_drctl;
 
@@ -256,7 +257,7 @@ static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi,
 #define MX51_ECSPI_INT_RREN		(1 <<  3)
 #define MX51_ECSPI_INT_RDREN		(1 <<  4)
 
-#define MX51_ECSPI_DMA      0x14
+#define MX51_ECSPI_DMA		0x14
 #define MX51_ECSPI_DMA_TX_WML(wml)	((wml) & 0x3f)
 #define MX51_ECSPI_DMA_RX_WML(wml)	(((wml) & 0x3f) << 16)
 #define MX51_ECSPI_DMA_RXT_WML(wml)	(((wml) & 0x3f) << 24)
@@ -486,11 +487,12 @@ static void mx51_ecspi_disable(struct spi_imx_data *spi_imx)
 	writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL);
 }
 
-static int mx51_ecspi_config(struct spi_device *spi)
+static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
+				      struct spi_message *msg)
 {
-	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	struct spi_device *spi = msg->spi;
 	u32 ctrl = MX51_ECSPI_CTRL_ENABLE;
-	u32 clk = spi_imx->speed_hz, delay, reg;
+	u32 testreg;
 	u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG);
 
 	/* set Master or Slave mode */
@@ -505,19 +507,21 @@ static int mx51_ecspi_config(struct spi_device *spi)
 	if (spi->mode & SPI_READY)
 		ctrl |= MX51_ECSPI_CTRL_DRCTL(spi_imx->spi_drctl);
 
-	/* set clock speed */
-	ctrl |= mx51_ecspi_clkdiv(spi_imx, spi_imx->speed_hz, &clk);
-	spi_imx->spi_bus_clk = clk;
-
 	/* set chip select to use */
 	ctrl |= MX51_ECSPI_CTRL_CS(spi->chip_select);
 
-	if (spi_imx->slave_mode && is_imx53_ecspi(spi_imx))
-		ctrl |= (spi_imx->slave_burst * 8 - 1)
-			<< MX51_ECSPI_CTRL_BL_OFFSET;
+	/*
+	 * The ctrl register must be written first, with the EN bit set other
+	 * registers must not be written to.
+	 */
+	writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL);
+
+	testreg = readl(spi_imx->base + MX51_ECSPI_TESTREG);
+	if (spi->mode & SPI_LOOP)
+		testreg |= MX51_ECSPI_TESTREG_LBC;
 	else
-		ctrl |= (spi_imx->bits_per_word - 1)
-			<< MX51_ECSPI_CTRL_BL_OFFSET;
+		testreg &= ~MX51_ECSPI_TESTREG_LBC;
+	writel(testreg, spi_imx->base + MX51_ECSPI_TESTREG);
 
 	/*
 	 * eCSPI burst completion by Chip Select signal in Slave mode
@@ -541,26 +545,44 @@ static int mx51_ecspi_config(struct spi_device *spi)
 		cfg &= ~MX51_ECSPI_CONFIG_SCLKPOL(spi->chip_select);
 		cfg &= ~MX51_ECSPI_CONFIG_SCLKCTL(spi->chip_select);
 	}
+
 	if (spi->mode & SPI_CS_HIGH)
 		cfg |= MX51_ECSPI_CONFIG_SSBPOL(spi->chip_select);
 	else
 		cfg &= ~MX51_ECSPI_CONFIG_SSBPOL(spi->chip_select);
 
+	writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG);
+
+	return 0;
+}
+
+static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
+				       struct spi_device *spi,
+				       struct spi_transfer *t)
+{
+	u32 ctrl = readl(spi_imx->base + MX51_ECSPI_CTRL);
+	u32 clk = t->speed_hz, delay;
+
+	/* Clear BL field and set the right value */
+	ctrl &= ~MX51_ECSPI_CTRL_BL_MASK;
+	if (spi_imx->slave_mode && is_imx53_ecspi(spi_imx))
+		ctrl |= (spi_imx->slave_burst * 8 - 1)
+			<< MX51_ECSPI_CTRL_BL_OFFSET;
+	else
+		ctrl |= (spi_imx->bits_per_word - 1)
+			<< MX51_ECSPI_CTRL_BL_OFFSET;
+
+	/* set clock speed */
+	ctrl &= ~(0xf << MX51_ECSPI_CTRL_POSTDIV_OFFSET |
+		  0xf << MX51_ECSPI_CTRL_PREDIV_OFFSET);
+	ctrl |= mx51_ecspi_clkdiv(spi_imx, t->speed_hz, &clk);
+	spi_imx->spi_bus_clk = clk;
+
 	if (spi_imx->usedma)
 		ctrl |= MX51_ECSPI_CTRL_SMC;
 
-	/* CTRL register always go first to bring out controller from reset */
 	writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL);
 
-	reg = readl(spi_imx->base + MX51_ECSPI_TESTREG);
-	if (spi->mode & SPI_LOOP)
-		reg |= MX51_ECSPI_TESTREG_LBC;
-	else
-		reg &= ~MX51_ECSPI_TESTREG_LBC;
-	writel(reg, spi_imx->base + MX51_ECSPI_TESTREG);
-
-	writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG);
-
 	/*
 	 * Wait until the changes in the configuration register CONFIGREG
 	 * propagate into the hardware. It takes exactly one tick of the
@@ -587,7 +609,6 @@ static void mx51_setup_wml(struct spi_imx_data *spi_imx)
 	 * Configure the DMA register: setup the watermark
 	 * and enable DMA request.
 	 */
-
 	writel(MX51_ECSPI_DMA_RX_WML(spi_imx->wml - 1) |
 		MX51_ECSPI_DMA_TX_WML(spi_imx->wml) |
 		MX51_ECSPI_DMA_RXT_WML(spi_imx->wml) |
@@ -659,13 +680,20 @@ static void mx31_trigger(struct spi_imx_data *spi_imx)
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
 }
 
-static int mx31_config(struct spi_device *spi)
+static int mx31_prepare_message(struct spi_imx_data *spi_imx,
+				struct spi_message *msg)
 {
-	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	return 0;
+}
+
+static int mx31_prepare_transfer(struct spi_imx_data *spi_imx,
+				 struct spi_device *spi,
+				 struct spi_transfer *t)
+{
 	unsigned int reg = MX31_CSPICTRL_ENABLE | MX31_CSPICTRL_MASTER;
 	unsigned int clk;
 
-	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, spi_imx->speed_hz, &clk) <<
+	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, t->speed_hz, &clk) <<
 		MX31_CSPICTRL_DR_SHIFT;
 	spi_imx->spi_bus_clk = clk;
 
@@ -700,8 +728,10 @@ static int mx31_config(struct spi_device *spi)
 	writel(reg, spi_imx->base + MX31_CSPI_TESTREG);
 
 	if (spi_imx->usedma) {
-		/* configure DMA requests when RXFIFO is half full and
-		   when TXFIFO is half empty */
+		/*
+		 * configure DMA requests when RXFIFO is half full and
+		 * when TXFIFO is half empty
+		 */
 		writel(MX31_DMAREG_RH_DEN | MX31_DMAREG_TH_DEN,
 			spi_imx->base + MX31_CSPI_DMAREG);
 	}
@@ -755,14 +785,21 @@ static void mx21_trigger(struct spi_imx_data *spi_imx)
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
 }
 
-static int mx21_config(struct spi_device *spi)
+static int mx21_prepare_message(struct spi_imx_data *spi_imx,
+				struct spi_message *msg)
 {
-	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	return 0;
+}
+
+static int mx21_prepare_transfer(struct spi_imx_data *spi_imx,
+				 struct spi_device *spi,
+				 struct spi_transfer *t)
+{
 	unsigned int reg = MX21_CSPICTRL_ENABLE | MX21_CSPICTRL_MASTER;
 	unsigned int max = is_imx27_cspi(spi_imx) ? 16 : 18;
 	unsigned int clk;
 
-	reg |= spi_imx_clkdiv_1(spi_imx->spi_clk, spi_imx->speed_hz, max, &clk)
+	reg |= spi_imx_clkdiv_1(spi_imx->spi_clk, t->speed_hz, max, &clk)
 		<< MX21_CSPICTRL_DR_SHIFT;
 	spi_imx->spi_bus_clk = clk;
 
@@ -824,13 +861,20 @@ static void mx1_trigger(struct spi_imx_data *spi_imx)
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
 }
 
-static int mx1_config(struct spi_device *spi)
+static int mx1_prepare_message(struct spi_imx_data *spi_imx,
+			       struct spi_message *msg)
 {
-	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
+	return 0;
+}
+
+static int mx1_prepare_transfer(struct spi_imx_data *spi_imx,
+				struct spi_device *spi,
+				struct spi_transfer *t)
+{
 	unsigned int reg = MX1_CSPICTRL_ENABLE | MX1_CSPICTRL_MASTER;
 	unsigned int clk;
 
-	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, spi_imx->speed_hz, &clk) <<
+	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, t->speed_hz, &clk) <<
 		MX1_CSPICTRL_DR_SHIFT;
 	spi_imx->spi_bus_clk = clk;
 
@@ -858,7 +902,8 @@ static void mx1_reset(struct spi_imx_data *spi_imx)
 
 static struct spi_imx_devtype_data imx1_cspi_devtype_data = {
 	.intctrl = mx1_intctrl,
-	.config = mx1_config,
+	.prepare_message = mx1_prepare_message,
+	.prepare_transfer = mx1_prepare_transfer,
 	.trigger = mx1_trigger,
 	.rx_available = mx1_rx_available,
 	.reset = mx1_reset,
@@ -871,7 +916,8 @@ static struct spi_imx_devtype_data imx1_cspi_devtype_data = {
 
 static struct spi_imx_devtype_data imx21_cspi_devtype_data = {
 	.intctrl = mx21_intctrl,
-	.config = mx21_config,
+	.prepare_message = mx21_prepare_message,
+	.prepare_transfer = mx21_prepare_transfer,
 	.trigger = mx21_trigger,
 	.rx_available = mx21_rx_available,
 	.reset = mx21_reset,
@@ -885,7 +931,8 @@ static struct spi_imx_devtype_data imx21_cspi_devtype_data = {
 static struct spi_imx_devtype_data imx27_cspi_devtype_data = {
 	/* i.mx27 cspi shares the functions with i.mx21 one */
 	.intctrl = mx21_intctrl,
-	.config = mx21_config,
+	.prepare_message = mx21_prepare_message,
+	.prepare_transfer = mx21_prepare_transfer,
 	.trigger = mx21_trigger,
 	.rx_available = mx21_rx_available,
 	.reset = mx21_reset,
@@ -898,7 +945,8 @@ static struct spi_imx_devtype_data imx27_cspi_devtype_data = {
 
 static struct spi_imx_devtype_data imx31_cspi_devtype_data = {
 	.intctrl = mx31_intctrl,
-	.config = mx31_config,
+	.prepare_message = mx31_prepare_message,
+	.prepare_transfer = mx31_prepare_transfer,
 	.trigger = mx31_trigger,
 	.rx_available = mx31_rx_available,
 	.reset = mx31_reset,
@@ -912,7 +960,8 @@ static struct spi_imx_devtype_data imx31_cspi_devtype_data = {
 static struct spi_imx_devtype_data imx35_cspi_devtype_data = {
 	/* i.mx35 and later cspi shares the functions with i.mx31 one */
 	.intctrl = mx31_intctrl,
-	.config = mx31_config,
+	.prepare_message = mx31_prepare_message,
+	.prepare_transfer = mx31_prepare_transfer,
 	.trigger = mx31_trigger,
 	.rx_available = mx31_rx_available,
 	.reset = mx31_reset,
@@ -925,7 +974,8 @@ static struct spi_imx_devtype_data imx35_cspi_devtype_data = {
 
 static struct spi_imx_devtype_data imx51_ecspi_devtype_data = {
 	.intctrl = mx51_ecspi_intctrl,
-	.config = mx51_ecspi_config,
+	.prepare_message = mx51_ecspi_prepare_message,
+	.prepare_transfer = mx51_ecspi_prepare_transfer,
 	.trigger = mx51_ecspi_trigger,
 	.rx_available = mx51_ecspi_rx_available,
 	.reset = mx51_ecspi_reset,
@@ -940,7 +990,8 @@ static struct spi_imx_devtype_data imx51_ecspi_devtype_data = {
 
 static struct spi_imx_devtype_data imx53_ecspi_devtype_data = {
 	.intctrl = mx51_ecspi_intctrl,
-	.config = mx51_ecspi_config,
+	.prepare_message = mx51_ecspi_prepare_message,
+	.prepare_transfer = mx51_ecspi_prepare_transfer,
 	.trigger = mx51_ecspi_trigger,
 	.rx_available = mx51_ecspi_rx_available,
 	.reset = mx51_ecspi_reset,
@@ -1048,7 +1099,7 @@ static void spi_imx_push(struct spi_imx_data *spi_imx)
 		if (!spi_imx->count)
 			break;
 		if (spi_imx->dynamic_burst &&
-		    spi_imx->txfifo >=  DIV_ROUND_UP(spi_imx->remainder,
+		    spi_imx->txfifo >= DIV_ROUND_UP(spi_imx->remainder,
 						     fifo_words))
 			break;
 		spi_imx->tx(spi_imx);
@@ -1142,7 +1193,6 @@ static int spi_imx_setupxfer(struct spi_device *spi,
 		return 0;
 
 	spi_imx->bits_per_word = t->bits_per_word;
-	spi_imx->speed_hz  = t->speed_hz;
 
 	/*
 	 * Initialize the functions for transfer. To transfer non byte-aligned
@@ -1183,7 +1233,7 @@ static int spi_imx_setupxfer(struct spi_device *spi,
 		spi_imx->slave_burst = t->len;
 	}
 
-	spi_imx->devtype_data->config(spi);
+	spi_imx->devtype_data->prepare_transfer(spi_imx, spi, t);
 
 	return 0;
 }
@@ -1492,7 +1542,13 @@ spi_imx_prepare_message(struct spi_master *master, struct spi_message *msg)
 		return ret;
 	}
 
-	return 0;
+	ret = spi_imx->devtype_data->prepare_message(spi_imx, msg);
+	if (ret) {
+		clk_disable(spi_imx->clk_ipg);
+		clk_disable(spi_imx->clk_per);
+	}
+
+	return ret;
 }
 
 static int
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 62a7b80..5e15d62 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -12,7 +12,7 @@
 
 #include "internals.h"
 
-#define SPI_MEM_MAX_BUSWIDTH		4
+#define SPI_MEM_MAX_BUSWIDTH		8
 
 /**
  * spi_controller_dma_map_mem_op_data() - DMA-map the buffer attached to a
@@ -121,6 +121,13 @@ static int spi_check_buswidth_req(struct spi_mem *mem, u8 buswidth, bool tx)
 
 		break;
 
+	case 8:
+		if ((tx && (mode & SPI_TX_OCTAL)) ||
+		    (!tx && (mode & SPI_RX_OCTAL)))
+			return 0;
+
+		break;
+
 	default:
 		break;
 	}
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 0c2867de..0cce6f0 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -120,6 +120,12 @@ static const struct mtk_spi_compatible mt8173_compat = {
 	.must_tx = true,
 };
 
+static const struct mtk_spi_compatible mt8183_compat = {
+	.need_pad_sel = true,
+	.must_tx = true,
+	.enhance_timing = true,
+};
+
 /*
  * A piece of default chip info unless the platform
  * supplies it.
@@ -144,12 +150,18 @@ static const struct of_device_id mtk_spi_of_match[] = {
 	{ .compatible = "mediatek,mt7622-spi",
 		.data = (void *)&mt7622_compat,
 	},
+	{ .compatible = "mediatek,mt7629-spi",
+		.data = (void *)&mt7622_compat,
+	},
 	{ .compatible = "mediatek,mt8135-spi",
 		.data = (void *)&mtk_common_compat,
 	},
 	{ .compatible = "mediatek,mt8173-spi",
 		.data = (void *)&mt8173_compat,
 	},
+	{ .compatible = "mediatek,mt8183-spi",
+		.data = (void *)&mt8183_compat,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, mtk_spi_of_match);
diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c
new file mode 100644
index 0000000..e41ae6e
--- /dev/null
+++ b/drivers/spi/spi-mxic.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2018 Macronix International Co., Ltd.
+//
+// Authors:
+//	Mason Yang <masonccyang@mxic.com.tw>
+//	zhengxunli <zhengxunli@mxic.com.tw>
+//	Boris Brezillon <boris.brezillon@bootlin.com>
+//
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#define HC_CFG			0x0
+#define HC_CFG_IF_CFG(x)	((x) << 27)
+#define HC_CFG_DUAL_SLAVE	BIT(31)
+#define HC_CFG_INDIVIDUAL	BIT(30)
+#define HC_CFG_NIO(x)		(((x) / 4) << 27)
+#define HC_CFG_TYPE(s, t)	((t) << (23 + ((s) * 2)))
+#define HC_CFG_TYPE_SPI_NOR	0
+#define HC_CFG_TYPE_SPI_NAND	1
+#define HC_CFG_TYPE_SPI_RAM	2
+#define HC_CFG_TYPE_RAW_NAND	3
+#define HC_CFG_SLV_ACT(x)	((x) << 21)
+#define HC_CFG_CLK_PH_EN	BIT(20)
+#define HC_CFG_CLK_POL_INV	BIT(19)
+#define HC_CFG_BIG_ENDIAN	BIT(18)
+#define HC_CFG_DATA_PASS	BIT(17)
+#define HC_CFG_IDLE_SIO_LVL(x)	((x) << 16)
+#define HC_CFG_MAN_START_EN	BIT(3)
+#define HC_CFG_MAN_START	BIT(2)
+#define HC_CFG_MAN_CS_EN	BIT(1)
+#define HC_CFG_MAN_CS_ASSERT	BIT(0)
+
+#define INT_STS			0x4
+#define INT_STS_EN		0x8
+#define INT_SIG_EN		0xc
+#define INT_STS_ALL		GENMASK(31, 0)
+#define INT_RDY_PIN		BIT(26)
+#define INT_RDY_SR		BIT(25)
+#define INT_LNR_SUSP		BIT(24)
+#define INT_ECC_ERR		BIT(17)
+#define INT_CRC_ERR		BIT(16)
+#define INT_LWR_DIS		BIT(12)
+#define INT_LRD_DIS		BIT(11)
+#define INT_SDMA_INT		BIT(10)
+#define INT_DMA_FINISH		BIT(9)
+#define INT_RX_NOT_FULL		BIT(3)
+#define INT_RX_NOT_EMPTY	BIT(2)
+#define INT_TX_NOT_FULL		BIT(1)
+#define INT_TX_EMPTY		BIT(0)
+
+#define HC_EN			0x10
+#define HC_EN_BIT		BIT(0)
+
+#define TXD(x)			(0x14 + ((x) * 4))
+#define RXD			0x24
+
+#define SS_CTRL(s)		(0x30 + ((s) * 4))
+#define LRD_CFG			0x44
+#define LWR_CFG			0x80
+#define RWW_CFG			0x70
+#define OP_READ			BIT(23)
+#define OP_DUMMY_CYC(x)		((x) << 17)
+#define OP_ADDR_BYTES(x)	((x) << 14)
+#define OP_CMD_BYTES(x)		(((x) - 1) << 13)
+#define OP_OCTA_CRC_EN		BIT(12)
+#define OP_DQS_EN		BIT(11)
+#define OP_ENHC_EN		BIT(10)
+#define OP_PREAMBLE_EN		BIT(9)
+#define OP_DATA_DDR		BIT(8)
+#define OP_DATA_BUSW(x)		((x) << 6)
+#define OP_ADDR_DDR		BIT(5)
+#define OP_ADDR_BUSW(x)		((x) << 3)
+#define OP_CMD_DDR		BIT(2)
+#define OP_CMD_BUSW(x)		(x)
+#define OP_BUSW_1		0
+#define OP_BUSW_2		1
+#define OP_BUSW_4		2
+#define OP_BUSW_8		3
+
+#define OCTA_CRC		0x38
+#define OCTA_CRC_IN_EN(s)	BIT(3 + ((s) * 16))
+#define OCTA_CRC_CHUNK(s, x)	((fls((x) / 32)) << (1 + ((s) * 16)))
+#define OCTA_CRC_OUT_EN(s)	BIT(0 + ((s) * 16))
+
+#define ONFI_DIN_CNT(s)		(0x3c + (s))
+
+#define LRD_CTRL		0x48
+#define RWW_CTRL		0x74
+#define LWR_CTRL		0x84
+#define LMODE_EN		BIT(31)
+#define LMODE_SLV_ACT(x)	((x) << 21)
+#define LMODE_CMD1(x)		((x) << 8)
+#define LMODE_CMD0(x)		(x)
+
+#define LRD_ADDR		0x4c
+#define LWR_ADDR		0x88
+#define LRD_RANGE		0x50
+#define LWR_RANGE		0x8c
+
+#define AXI_SLV_ADDR		0x54
+
+#define DMAC_RD_CFG		0x58
+#define DMAC_WR_CFG		0x94
+#define DMAC_CFG_PERIPH_EN	BIT(31)
+#define DMAC_CFG_ALLFLUSH_EN	BIT(30)
+#define DMAC_CFG_LASTFLUSH_EN	BIT(29)
+#define DMAC_CFG_QE(x)		(((x) + 1) << 16)
+#define DMAC_CFG_BURST_LEN(x)	(((x) + 1) << 12)
+#define DMAC_CFG_BURST_SZ(x)	((x) << 8)
+#define DMAC_CFG_DIR_READ	BIT(1)
+#define DMAC_CFG_START		BIT(0)
+
+#define DMAC_RD_CNT		0x5c
+#define DMAC_WR_CNT		0x98
+
+#define SDMA_ADDR		0x60
+
+#define DMAM_CFG		0x64
+#define DMAM_CFG_START		BIT(31)
+#define DMAM_CFG_CONT		BIT(30)
+#define DMAM_CFG_SDMA_GAP(x)	(fls((x) / 8192) << 2)
+#define DMAM_CFG_DIR_READ	BIT(1)
+#define DMAM_CFG_EN		BIT(0)
+
+#define DMAM_CNT		0x68
+
+#define LNR_TIMER_TH		0x6c
+
+#define RDM_CFG0		0x78
+#define RDM_CFG0_POLY(x)	(x)
+
+#define RDM_CFG1		0x7c
+#define RDM_CFG1_RDM_EN		BIT(31)
+#define RDM_CFG1_SEED(x)	(x)
+
+#define LWR_SUSP_CTRL		0x90
+#define LWR_SUSP_CTRL_EN	BIT(31)
+
+#define DMAS_CTRL		0x9c
+#define DMAS_CTRL_DIR_READ	BIT(31)
+#define DMAS_CTRL_EN		BIT(30)
+
+#define DATA_STROB		0xa0
+#define DATA_STROB_EDO_EN	BIT(2)
+#define DATA_STROB_INV_POL	BIT(1)
+#define DATA_STROB_DELAY_2CYC	BIT(0)
+
+#define IDLY_CODE(x)		(0xa4 + ((x) * 4))
+#define IDLY_CODE_VAL(x, v)	((v) << (((x) % 4) * 8))
+
+#define GPIO			0xc4
+#define GPIO_PT(x)		BIT(3 + ((x) * 16))
+#define GPIO_RESET(x)		BIT(2 + ((x) * 16))
+#define GPIO_HOLDB(x)		BIT(1 + ((x) * 16))
+#define GPIO_WPB(x)		BIT((x) * 16)
+
+#define HC_VER			0xd0
+
+#define HW_TEST(x)		(0xe0 + ((x) * 4))
+
+struct mxic_spi {
+	struct clk *ps_clk;
+	struct clk *send_clk;
+	struct clk *send_dly_clk;
+	void __iomem *regs;
+	u32 cur_speed_hz;
+};
+
+static int mxic_spi_clk_enable(struct mxic_spi *mxic)
+{
+	int ret;
+
+	ret = clk_prepare_enable(mxic->send_clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(mxic->send_dly_clk);
+	if (ret)
+		goto err_send_dly_clk;
+
+	return ret;
+
+err_send_dly_clk:
+	clk_disable_unprepare(mxic->send_clk);
+
+	return ret;
+}
+
+static void mxic_spi_clk_disable(struct mxic_spi *mxic)
+{
+	clk_disable_unprepare(mxic->send_clk);
+	clk_disable_unprepare(mxic->send_dly_clk);
+}
+
+static void mxic_spi_set_input_delay_dqs(struct mxic_spi *mxic, u8 idly_code)
+{
+	writel(IDLY_CODE_VAL(0, idly_code) |
+	       IDLY_CODE_VAL(1, idly_code) |
+	       IDLY_CODE_VAL(2, idly_code) |
+	       IDLY_CODE_VAL(3, idly_code),
+	       mxic->regs + IDLY_CODE(0));
+	writel(IDLY_CODE_VAL(4, idly_code) |
+	       IDLY_CODE_VAL(5, idly_code) |
+	       IDLY_CODE_VAL(6, idly_code) |
+	       IDLY_CODE_VAL(7, idly_code),
+	       mxic->regs + IDLY_CODE(1));
+}
+
+static int mxic_spi_clk_setup(struct mxic_spi *mxic, unsigned long freq)
+{
+	int ret;
+
+	ret = clk_set_rate(mxic->send_clk, freq);
+	if (ret)
+		return ret;
+
+	ret = clk_set_rate(mxic->send_dly_clk, freq);
+	if (ret)
+		return ret;
+
+	/*
+	 * A constant delay range from 0x0 ~ 0x1F for input delay,
+	 * the unit is 78 ps, the max input delay is 2.418 ns.
+	 */
+	mxic_spi_set_input_delay_dqs(mxic, 0xf);
+
+	/*
+	 * Phase degree = 360 * freq * output-delay
+	 * where output-delay is a constant value 1 ns in FPGA.
+	 *
+	 * Get Phase degree = 360 * freq * 1 ns
+	 *                  = 360 * freq * 1 sec / 1000000000
+	 *                  = 9 * freq / 25000000
+	 */
+	ret = clk_set_phase(mxic->send_dly_clk, 9 * freq / 25000000);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int mxic_spi_set_freq(struct mxic_spi *mxic, unsigned long freq)
+{
+	int ret;
+
+	if (mxic->cur_speed_hz == freq)
+		return 0;
+
+	mxic_spi_clk_disable(mxic);
+	ret = mxic_spi_clk_setup(mxic, freq);
+	if (ret)
+		return ret;
+
+	ret = mxic_spi_clk_enable(mxic);
+	if (ret)
+		return ret;
+
+	mxic->cur_speed_hz = freq;
+
+	return 0;
+}
+
+static void mxic_spi_hw_init(struct mxic_spi *mxic)
+{
+	writel(0, mxic->regs + DATA_STROB);
+	writel(INT_STS_ALL, mxic->regs + INT_STS_EN);
+	writel(0, mxic->regs + HC_EN);
+	writel(0, mxic->regs + LRD_CFG);
+	writel(0, mxic->regs + LRD_CTRL);
+	writel(HC_CFG_NIO(1) | HC_CFG_TYPE(0, HC_CFG_TYPE_SPI_NAND) |
+	       HC_CFG_SLV_ACT(0) | HC_CFG_MAN_CS_EN | HC_CFG_IDLE_SIO_LVL(1),
+	       mxic->regs + HC_CFG);
+}
+
+static int mxic_spi_data_xfer(struct mxic_spi *mxic, const void *txbuf,
+			      void *rxbuf, unsigned int len)
+{
+	unsigned int pos = 0;
+
+	while (pos < len) {
+		unsigned int nbytes = len - pos;
+		u32 data = 0xffffffff;
+		u32 sts;
+		int ret;
+
+		if (nbytes > 4)
+			nbytes = 4;
+
+		if (txbuf)
+			memcpy(&data, txbuf + pos, nbytes);
+
+		ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
+					 sts & INT_TX_EMPTY, 0, USEC_PER_SEC);
+		if (ret)
+			return ret;
+
+		writel(data, mxic->regs + TXD(nbytes % 4));
+
+		if (rxbuf) {
+			ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
+						 sts & INT_TX_EMPTY, 0,
+						 USEC_PER_SEC);
+			if (ret)
+				return ret;
+
+			ret = readl_poll_timeout(mxic->regs + INT_STS, sts,
+						 sts & INT_RX_NOT_EMPTY, 0,
+						 USEC_PER_SEC);
+			if (ret)
+				return ret;
+
+			data = readl(mxic->regs + RXD);
+			data >>= (8 * (4 - nbytes));
+			memcpy(rxbuf + pos, &data, nbytes);
+			WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY);
+		} else {
+			readl(mxic->regs + RXD);
+		}
+		WARN_ON(readl(mxic->regs + INT_STS) & INT_RX_NOT_EMPTY);
+
+		pos += nbytes;
+	}
+
+	return 0;
+}
+
+static bool mxic_spi_mem_supports_op(struct spi_mem *mem,
+				     const struct spi_mem_op *op)
+{
+	if (op->data.buswidth > 4 || op->addr.buswidth > 4 ||
+	    op->dummy.buswidth > 4 || op->cmd.buswidth > 4)
+		return false;
+
+	if (op->data.nbytes && op->dummy.nbytes &&
+	    op->data.buswidth != op->dummy.buswidth)
+		return false;
+
+	if (op->addr.nbytes > 7)
+		return false;
+
+	return true;
+}
+
+static int mxic_spi_mem_exec_op(struct spi_mem *mem,
+				const struct spi_mem_op *op)
+{
+	struct mxic_spi *mxic = spi_master_get_devdata(mem->spi->master);
+	int nio = 1, i, ret;
+	u32 ss_ctrl;
+	u8 addr[8];
+
+	ret = mxic_spi_set_freq(mxic, mem->spi->max_speed_hz);
+	if (ret)
+		return ret;
+
+	if (mem->spi->mode & (SPI_TX_QUAD | SPI_RX_QUAD))
+		nio = 4;
+	else if (mem->spi->mode & (SPI_TX_DUAL | SPI_RX_DUAL))
+		nio = 2;
+
+	writel(HC_CFG_NIO(nio) |
+	       HC_CFG_TYPE(mem->spi->chip_select, HC_CFG_TYPE_SPI_NOR) |
+	       HC_CFG_SLV_ACT(mem->spi->chip_select) | HC_CFG_IDLE_SIO_LVL(1) |
+	       HC_CFG_MAN_CS_EN,
+	       mxic->regs + HC_CFG);
+	writel(HC_EN_BIT, mxic->regs + HC_EN);
+
+	ss_ctrl = OP_CMD_BYTES(1) | OP_CMD_BUSW(fls(op->cmd.buswidth) - 1);
+
+	if (op->addr.nbytes)
+		ss_ctrl |= OP_ADDR_BYTES(op->addr.nbytes) |
+			   OP_ADDR_BUSW(fls(op->addr.buswidth) - 1);
+
+	if (op->dummy.nbytes)
+		ss_ctrl |= OP_DUMMY_CYC(op->dummy.nbytes);
+
+	if (op->data.nbytes) {
+		ss_ctrl |= OP_DATA_BUSW(fls(op->data.buswidth) - 1);
+		if (op->data.dir == SPI_MEM_DATA_IN)
+			ss_ctrl |= OP_READ;
+	}
+
+	writel(ss_ctrl, mxic->regs + SS_CTRL(mem->spi->chip_select));
+
+	writel(readl(mxic->regs + HC_CFG) | HC_CFG_MAN_CS_ASSERT,
+	       mxic->regs + HC_CFG);
+
+	ret = mxic_spi_data_xfer(mxic, &op->cmd.opcode, NULL, 1);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < op->addr.nbytes; i++)
+		addr[i] = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
+
+	ret = mxic_spi_data_xfer(mxic, addr, NULL, op->addr.nbytes);
+	if (ret)
+		goto out;
+
+	ret = mxic_spi_data_xfer(mxic, NULL, NULL, op->dummy.nbytes);
+	if (ret)
+		goto out;
+
+	ret = mxic_spi_data_xfer(mxic,
+				 op->data.dir == SPI_MEM_DATA_OUT ?
+				 op->data.buf.out : NULL,
+				 op->data.dir == SPI_MEM_DATA_IN ?
+				 op->data.buf.in : NULL,
+				 op->data.nbytes);
+
+out:
+	writel(readl(mxic->regs + HC_CFG) & ~HC_CFG_MAN_CS_ASSERT,
+	       mxic->regs + HC_CFG);
+	writel(0, mxic->regs + HC_EN);
+
+	return ret;
+}
+
+static const struct spi_controller_mem_ops mxic_spi_mem_ops = {
+	.supports_op = mxic_spi_mem_supports_op,
+	.exec_op = mxic_spi_mem_exec_op,
+};
+
+static void mxic_spi_set_cs(struct spi_device *spi, bool lvl)
+{
+	struct mxic_spi *mxic = spi_master_get_devdata(spi->master);
+
+	if (!lvl) {
+		writel(readl(mxic->regs + HC_CFG) | HC_CFG_MAN_CS_EN,
+		       mxic->regs + HC_CFG);
+		writel(HC_EN_BIT, mxic->regs + HC_EN);
+		writel(readl(mxic->regs + HC_CFG) | HC_CFG_MAN_CS_ASSERT,
+		       mxic->regs + HC_CFG);
+	} else {
+		writel(readl(mxic->regs + HC_CFG) & ~HC_CFG_MAN_CS_ASSERT,
+		       mxic->regs + HC_CFG);
+		writel(0, mxic->regs + HC_EN);
+	}
+}
+
+static int mxic_spi_transfer_one(struct spi_master *master,
+				 struct spi_device *spi,
+				 struct spi_transfer *t)
+{
+	struct mxic_spi *mxic = spi_master_get_devdata(master);
+	unsigned int busw = OP_BUSW_1;
+	int ret;
+
+	if (t->rx_buf && t->tx_buf) {
+		if (((spi->mode & SPI_TX_QUAD) &&
+		     !(spi->mode & SPI_RX_QUAD)) ||
+		    ((spi->mode & SPI_TX_DUAL) &&
+		     !(spi->mode & SPI_RX_DUAL)))
+			return -ENOTSUPP;
+	}
+
+	ret = mxic_spi_set_freq(mxic, t->speed_hz);
+	if (ret)
+		return ret;
+
+	if (t->tx_buf) {
+		if (spi->mode & SPI_TX_QUAD)
+			busw = OP_BUSW_4;
+		else if (spi->mode & SPI_TX_DUAL)
+			busw = OP_BUSW_2;
+	} else if (t->rx_buf) {
+		if (spi->mode & SPI_RX_QUAD)
+			busw = OP_BUSW_4;
+		else if (spi->mode & SPI_RX_DUAL)
+			busw = OP_BUSW_2;
+	}
+
+	writel(OP_CMD_BYTES(1) | OP_CMD_BUSW(busw) |
+	       OP_DATA_BUSW(busw) | (t->rx_buf ? OP_READ : 0),
+	       mxic->regs + SS_CTRL(0));
+
+	ret = mxic_spi_data_xfer(mxic, t->tx_buf, t->rx_buf, t->len);
+	if (ret)
+		return ret;
+
+	spi_finalize_current_transfer(master);
+
+	return 0;
+}
+
+static int __maybe_unused mxic_spi_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct mxic_spi *mxic = spi_master_get_devdata(master);
+
+	mxic_spi_clk_disable(mxic);
+	clk_disable_unprepare(mxic->ps_clk);
+
+	return 0;
+}
+
+static int __maybe_unused mxic_spi_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct mxic_spi *mxic = spi_master_get_devdata(master);
+	int ret;
+
+	ret = clk_prepare_enable(mxic->ps_clk);
+	if (ret) {
+		dev_err(dev, "Cannot enable ps_clock.\n");
+		return ret;
+	}
+
+	return mxic_spi_clk_enable(mxic);
+}
+
+static const struct dev_pm_ops mxic_spi_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(mxic_spi_runtime_suspend,
+			   mxic_spi_runtime_resume, NULL)
+};
+
+static int mxic_spi_probe(struct platform_device *pdev)
+{
+	struct spi_master *master;
+	struct resource *res;
+	struct mxic_spi *mxic;
+	int ret;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(struct mxic_spi));
+	if (!master)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, master);
+
+	mxic = spi_master_get_devdata(master);
+
+	master->dev.of_node = pdev->dev.of_node;
+
+	mxic->ps_clk = devm_clk_get(&pdev->dev, "ps_clk");
+	if (IS_ERR(mxic->ps_clk))
+		return PTR_ERR(mxic->ps_clk);
+
+	mxic->send_clk = devm_clk_get(&pdev->dev, "send_clk");
+	if (IS_ERR(mxic->send_clk))
+		return PTR_ERR(mxic->send_clk);
+
+	mxic->send_dly_clk = devm_clk_get(&pdev->dev, "send_dly_clk");
+	if (IS_ERR(mxic->send_dly_clk))
+		return PTR_ERR(mxic->send_dly_clk);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+	mxic->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mxic->regs))
+		return PTR_ERR(mxic->regs);
+
+	pm_runtime_enable(&pdev->dev);
+	master->auto_runtime_pm = true;
+
+	master->num_chipselect = 1;
+	master->mem_ops = &mxic_spi_mem_ops;
+
+	master->set_cs = mxic_spi_set_cs;
+	master->transfer_one = mxic_spi_transfer_one;
+	master->bits_per_word_mask = SPI_BPW_MASK(8);
+	master->mode_bits = SPI_CPOL | SPI_CPHA |
+			SPI_RX_DUAL | SPI_TX_DUAL |
+			SPI_RX_QUAD | SPI_TX_QUAD;
+
+	mxic_spi_hw_init(mxic);
+
+	ret = spi_register_master(master);
+	if (ret) {
+		dev_err(&pdev->dev, "spi_register_master failed\n");
+		goto err_put_master;
+	}
+
+	return 0;
+
+err_put_master:
+	spi_master_put(master);
+	pm_runtime_disable(&pdev->dev);
+
+	return ret;
+}
+
+static int mxic_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+
+	pm_runtime_disable(&pdev->dev);
+	spi_unregister_master(master);
+
+	return 0;
+}
+
+static const struct of_device_id mxic_spi_of_ids[] = {
+	{ .compatible = "mxicy,mx25f0a-spi", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mxic_spi_of_ids);
+
+static struct platform_driver mxic_spi_driver = {
+	.probe = mxic_spi_probe,
+	.remove = mxic_spi_remove,
+	.driver = {
+		.name = "mxic-spi",
+		.of_match_table = mxic_spi_of_ids,
+		.pm = &mxic_spi_dev_pm_ops,
+	},
+};
+module_platform_driver(mxic_spi_driver);
+
+MODULE_AUTHOR("Mason Yang <masonccyang@mxic.com.tw>");
+MODULE_DESCRIPTION("MX25F0A SPI controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-npcm-pspi.c b/drivers/spi/spi-npcm-pspi.c
new file mode 100644
index 0000000..e1dca79
--- /dev/null
+++ b/drivers/spi/spi-npcm-pspi.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Nuvoton Technology corporation.
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+
+#include <asm/unaligned.h>
+
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+
+struct npcm_pspi {
+	struct completion xfer_done;
+	struct regmap *rst_regmap;
+	struct spi_master *master;
+	unsigned int tx_bytes;
+	unsigned int rx_bytes;
+	void __iomem *base;
+	bool is_save_param;
+	u8 bits_per_word;
+	const u8 *tx_buf;
+	struct clk *clk;
+	u32 speed_hz;
+	u8 *rx_buf;
+	u16 mode;
+	u32 id;
+};
+
+#define DRIVER_NAME "npcm-pspi"
+
+#define NPCM_PSPI_DATA		0x00
+#define NPCM_PSPI_CTL1		0x02
+#define NPCM_PSPI_STAT		0x04
+
+/* definitions for control and status register */
+#define NPCM_PSPI_CTL1_SPIEN	BIT(0)
+#define NPCM_PSPI_CTL1_MOD	BIT(2)
+#define NPCM_PSPI_CTL1_EIR	BIT(5)
+#define NPCM_PSPI_CTL1_EIW	BIT(6)
+#define NPCM_PSPI_CTL1_SCM	BIT(7)
+#define NPCM_PSPI_CTL1_SCIDL	BIT(8)
+#define NPCM_PSPI_CTL1_SCDV6_0	GENMASK(15, 9)
+
+#define NPCM_PSPI_STAT_BSY	BIT(0)
+#define NPCM_PSPI_STAT_RBF	BIT(1)
+
+/* general definitions */
+#define NPCM_PSPI_TIMEOUT_MS		2000
+#define NPCM_PSPI_MAX_CLK_DIVIDER	256
+#define NPCM_PSPI_MIN_CLK_DIVIDER	4
+#define NPCM_PSPI_DEFAULT_CLK		25000000
+
+/* reset register */
+#define NPCM7XX_IPSRST2_OFFSET	0x24
+
+#define NPCM7XX_PSPI1_RESET	BIT(22)
+#define NPCM7XX_PSPI2_RESET	BIT(23)
+
+static inline unsigned int bytes_per_word(unsigned int bits)
+{
+	return bits <= 8 ? 1 : 2;
+}
+
+static inline void npcm_pspi_irq_enable(struct npcm_pspi *priv, u16 mask)
+{
+	u16 val;
+
+	val = ioread16(priv->base + NPCM_PSPI_CTL1);
+	val |= mask;
+	iowrite16(val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static inline void npcm_pspi_irq_disable(struct npcm_pspi *priv, u16 mask)
+{
+	u16 val;
+
+	val = ioread16(priv->base + NPCM_PSPI_CTL1);
+	val &= ~mask;
+	iowrite16(val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static inline void npcm_pspi_enable(struct npcm_pspi *priv)
+{
+	u16 val;
+
+	val = ioread16(priv->base + NPCM_PSPI_CTL1);
+	val |= NPCM_PSPI_CTL1_SPIEN;
+	iowrite16(val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static inline void npcm_pspi_disable(struct npcm_pspi *priv)
+{
+	u16 val;
+
+	val = ioread16(priv->base + NPCM_PSPI_CTL1);
+	val &= ~NPCM_PSPI_CTL1_SPIEN;
+	iowrite16(val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static void npcm_pspi_set_mode(struct spi_device *spi)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(spi->master);
+	u16 regtemp;
+	u16 mode_val;
+
+	switch (spi->mode & (SPI_CPOL | SPI_CPHA)) {
+	case SPI_MODE_0:
+		mode_val = 0;
+		break;
+	case SPI_MODE_1:
+		mode_val = NPCM_PSPI_CTL1_SCIDL;
+		break;
+	case SPI_MODE_2:
+		mode_val = NPCM_PSPI_CTL1_SCM;
+		break;
+	case SPI_MODE_3:
+		mode_val = NPCM_PSPI_CTL1_SCIDL | NPCM_PSPI_CTL1_SCM;
+		break;
+	}
+
+	regtemp = ioread16(priv->base + NPCM_PSPI_CTL1);
+	regtemp &= ~(NPCM_PSPI_CTL1_SCM | NPCM_PSPI_CTL1_SCIDL);
+	iowrite16(regtemp | mode_val, priv->base + NPCM_PSPI_CTL1);
+}
+
+static void npcm_pspi_set_transfer_size(struct npcm_pspi *priv, int size)
+{
+	u16 regtemp;
+
+	regtemp = ioread16(NPCM_PSPI_CTL1 + priv->base);
+
+	switch (size) {
+	case 8:
+		regtemp &= ~NPCM_PSPI_CTL1_MOD;
+		break;
+	case 16:
+		regtemp |= NPCM_PSPI_CTL1_MOD;
+		break;
+	}
+
+	iowrite16(regtemp, NPCM_PSPI_CTL1 + priv->base);
+}
+
+static void npcm_pspi_set_baudrate(struct npcm_pspi *priv, unsigned int speed)
+{
+	u32 ckdiv;
+	u16 regtemp;
+
+	/* the supported rates are numbers from 4 to 256. */
+	ckdiv = DIV_ROUND_CLOSEST(clk_get_rate(priv->clk), (2 * speed)) - 1;
+
+	regtemp = ioread16(NPCM_PSPI_CTL1 + priv->base);
+	regtemp &= ~NPCM_PSPI_CTL1_SCDV6_0;
+	iowrite16(regtemp | (ckdiv << 9), NPCM_PSPI_CTL1 + priv->base);
+}
+
+static void npcm_pspi_setup_transfer(struct spi_device *spi,
+				     struct spi_transfer *t)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(spi->master);
+
+	priv->tx_buf = t->tx_buf;
+	priv->rx_buf = t->rx_buf;
+	priv->tx_bytes = t->len;
+	priv->rx_bytes = t->len;
+
+	if (!priv->is_save_param || priv->mode != spi->mode) {
+		npcm_pspi_set_mode(spi);
+		priv->mode = spi->mode;
+	}
+
+	if (!priv->is_save_param || priv->bits_per_word != t->bits_per_word) {
+		npcm_pspi_set_transfer_size(priv, t->bits_per_word);
+		priv->bits_per_word = t->bits_per_word;
+	}
+
+	if (!priv->is_save_param || priv->speed_hz != t->speed_hz) {
+		npcm_pspi_set_baudrate(priv, t->speed_hz);
+		priv->speed_hz = t->speed_hz;
+	}
+
+	if (!priv->is_save_param)
+		priv->is_save_param = true;
+}
+
+static void npcm_pspi_send(struct npcm_pspi *priv)
+{
+	int wsize;
+
+	wsize = min(bytes_per_word(priv->bits_per_word), priv->tx_bytes);
+	priv->tx_bytes -= wsize;
+
+	if (!priv->tx_buf)
+		return;
+
+	switch (wsize) {
+	case 1:
+		iowrite8(*priv->tx_buf, NPCM_PSPI_DATA + priv->base);
+		break;
+	case 2:
+		iowrite16(*priv->tx_buf, NPCM_PSPI_DATA + priv->base);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	priv->tx_buf += wsize;
+}
+
+static void npcm_pspi_recv(struct npcm_pspi *priv)
+{
+	int rsize;
+	u16 val;
+
+	rsize = min(bytes_per_word(priv->bits_per_word), priv->rx_bytes);
+	priv->rx_bytes -= rsize;
+
+	if (!priv->rx_buf)
+		return;
+
+	switch (rsize) {
+	case 1:
+		val = ioread8(priv->base + NPCM_PSPI_DATA);
+		break;
+	case 2:
+		val = ioread16(priv->base + NPCM_PSPI_DATA);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	*priv->rx_buf = val;
+	priv->rx_buf += rsize;
+}
+
+static int npcm_pspi_transfer_one(struct spi_master *master,
+				  struct spi_device *spi,
+				  struct spi_transfer *t)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(master);
+	int status;
+
+	npcm_pspi_setup_transfer(spi, t);
+	reinit_completion(&priv->xfer_done);
+	npcm_pspi_enable(priv);
+	status = wait_for_completion_timeout(&priv->xfer_done,
+					     msecs_to_jiffies
+					     (NPCM_PSPI_TIMEOUT_MS));
+	if (status == 0) {
+		npcm_pspi_disable(priv);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int npcm_pspi_prepare_transfer_hardware(struct spi_master *master)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(master);
+
+	npcm_pspi_irq_enable(priv, NPCM_PSPI_CTL1_EIR | NPCM_PSPI_CTL1_EIW);
+
+	return 0;
+}
+
+static int npcm_pspi_unprepare_transfer_hardware(struct spi_master *master)
+{
+	struct npcm_pspi *priv = spi_master_get_devdata(master);
+
+	npcm_pspi_irq_disable(priv, NPCM_PSPI_CTL1_EIR | NPCM_PSPI_CTL1_EIW);
+
+	return 0;
+}
+
+static void npcm_pspi_reset_hw(struct npcm_pspi *priv)
+{
+	regmap_write(priv->rst_regmap, NPCM7XX_IPSRST2_OFFSET,
+		     NPCM7XX_PSPI1_RESET << priv->id);
+	regmap_write(priv->rst_regmap, NPCM7XX_IPSRST2_OFFSET, 0x0);
+}
+
+static irqreturn_t npcm_pspi_handler(int irq, void *dev_id)
+{
+	struct npcm_pspi *priv = dev_id;
+	u16 val;
+	u8 stat;
+
+	stat = ioread8(priv->base + NPCM_PSPI_STAT);
+
+	if (!priv->tx_buf && !priv->rx_buf)
+		return IRQ_NONE;
+
+	if (priv->tx_buf) {
+		if (stat & NPCM_PSPI_STAT_RBF) {
+			val = ioread8(NPCM_PSPI_DATA + priv->base);
+			if (priv->tx_bytes == 0) {
+				npcm_pspi_disable(priv);
+				complete(&priv->xfer_done);
+				return IRQ_HANDLED;
+			}
+		}
+
+		if ((stat & NPCM_PSPI_STAT_BSY) == 0)
+			if (priv->tx_bytes)
+				npcm_pspi_send(priv);
+	}
+
+	if (priv->rx_buf) {
+		if (stat & NPCM_PSPI_STAT_RBF) {
+			if (!priv->rx_bytes)
+				return IRQ_NONE;
+
+			npcm_pspi_recv(priv);
+
+			if (!priv->rx_bytes) {
+				npcm_pspi_disable(priv);
+				complete(&priv->xfer_done);
+				return IRQ_HANDLED;
+			}
+		}
+
+		if (((stat & NPCM_PSPI_STAT_BSY) == 0) && !priv->tx_buf)
+			iowrite8(0x0, NPCM_PSPI_DATA + priv->base);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int npcm_pspi_probe(struct platform_device *pdev)
+{
+	struct npcm_pspi *priv;
+	struct spi_master *master;
+	struct resource *res;
+	unsigned long clk_hz;
+	struct device_node *np = pdev->dev.of_node;
+	int num_cs, i;
+	int csgpio;
+	int irq;
+	int ret;
+
+	num_cs = of_gpio_named_count(np, "cs-gpios");
+	if (num_cs < 0)
+		return num_cs;
+
+	pdev->id = of_alias_get_id(np, "spi");
+	if (pdev->id < 0)
+		pdev->id = 0;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*priv));
+	if (!master)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, master);
+
+	priv = spi_master_get_devdata(master);
+	priv->master = master;
+	priv->is_save_param = false;
+	priv->id = pdev->id;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		goto out_master_put;
+	}
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		ret = PTR_ERR(priv->clk);
+		goto out_master_put;
+	}
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		goto out_master_put;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "failed to get IRQ\n");
+		ret = irq;
+		goto out_disable_clk;
+	}
+
+	priv->rst_regmap =
+		syscon_regmap_lookup_by_compatible("nuvoton,npcm750-rst");
+	if (IS_ERR(priv->rst_regmap)) {
+		dev_err(&pdev->dev, "failed to find nuvoton,npcm750-rst\n");
+		return PTR_ERR(priv->rst_regmap);
+	}
+
+	/* reset SPI-HW block */
+	npcm_pspi_reset_hw(priv);
+
+	ret = devm_request_irq(&pdev->dev, irq, npcm_pspi_handler, 0,
+			       "npcm-pspi", priv);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request IRQ\n");
+		goto out_disable_clk;
+	}
+
+	init_completion(&priv->xfer_done);
+
+	clk_hz = clk_get_rate(priv->clk);
+
+	master->max_speed_hz = DIV_ROUND_UP(clk_hz, NPCM_PSPI_MIN_CLK_DIVIDER);
+	master->min_speed_hz = DIV_ROUND_UP(clk_hz, NPCM_PSPI_MAX_CLK_DIVIDER);
+	master->mode_bits = SPI_CPHA | SPI_CPOL;
+	master->dev.of_node = pdev->dev.of_node;
+	master->bus_num = pdev->id;
+	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
+	master->transfer_one = npcm_pspi_transfer_one;
+	master->prepare_transfer_hardware =
+		npcm_pspi_prepare_transfer_hardware;
+	master->unprepare_transfer_hardware =
+		npcm_pspi_unprepare_transfer_hardware;
+	master->num_chipselect = num_cs;
+
+	for (i = 0; i < num_cs; i++) {
+		csgpio = of_get_named_gpio(np, "cs-gpios", i);
+		if (csgpio < 0) {
+			dev_err(&pdev->dev, "failed to get csgpio#%u\n", i);
+			goto out_disable_clk;
+		}
+		dev_dbg(&pdev->dev, "csgpio#%u = %d\n", i, csgpio);
+		ret = devm_gpio_request_one(&pdev->dev, csgpio,
+					    GPIOF_OUT_INIT_HIGH, DRIVER_NAME);
+		if (ret < 0) {
+			dev_err(&pdev->dev,
+				"failed to configure csgpio#%u %d\n"
+				, i, csgpio);
+			goto out_disable_clk;
+		}
+	}
+
+	/* set to default clock rate */
+	npcm_pspi_set_baudrate(priv, NPCM_PSPI_DEFAULT_CLK);
+
+	ret = devm_spi_register_master(&pdev->dev, master);
+	if (ret)
+		goto out_disable_clk;
+
+	pr_info("NPCM Peripheral SPI %d probed\n", pdev->id);
+
+	return 0;
+
+out_disable_clk:
+	clk_disable_unprepare(priv->clk);
+
+out_master_put:
+	spi_master_put(master);
+	return ret;
+}
+
+static int npcm_pspi_remove(struct platform_device *pdev)
+{
+	struct npcm_pspi *priv = platform_get_drvdata(pdev);
+
+	npcm_pspi_reset_hw(priv);
+	clk_disable_unprepare(priv->clk);
+
+	return 0;
+}
+
+static const struct of_device_id npcm_pspi_match[] = {
+	{ .compatible = "nuvoton,npcm750-pspi", .data = NULL },
+	{}
+};
+MODULE_DEVICE_TABLE(of, npcm_pspi_match);
+
+static struct platform_driver npcm_pspi_driver = {
+	.driver		= {
+		.name		= DRIVER_NAME,
+		.of_match_table	= npcm_pspi_match,
+	},
+	.probe		= npcm_pspi_probe,
+	.remove		= npcm_pspi_remove,
+};
+module_platform_driver(npcm_pspi_driver);
+
+MODULE_DESCRIPTION("NPCM peripheral SPI Controller driver");
+MODULE_AUTHOR("Tomer Maimon <tomer.maimon@nuvoton.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 6120e6a..0c793e3 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -861,11 +861,10 @@ static void dma_callback(void *data)
 
 	/* Update total bytes transferred */
 	msg->actual_length += pl022->cur_transfer->len;
-	if (pl022->cur_transfer->cs_change)
-		pl022_cs_control(pl022, SSP_CHIP_DESELECT);
-
 	/* Move to next transfer */
 	msg->state = next_transfer(pl022);
+	if (msg->state != STATE_DONE && pl022->cur_transfer->cs_change)
+		pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 	tasklet_schedule(&pl022->pump_transfers);
 }
 
@@ -1333,10 +1332,10 @@ static irqreturn_t pl022_interrupt_handler(int irq, void *dev_id)
 		}
 		/* Update total bytes transferred */
 		msg->actual_length += pl022->cur_transfer->len;
-		if (pl022->cur_transfer->cs_change)
-			pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 		/* Move to next transfer */
 		msg->state = next_transfer(pl022);
+		if (msg->state != STATE_DONE && pl022->cur_transfer->cs_change)
+			pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 		tasklet_schedule(&pl022->pump_transfers);
 		return IRQ_HANDLED;
 	}
@@ -1544,10 +1543,11 @@ static void do_polling_transfer(struct pl022 *pl022)
 
 		/* Update total byte transferred */
 		message->actual_length += pl022->cur_transfer->len;
-		if (pl022->cur_transfer->cs_change)
-			pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 		/* Move to next transfer */
 		message->state = next_transfer(pl022);
+		if (message->state != STATE_DONE
+		    && pl022->cur_transfer->cs_change)
+			pl022_cs_control(pl022, SSP_CHIP_DESELECT);
 	}
 out:
 	/* Handle end of message */
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 612cc49..d84b893 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -626,6 +626,11 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
 		return IRQ_HANDLED;
 	}
 
+	if (irq_status & SSSR_TUR) {
+		int_error_stop(drv_data, "interrupt_transfer: fifo underrun");
+		return IRQ_HANDLED;
+	}
+
 	if (irq_status & SSSR_TINT) {
 		pxa2xx_spi_write(drv_data, SSSR, SSSR_TINT);
 		if (drv_data->read(drv_data)) {
@@ -1073,6 +1078,30 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master,
 			pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 	}
 
+	if (drv_data->ssp_type == MMP2_SSP) {
+		u8 tx_level = (pxa2xx_spi_read(drv_data, SSSR)
+					& SSSR_TFL_MASK) >> 8;
+
+		if (tx_level) {
+			/* On MMP2, flipping SSE doesn't to empty TXFIFO. */
+			dev_warn(&spi->dev, "%d bytes of garbage in TXFIFO!\n",
+								tx_level);
+			if (tx_level > transfer->len)
+				tx_level = transfer->len;
+			drv_data->tx += tx_level;
+		}
+	}
+
+	if (spi_controller_is_slave(master)) {
+		while (drv_data->write(drv_data))
+			;
+		if (drv_data->gpiod_ready) {
+			gpiod_set_value(drv_data->gpiod_ready, 1);
+			udelay(1);
+			gpiod_set_value(drv_data->gpiod_ready, 0);
+		}
+	}
+
 	/*
 	 * Release the data by enabling service requests and interrupts,
 	 * without changing any mode bits
@@ -1082,6 +1111,27 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master,
 	return 1;
 }
 
+static int pxa2xx_spi_slave_abort(struct spi_master *master)
+{
+	struct driver_data *drv_data = spi_controller_get_devdata(master);
+
+	/* Stop and reset SSP */
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
+	reset_sccr1(drv_data);
+	if (!pxa25x_ssp_comp(drv_data))
+		pxa2xx_spi_write(drv_data, SSTO, 0);
+	pxa2xx_spi_flush(drv_data);
+	pxa2xx_spi_write(drv_data, SSCR0,
+			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+
+	dev_dbg(&drv_data->pdev->dev, "transfer aborted\n");
+
+	drv_data->master->cur_msg->status = -EINTR;
+	spi_finalize_current_transfer(drv_data->master);
+
+	return 0;
+}
+
 static void pxa2xx_spi_handle_err(struct spi_controller *master,
 				 struct spi_message *msg)
 {
@@ -1209,9 +1259,14 @@ static int setup(struct spi_device *spi)
 		rx_thres = config->rx_threshold;
 		break;
 	default:
-		tx_thres = TX_THRESH_DFLT;
 		tx_hi_thres = 0;
-		rx_thres = RX_THRESH_DFLT;
+		if (spi_controller_is_slave(drv_data->master)) {
+			tx_thres = 1;
+			rx_thres = 2;
+		} else {
+			tx_thres = TX_THRESH_DFLT;
+			rx_thres = RX_THRESH_DFLT;
+		}
 		break;
 	}
 
@@ -1255,6 +1310,12 @@ static int setup(struct spi_device *spi)
 		if (chip_info->enable_loopback)
 			chip->cr1 = SSCR1_LBM;
 	}
+	if (spi_controller_is_slave(drv_data->master)) {
+		chip->cr1 |= SSCR1_SCFR;
+		chip->cr1 |= SSCR1_SCLKDIR;
+		chip->cr1 |= SSCR1_SFRMDIR;
+		chip->cr1 |= SSCR1_SPH;
+	}
 
 	chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres);
 	chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres)
@@ -1500,6 +1561,7 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 	ssp->pdev = pdev;
 	ssp->port_id = pxa2xx_spi_get_port_id(adev);
 
+	pdata->is_slave = of_property_read_bool(pdev->dev.of_node, "spi-slave");
 	pdata->num_chipselect = 1;
 	pdata->enable_dma = true;
 
@@ -1559,7 +1621,11 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	master = spi_alloc_master(dev, sizeof(struct driver_data));
+	if (platform_info->is_slave)
+		master = spi_alloc_slave(dev, sizeof(struct driver_data));
+	else
+		master = spi_alloc_master(dev, sizeof(struct driver_data));
+
 	if (!master) {
 		dev_err(&pdev->dev, "cannot alloc spi_master\n");
 		pxa_ssp_free(ssp);
@@ -1581,6 +1647,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	master->setup = setup;
 	master->set_cs = pxa2xx_spi_set_cs;
 	master->transfer_one = pxa2xx_spi_transfer_one;
+	master->slave_abort = pxa2xx_spi_slave_abort;
 	master->handle_err = pxa2xx_spi_handle_err;
 	master->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer;
 	master->fw_translate_cs = pxa2xx_spi_fw_translate_cs;
@@ -1610,7 +1677,8 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		drv_data->int_cr1 = SSCR1_TIE | SSCR1_RIE | SSCR1_TINTE;
 		drv_data->dma_cr1 = DEFAULT_DMA_CR1;
 		drv_data->clear_sr = SSSR_ROR | SSSR_TINT;
-		drv_data->mask_sr = SSSR_TINT | SSSR_RFS | SSSR_TFS | SSSR_ROR;
+		drv_data->mask_sr = SSSR_TINT | SSSR_RFS | SSSR_TFS
+						| SSSR_ROR | SSSR_TUR;
 	}
 
 	status = request_irq(ssp->irq, ssp_int, IRQF_SHARED, dev_name(dev),
@@ -1658,10 +1726,22 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 		break;
 	default:
-		tmp = SSCR1_RxTresh(RX_THRESH_DFLT) |
-		      SSCR1_TxTresh(TX_THRESH_DFLT);
+
+		if (spi_controller_is_slave(master)) {
+			tmp = SSCR1_SCFR |
+			      SSCR1_SCLKDIR |
+			      SSCR1_SFRMDIR |
+			      SSCR1_RxTresh(2) |
+			      SSCR1_TxTresh(1) |
+			      SSCR1_SPH;
+		} else {
+			tmp = SSCR1_RxTresh(RX_THRESH_DFLT) |
+			      SSCR1_TxTresh(TX_THRESH_DFLT);
+		}
 		pxa2xx_spi_write(drv_data, SSCR1, tmp);
-		tmp = SSCR0_SCR(2) | SSCR0_Motorola | SSCR0_DataSize(8);
+		tmp = SSCR0_Motorola | SSCR0_DataSize(8);
+		if (!spi_controller_is_slave(master))
+			tmp |= SSCR0_SCR(2);
 		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 		break;
 	}
@@ -1711,7 +1791,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 				if (PTR_ERR(gpiod) == -ENOENT)
 					continue;
 
-				status = (int)PTR_ERR(gpiod);
+				status = PTR_ERR(gpiod);
 				goto out_error_clock_enabled;
 			} else {
 				drv_data->cs_gpiods[i] = gpiod;
@@ -1719,6 +1799,15 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		}
 	}
 
+	if (platform_info->is_slave) {
+		drv_data->gpiod_ready = devm_gpiod_get_optional(dev,
+						"ready", GPIOD_OUT_LOW);
+		if (IS_ERR(drv_data->gpiod_ready)) {
+			status = PTR_ERR(drv_data->gpiod_ready);
+			goto out_error_clock_enabled;
+		}
+	}
+
 	pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
@@ -1811,10 +1900,6 @@ static int pxa2xx_spi_resume(struct device *dev)
 			return status;
 	}
 
-	/* Restore LPSS private register bits */
-	if (is_lpss_ssp(drv_data))
-		lpss_ssp_setup(drv_data);
-
 	/* Start the queue running */
 	return spi_controller_resume(drv_data->master);
 }
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 513c53a..4e324da 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -64,6 +64,9 @@ struct driver_data {
 
 	/* GPIOs for chip selects */
 	struct gpio_desc **cs_gpiods;
+
+	/* Optional slave FIFO ready signal */
+	struct gpio_desc *gpiod_ready;
 };
 
 struct chip_data {
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 51ef632..3912526 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -54,6 +54,9 @@
 
 /* Bit fields in CTRLR0 */
 #define CR0_DFS_OFFSET				0
+#define CR0_DFS_4BIT				0x0
+#define CR0_DFS_8BIT				0x1
+#define CR0_DFS_16BIT				0x2
 
 #define CR0_CFS_OFFSET				2
 
@@ -94,6 +97,7 @@
 #define CR0_BHT_8BIT				0x1
 
 #define CR0_RSD_OFFSET				14
+#define CR0_RSD_MAX				0x3
 
 #define CR0_FRF_OFFSET				16
 #define CR0_FRF_SPI					0x0
@@ -115,6 +119,10 @@
 /* Bit fields in SER, 2bit */
 #define SER_MASK					0x3
 
+/* Bit fields in BAUDR */
+#define BAUDR_SCKDV_MIN				2
+#define BAUDR_SCKDV_MAX				65534
+
 /* Bit fields in SR, 5bit */
 #define SR_MASK						0x1f
 #define SR_BUSY						(1 << 0)
@@ -142,11 +150,12 @@
 #define RF_DMA_EN					(1 << 0)
 #define TF_DMA_EN					(1 << 1)
 
-#define RXBUSY						(1 << 0)
-#define TXBUSY						(1 << 1)
+/* Driver state flags */
+#define RXDMA					(1 << 0)
+#define TXDMA					(1 << 1)
 
 /* sclk_out: spi master internal logic in rk3x can support 50Mhz */
-#define MAX_SCLK_OUT		50000000
+#define MAX_SCLK_OUT				50000000U
 
 /*
  * SPI_CTRLR1 is 16-bits, so we should support lengths of 0xffff + 1. However,
@@ -156,72 +165,37 @@
 
 #define ROCKCHIP_SPI_MAX_CS_NUM			2
 
-enum rockchip_ssi_type {
-	SSI_MOTO_SPI = 0,
-	SSI_TI_SSP,
-	SSI_NS_MICROWIRE,
-};
-
-struct rockchip_spi_dma_data {
-	struct dma_chan *ch;
-	dma_addr_t addr;
-};
-
 struct rockchip_spi {
 	struct device *dev;
-	struct spi_master *master;
 
 	struct clk *spiclk;
 	struct clk *apb_pclk;
 
 	void __iomem *regs;
-	/*depth of the FIFO buffer */
-	u32 fifo_len;
-	/* max bus freq supported */
-	u32 max_freq;
-	/* supported slave numbers */
-	enum rockchip_ssi_type type;
-
-	u16 mode;
-	u8 tmode;
-	u8 bpw;
-	u8 n_bytes;
-	u32 rsd_nsecs;
-	unsigned len;
-	u32 speed;
+	dma_addr_t dma_addr_rx;
+	dma_addr_t dma_addr_tx;
 
 	const void *tx;
-	const void *tx_end;
 	void *rx;
-	void *rx_end;
+	unsigned int tx_left;
+	unsigned int rx_left;
 
-	u32 state;
-	/* protect state */
-	spinlock_t lock;
+	atomic_t state;
+
+	/*depth of the FIFO buffer */
+	u32 fifo_len;
+	/* frequency of spiclk */
+	u32 freq;
+
+	u8 n_bytes;
+	u8 rsd;
 
 	bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM];
-
-	bool use_dma;
-	struct sg_table tx_sg;
-	struct sg_table rx_sg;
-	struct rockchip_spi_dma_data dma_rx;
-	struct rockchip_spi_dma_data dma_tx;
 };
 
-static inline void spi_enable_chip(struct rockchip_spi *rs, int enable)
+static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
 {
-	writel_relaxed((enable ? 1 : 0), rs->regs + ROCKCHIP_SPI_SSIENR);
-}
-
-static inline void spi_set_clk(struct rockchip_spi *rs, u16 div)
-{
-	writel_relaxed(div, rs->regs + ROCKCHIP_SPI_BAUDR);
-}
-
-static inline void flush_fifo(struct rockchip_spi *rs)
-{
-	while (readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR))
-		readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+	writel_relaxed((enable ? 1U : 0U), rs->regs + ROCKCHIP_SPI_SSIENR);
 }
 
 static inline void wait_for_idle(struct rockchip_spi *rs)
@@ -251,24 +225,6 @@ static u32 get_fifo_len(struct rockchip_spi *rs)
 	return (fifo == 31) ? 0 : fifo;
 }
 
-static inline u32 tx_max(struct rockchip_spi *rs)
-{
-	u32 tx_left, tx_room;
-
-	tx_left = (rs->tx_end - rs->tx) / rs->n_bytes;
-	tx_room = rs->fifo_len - readl_relaxed(rs->regs + ROCKCHIP_SPI_TXFLR);
-
-	return min(tx_left, tx_room);
-}
-
-static inline u32 rx_max(struct rockchip_spi *rs)
-{
-	u32 rx_left = (rs->rx_end - rs->rx) / rs->n_bytes;
-	u32 rx_room = (u32)readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
-
-	return min(rx_left, rx_room);
-}
-
 static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 {
 	struct spi_master *master = spi->master;
@@ -296,64 +252,39 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 	rs->cs_asserted[spi->chip_select] = cs_asserted;
 }
 
-static int rockchip_spi_prepare_message(struct spi_master *master,
-					struct spi_message *msg)
-{
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
-	struct spi_device *spi = msg->spi;
-
-	rs->mode = spi->mode;
-
-	return 0;
-}
-
 static void rockchip_spi_handle_err(struct spi_master *master,
 				    struct spi_message *msg)
 {
-	unsigned long flags;
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
-	spin_lock_irqsave(&rs->lock, flags);
-
-	/*
-	 * For DMA mode, we need terminate DMA channel and flush
-	 * fifo for the next transfer if DMA thansfer timeout.
-	 * handle_err() was called by core if transfer failed.
-	 * Maybe it is reasonable for error handling here.
+	/* stop running spi transfer
+	 * this also flushes both rx and tx fifos
 	 */
-	if (rs->use_dma) {
-		if (rs->state & RXBUSY) {
-			dmaengine_terminate_async(rs->dma_rx.ch);
-			flush_fifo(rs);
-		}
+	spi_enable_chip(rs, false);
 
-		if (rs->state & TXBUSY)
-			dmaengine_terminate_async(rs->dma_tx.ch);
-	}
+	/* make sure all interrupts are masked */
+	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
 
-	spin_unlock_irqrestore(&rs->lock, flags);
-}
+	if (atomic_read(&rs->state) & TXDMA)
+		dmaengine_terminate_async(master->dma_tx);
 
-static int rockchip_spi_unprepare_message(struct spi_master *master,
-					  struct spi_message *msg)
-{
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
-
-	spi_enable_chip(rs, 0);
-
-	return 0;
+	if (atomic_read(&rs->state) & RXDMA)
+		dmaengine_terminate_async(master->dma_rx);
 }
 
 static void rockchip_spi_pio_writer(struct rockchip_spi *rs)
 {
-	u32 max = tx_max(rs);
-	u32 txw = 0;
+	u32 tx_free = rs->fifo_len - readl_relaxed(rs->regs + ROCKCHIP_SPI_TXFLR);
+	u32 words = min(rs->tx_left, tx_free);
 
-	while (max--) {
+	rs->tx_left -= words;
+	for (; words; words--) {
+		u32 txw;
+
 		if (rs->n_bytes == 1)
-			txw = *(u8 *)(rs->tx);
+			txw = *(u8 *)rs->tx;
 		else
-			txw = *(u16 *)(rs->tx);
+			txw = *(u16 *)rs->tx;
 
 		writel_relaxed(txw, rs->regs + ROCKCHIP_SPI_TXDR);
 		rs->tx += rs->n_bytes;
@@ -362,229 +293,249 @@ static void rockchip_spi_pio_writer(struct rockchip_spi *rs)
 
 static void rockchip_spi_pio_reader(struct rockchip_spi *rs)
 {
-	u32 max = rx_max(rs);
-	u32 rxw;
+	u32 words = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFLR);
+	u32 rx_left = rs->rx_left - words;
 
-	while (max--) {
-		rxw = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+	/* the hardware doesn't allow us to change fifo threshold
+	 * level while spi is enabled, so instead make sure to leave
+	 * enough words in the rx fifo to get the last interrupt
+	 * exactly when all words have been received
+	 */
+	if (rx_left) {
+		u32 ftl = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXFTLR) + 1;
+
+		if (rx_left < ftl) {
+			rx_left = ftl;
+			words = rs->rx_left - rx_left;
+		}
+	}
+
+	rs->rx_left = rx_left;
+	for (; words; words--) {
+		u32 rxw = readl_relaxed(rs->regs + ROCKCHIP_SPI_RXDR);
+
+		if (!rs->rx)
+			continue;
+
 		if (rs->n_bytes == 1)
-			*(u8 *)(rs->rx) = (u8)rxw;
+			*(u8 *)rs->rx = (u8)rxw;
 		else
-			*(u16 *)(rs->rx) = (u16)rxw;
+			*(u16 *)rs->rx = (u16)rxw;
 		rs->rx += rs->n_bytes;
 	}
 }
 
-static int rockchip_spi_pio_transfer(struct rockchip_spi *rs)
+static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
 {
-	int remain = 0;
+	struct spi_master *master = dev_id;
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
-	spi_enable_chip(rs, 1);
+	if (rs->tx_left)
+		rockchip_spi_pio_writer(rs);
 
-	do {
-		if (rs->tx) {
-			remain = rs->tx_end - rs->tx;
-			rockchip_spi_pio_writer(rs);
-		}
+	rockchip_spi_pio_reader(rs);
+	if (!rs->rx_left) {
+		spi_enable_chip(rs, false);
+		writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
+		spi_finalize_current_transfer(master);
+	}
 
-		if (rs->rx) {
-			remain = rs->rx_end - rs->rx;
-			rockchip_spi_pio_reader(rs);
-		}
+	return IRQ_HANDLED;
+}
 
-		cpu_relax();
-	} while (remain);
+static int rockchip_spi_prepare_irq(struct rockchip_spi *rs,
+		struct spi_transfer *xfer)
+{
+	rs->tx = xfer->tx_buf;
+	rs->rx = xfer->rx_buf;
+	rs->tx_left = rs->tx ? xfer->len / rs->n_bytes : 0;
+	rs->rx_left = xfer->len / rs->n_bytes;
 
-	/* If tx, wait until the FIFO data completely. */
-	if (rs->tx)
-		wait_for_idle(rs);
+	writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
+	spi_enable_chip(rs, true);
 
-	spi_enable_chip(rs, 0);
+	if (rs->tx_left)
+		rockchip_spi_pio_writer(rs);
 
-	return 0;
+	/* 1 means the transfer is in progress */
+	return 1;
 }
 
 static void rockchip_spi_dma_rxcb(void *data)
 {
-	unsigned long flags;
-	struct rockchip_spi *rs = data;
+	struct spi_master *master = data;
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	int state = atomic_fetch_andnot(RXDMA, &rs->state);
 
-	spin_lock_irqsave(&rs->lock, flags);
+	if (state & TXDMA)
+		return;
 
-	rs->state &= ~RXBUSY;
-	if (!(rs->state & TXBUSY)) {
-		spi_enable_chip(rs, 0);
-		spi_finalize_current_transfer(rs->master);
-	}
-
-	spin_unlock_irqrestore(&rs->lock, flags);
+	spi_enable_chip(rs, false);
+	spi_finalize_current_transfer(master);
 }
 
 static void rockchip_spi_dma_txcb(void *data)
 {
-	unsigned long flags;
-	struct rockchip_spi *rs = data;
+	struct spi_master *master = data;
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	int state = atomic_fetch_andnot(TXDMA, &rs->state);
+
+	if (state & RXDMA)
+		return;
 
 	/* Wait until the FIFO data completely. */
 	wait_for_idle(rs);
 
-	spin_lock_irqsave(&rs->lock, flags);
-
-	rs->state &= ~TXBUSY;
-	if (!(rs->state & RXBUSY)) {
-		spi_enable_chip(rs, 0);
-		spi_finalize_current_transfer(rs->master);
-	}
-
-	spin_unlock_irqrestore(&rs->lock, flags);
+	spi_enable_chip(rs, false);
+	spi_finalize_current_transfer(master);
 }
 
-static int rockchip_spi_prepare_dma(struct rockchip_spi *rs)
+static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
+		struct spi_master *master, struct spi_transfer *xfer)
 {
-	unsigned long flags;
-	struct dma_slave_config rxconf, txconf;
 	struct dma_async_tx_descriptor *rxdesc, *txdesc;
 
-	memset(&rxconf, 0, sizeof(rxconf));
-	memset(&txconf, 0, sizeof(txconf));
-
-	spin_lock_irqsave(&rs->lock, flags);
-	rs->state &= ~RXBUSY;
-	rs->state &= ~TXBUSY;
-	spin_unlock_irqrestore(&rs->lock, flags);
+	atomic_set(&rs->state, 0);
 
 	rxdesc = NULL;
-	if (rs->rx) {
-		rxconf.direction = DMA_DEV_TO_MEM;
-		rxconf.src_addr = rs->dma_rx.addr;
-		rxconf.src_addr_width = rs->n_bytes;
-		rxconf.src_maxburst = 1;
-		dmaengine_slave_config(rs->dma_rx.ch, &rxconf);
+	if (xfer->rx_buf) {
+		struct dma_slave_config rxconf = {
+			.direction = DMA_DEV_TO_MEM,
+			.src_addr = rs->dma_addr_rx,
+			.src_addr_width = rs->n_bytes,
+			.src_maxburst = 1,
+		};
+
+		dmaengine_slave_config(master->dma_rx, &rxconf);
 
 		rxdesc = dmaengine_prep_slave_sg(
-				rs->dma_rx.ch,
-				rs->rx_sg.sgl, rs->rx_sg.nents,
+				master->dma_rx,
+				xfer->rx_sg.sgl, xfer->rx_sg.nents,
 				DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
 		if (!rxdesc)
 			return -EINVAL;
 
 		rxdesc->callback = rockchip_spi_dma_rxcb;
-		rxdesc->callback_param = rs;
+		rxdesc->callback_param = master;
 	}
 
 	txdesc = NULL;
-	if (rs->tx) {
-		txconf.direction = DMA_MEM_TO_DEV;
-		txconf.dst_addr = rs->dma_tx.addr;
-		txconf.dst_addr_width = rs->n_bytes;
-		txconf.dst_maxburst = rs->fifo_len / 2;
-		dmaengine_slave_config(rs->dma_tx.ch, &txconf);
+	if (xfer->tx_buf) {
+		struct dma_slave_config txconf = {
+			.direction = DMA_MEM_TO_DEV,
+			.dst_addr = rs->dma_addr_tx,
+			.dst_addr_width = rs->n_bytes,
+			.dst_maxburst = rs->fifo_len / 2,
+		};
+
+		dmaengine_slave_config(master->dma_tx, &txconf);
 
 		txdesc = dmaengine_prep_slave_sg(
-				rs->dma_tx.ch,
-				rs->tx_sg.sgl, rs->tx_sg.nents,
+				master->dma_tx,
+				xfer->tx_sg.sgl, xfer->tx_sg.nents,
 				DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 		if (!txdesc) {
 			if (rxdesc)
-				dmaengine_terminate_sync(rs->dma_rx.ch);
+				dmaengine_terminate_sync(master->dma_rx);
 			return -EINVAL;
 		}
 
 		txdesc->callback = rockchip_spi_dma_txcb;
-		txdesc->callback_param = rs;
+		txdesc->callback_param = master;
 	}
 
 	/* rx must be started before tx due to spi instinct */
 	if (rxdesc) {
-		spin_lock_irqsave(&rs->lock, flags);
-		rs->state |= RXBUSY;
-		spin_unlock_irqrestore(&rs->lock, flags);
+		atomic_or(RXDMA, &rs->state);
 		dmaengine_submit(rxdesc);
-		dma_async_issue_pending(rs->dma_rx.ch);
+		dma_async_issue_pending(master->dma_rx);
 	}
 
-	spi_enable_chip(rs, 1);
+	spi_enable_chip(rs, true);
 
 	if (txdesc) {
-		spin_lock_irqsave(&rs->lock, flags);
-		rs->state |= TXBUSY;
-		spin_unlock_irqrestore(&rs->lock, flags);
+		atomic_or(TXDMA, &rs->state);
 		dmaengine_submit(txdesc);
-		dma_async_issue_pending(rs->dma_tx.ch);
+		dma_async_issue_pending(master->dma_tx);
 	}
 
 	/* 1 means the transfer is in progress */
 	return 1;
 }
 
-static void rockchip_spi_config(struct rockchip_spi *rs)
+static void rockchip_spi_config(struct rockchip_spi *rs,
+		struct spi_device *spi, struct spi_transfer *xfer,
+		bool use_dma)
 {
-	u32 div = 0;
+	u32 cr0 = CR0_FRF_SPI  << CR0_FRF_OFFSET
+	        | CR0_BHT_8BIT << CR0_BHT_OFFSET
+	        | CR0_SSD_ONE  << CR0_SSD_OFFSET
+	        | CR0_EM_BIG   << CR0_EM_OFFSET;
+	u32 cr1;
 	u32 dmacr = 0;
-	int rsd = 0;
 
-	u32 cr0 = (CR0_BHT_8BIT << CR0_BHT_OFFSET)
-		| (CR0_SSD_ONE << CR0_SSD_OFFSET)
-		| (CR0_EM_BIG << CR0_EM_OFFSET);
+	cr0 |= rs->rsd << CR0_RSD_OFFSET;
+	cr0 |= (spi->mode & 0x3U) << CR0_SCPH_OFFSET;
+	if (spi->mode & SPI_LSB_FIRST)
+		cr0 |= CR0_FBM_LSB << CR0_FBM_OFFSET;
 
-	cr0 |= (rs->n_bytes << CR0_DFS_OFFSET);
-	cr0 |= ((rs->mode & 0x3) << CR0_SCPH_OFFSET);
-	cr0 |= (rs->tmode << CR0_XFM_OFFSET);
-	cr0 |= (rs->type << CR0_FRF_OFFSET);
+	if (xfer->rx_buf && xfer->tx_buf)
+		cr0 |= CR0_XFM_TR << CR0_XFM_OFFSET;
+	else if (xfer->rx_buf)
+		cr0 |= CR0_XFM_RO << CR0_XFM_OFFSET;
+	else if (use_dma)
+		cr0 |= CR0_XFM_TO << CR0_XFM_OFFSET;
 
-	if (rs->use_dma) {
-		if (rs->tx)
+	switch (xfer->bits_per_word) {
+	case 4:
+		cr0 |= CR0_DFS_4BIT << CR0_DFS_OFFSET;
+		cr1 = xfer->len - 1;
+		break;
+	case 8:
+		cr0 |= CR0_DFS_8BIT << CR0_DFS_OFFSET;
+		cr1 = xfer->len - 1;
+		break;
+	case 16:
+		cr0 |= CR0_DFS_16BIT << CR0_DFS_OFFSET;
+		cr1 = xfer->len / 2 - 1;
+		break;
+	default:
+		/* we only whitelist 4, 8 and 16 bit words in
+		 * master->bits_per_word_mask, so this shouldn't
+		 * happen
+		 */
+		unreachable();
+	}
+
+	if (use_dma) {
+		if (xfer->tx_buf)
 			dmacr |= TF_DMA_EN;
-		if (rs->rx)
+		if (xfer->rx_buf)
 			dmacr |= RF_DMA_EN;
 	}
 
-	if (WARN_ON(rs->speed > MAX_SCLK_OUT))
-		rs->speed = MAX_SCLK_OUT;
-
-	/* the minimum divisor is 2 */
-	if (rs->max_freq < 2 * rs->speed) {
-		clk_set_rate(rs->spiclk, 2 * rs->speed);
-		rs->max_freq = clk_get_rate(rs->spiclk);
-	}
-
-	/* div doesn't support odd number */
-	div = DIV_ROUND_UP(rs->max_freq, rs->speed);
-	div = (div + 1) & 0xfffe;
-
-	/* Rx sample delay is expressed in parent clock cycles (max 3) */
-	rsd = DIV_ROUND_CLOSEST(rs->rsd_nsecs * (rs->max_freq >> 8),
-				1000000000 >> 8);
-	if (!rsd && rs->rsd_nsecs) {
-		pr_warn_once("rockchip-spi: %u Hz are too slow to express %u ns delay\n",
-			     rs->max_freq, rs->rsd_nsecs);
-	} else if (rsd > 3) {
-		rsd = 3;
-		pr_warn_once("rockchip-spi: %u Hz are too fast to express %u ns delay, clamping at %u ns\n",
-			     rs->max_freq, rs->rsd_nsecs,
-			     rsd * 1000000000U / rs->max_freq);
-	}
-	cr0 |= rsd << CR0_RSD_OFFSET;
-
 	writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0);
+	writel_relaxed(cr1, rs->regs + ROCKCHIP_SPI_CTRLR1);
 
-	if (rs->n_bytes == 1)
-		writel_relaxed(rs->len - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
-	else if (rs->n_bytes == 2)
-		writel_relaxed((rs->len / 2) - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
+	/* unfortunately setting the fifo threshold level to generate an
+	 * interrupt exactly when the fifo is full doesn't seem to work,
+	 * so we need the strict inequality here
+	 */
+	if (xfer->len < rs->fifo_len)
+		writel_relaxed(xfer->len - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
 	else
-		writel_relaxed((rs->len * 2) - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
-
-	writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_TXFTLR);
-	writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
+		writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
 
 	writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_DMATDLR);
 	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_DMARDLR);
 	writel_relaxed(dmacr, rs->regs + ROCKCHIP_SPI_DMACR);
 
-	spi_set_clk(rs, div);
-
-	dev_dbg(rs->dev, "cr0 0x%x, div %d\n", cr0, div);
+	/* the hardware only supports an even clock divisor, so
+	 * round divisor = spiclk / speed up to nearest even number
+	 * so that the resulting speed is <= the requested speed
+	 */
+	writel_relaxed(2 * DIV_ROUND_UP(rs->freq, 2 * xfer->speed_hz),
+			rs->regs + ROCKCHIP_SPI_BAUDR);
 }
 
 static size_t rockchip_spi_max_transfer_size(struct spi_device *spi)
@@ -598,6 +549,7 @@ static int rockchip_spi_transfer_one(
 		struct spi_transfer *xfer)
 {
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	bool use_dma;
 
 	WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
 		(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY));
@@ -612,38 +564,16 @@ static int rockchip_spi_transfer_one(
 		return -EINVAL;
 	}
 
-	rs->speed = xfer->speed_hz;
-	rs->bpw = xfer->bits_per_word;
-	rs->n_bytes = rs->bpw >> 3;
+	rs->n_bytes = xfer->bits_per_word <= 8 ? 1 : 2;
 
-	rs->tx = xfer->tx_buf;
-	rs->tx_end = rs->tx + xfer->len;
-	rs->rx = xfer->rx_buf;
-	rs->rx_end = rs->rx + xfer->len;
-	rs->len = xfer->len;
+	use_dma = master->can_dma ? master->can_dma(master, spi, xfer) : false;
 
-	rs->tx_sg = xfer->tx_sg;
-	rs->rx_sg = xfer->rx_sg;
+	rockchip_spi_config(rs, spi, xfer, use_dma);
 
-	if (rs->tx && rs->rx)
-		rs->tmode = CR0_XFM_TR;
-	else if (rs->tx)
-		rs->tmode = CR0_XFM_TO;
-	else if (rs->rx)
-		rs->tmode = CR0_XFM_RO;
+	if (use_dma)
+		return rockchip_spi_prepare_dma(rs, master, xfer);
 
-	/* we need prepare dma before spi was enabled */
-	if (master->can_dma && master->can_dma(master, spi, xfer))
-		rs->use_dma = true;
-	else
-		rs->use_dma = false;
-
-	rockchip_spi_config(rs);
-
-	if (rs->use_dma)
-		return rockchip_spi_prepare_dma(rs);
-
-	return rockchip_spi_pio_transfer(rs);
+	return rockchip_spi_prepare_irq(rs, xfer);
 }
 
 static bool rockchip_spi_can_dma(struct spi_master *master,
@@ -651,8 +581,13 @@ static bool rockchip_spi_can_dma(struct spi_master *master,
 				 struct spi_transfer *xfer)
 {
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	unsigned int bytes_per_word = xfer->bits_per_word <= 8 ? 1 : 2;
 
-	return (xfer->len > rs->fifo_len);
+	/* if the numbor of spi words to transfer is less than the fifo
+	 * length we can just fill the fifo and wait for a single irq,
+	 * so don't bother setting up dma
+	 */
+	return xfer->len / bytes_per_word >= rs->fifo_len;
 }
 
 static int rockchip_spi_probe(struct platform_device *pdev)
@@ -705,16 +640,36 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 		goto err_disable_apbclk;
 	}
 
-	spi_enable_chip(rs, 0);
+	spi_enable_chip(rs, false);
 
-	rs->type = SSI_MOTO_SPI;
-	rs->master = master;
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		goto err_disable_spiclk;
+
+	ret = devm_request_threaded_irq(&pdev->dev, ret, rockchip_spi_isr, NULL,
+			IRQF_ONESHOT, dev_name(&pdev->dev), master);
+	if (ret)
+		goto err_disable_spiclk;
+
 	rs->dev = &pdev->dev;
-	rs->max_freq = clk_get_rate(rs->spiclk);
+	rs->freq = clk_get_rate(rs->spiclk);
 
 	if (!of_property_read_u32(pdev->dev.of_node, "rx-sample-delay-ns",
-				  &rsd_nsecs))
-		rs->rsd_nsecs = rsd_nsecs;
+				  &rsd_nsecs)) {
+		/* rx sample delay is expressed in parent clock cycles (max 3) */
+		u32 rsd = DIV_ROUND_CLOSEST(rsd_nsecs * (rs->freq >> 8),
+				1000000000 >> 8);
+		if (!rsd) {
+			dev_warn(rs->dev, "%u Hz are too slow to express %u ns delay\n",
+					rs->freq, rsd_nsecs);
+		} else if (rsd > CR0_RSD_MAX) {
+			rsd = CR0_RSD_MAX;
+			dev_warn(rs->dev, "%u Hz are too fast to express %u ns delay, clamping at %u ns\n",
+					rs->freq, rsd_nsecs,
+					CR0_RSD_MAX * 1000000000U / rs->freq);
+		}
+		rs->rsd = rsd;
+	}
 
 	rs->fifo_len = get_fifo_len(rs);
 	if (!rs->fifo_len) {
@@ -723,54 +678,49 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 		goto err_disable_spiclk;
 	}
 
-	spin_lock_init(&rs->lock);
-
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
 	master->auto_runtime_pm = true;
 	master->bus_num = pdev->id;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_LSB_FIRST;
 	master->num_chipselect = ROCKCHIP_SPI_MAX_CS_NUM;
 	master->dev.of_node = pdev->dev.of_node;
-	master->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8);
+	master->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8) | SPI_BPW_MASK(4);
+	master->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
+	master->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
 
 	master->set_cs = rockchip_spi_set_cs;
-	master->prepare_message = rockchip_spi_prepare_message;
-	master->unprepare_message = rockchip_spi_unprepare_message;
 	master->transfer_one = rockchip_spi_transfer_one;
 	master->max_transfer_size = rockchip_spi_max_transfer_size;
 	master->handle_err = rockchip_spi_handle_err;
 	master->flags = SPI_MASTER_GPIO_SS;
 
-	rs->dma_tx.ch = dma_request_chan(rs->dev, "tx");
-	if (IS_ERR(rs->dma_tx.ch)) {
+	master->dma_tx = dma_request_chan(rs->dev, "tx");
+	if (IS_ERR(master->dma_tx)) {
 		/* Check tx to see if we need defer probing driver */
-		if (PTR_ERR(rs->dma_tx.ch) == -EPROBE_DEFER) {
+		if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER) {
 			ret = -EPROBE_DEFER;
 			goto err_disable_pm_runtime;
 		}
 		dev_warn(rs->dev, "Failed to request TX DMA channel\n");
-		rs->dma_tx.ch = NULL;
+		master->dma_tx = NULL;
 	}
 
-	rs->dma_rx.ch = dma_request_chan(rs->dev, "rx");
-	if (IS_ERR(rs->dma_rx.ch)) {
-		if (PTR_ERR(rs->dma_rx.ch) == -EPROBE_DEFER) {
+	master->dma_rx = dma_request_chan(rs->dev, "rx");
+	if (IS_ERR(master->dma_rx)) {
+		if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
 			ret = -EPROBE_DEFER;
 			goto err_free_dma_tx;
 		}
 		dev_warn(rs->dev, "Failed to request RX DMA channel\n");
-		rs->dma_rx.ch = NULL;
+		master->dma_rx = NULL;
 	}
 
-	if (rs->dma_tx.ch && rs->dma_rx.ch) {
-		rs->dma_tx.addr = (dma_addr_t)(mem->start + ROCKCHIP_SPI_TXDR);
-		rs->dma_rx.addr = (dma_addr_t)(mem->start + ROCKCHIP_SPI_RXDR);
-
+	if (master->dma_tx && master->dma_rx) {
+		rs->dma_addr_tx = mem->start + ROCKCHIP_SPI_TXDR;
+		rs->dma_addr_rx = mem->start + ROCKCHIP_SPI_RXDR;
 		master->can_dma = rockchip_spi_can_dma;
-		master->dma_tx = rs->dma_tx.ch;
-		master->dma_rx = rs->dma_rx.ch;
 	}
 
 	ret = devm_spi_register_master(&pdev->dev, master);
@@ -782,11 +732,11 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 	return 0;
 
 err_free_dma_rx:
-	if (rs->dma_rx.ch)
-		dma_release_channel(rs->dma_rx.ch);
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
 err_free_dma_tx:
-	if (rs->dma_tx.ch)
-		dma_release_channel(rs->dma_tx.ch);
+	if (master->dma_tx)
+		dma_release_channel(master->dma_tx);
 err_disable_pm_runtime:
 	pm_runtime_disable(&pdev->dev);
 err_disable_spiclk:
@@ -813,10 +763,10 @@ static int rockchip_spi_remove(struct platform_device *pdev)
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_set_suspended(&pdev->dev);
 
-	if (rs->dma_tx.ch)
-		dma_release_channel(rs->dma_tx.ch);
-	if (rs->dma_rx.ch)
-		dma_release_channel(rs->dma_rx.ch);
+	if (master->dma_tx)
+		dma_release_channel(master->dma_tx);
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
 
 	spi_master_put(master);
 
@@ -828,9 +778,8 @@ static int rockchip_spi_suspend(struct device *dev)
 {
 	int ret;
 	struct spi_master *master = dev_get_drvdata(dev);
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
-	ret = spi_master_suspend(rs->master);
+	ret = spi_master_suspend(master);
 	if (ret < 0)
 		return ret;
 
@@ -855,7 +804,7 @@ static int rockchip_spi_resume(struct device *dev)
 	if (ret < 0)
 		return ret;
 
-	ret = spi_master_resume(rs->master);
+	ret = spi_master_resume(master);
 	if (ret < 0) {
 		clk_disable_unprepare(rs->spiclk);
 		clk_disable_unprepare(rs->apb_pclk);
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 55f8e55..a4ef641 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -1347,16 +1347,14 @@ MODULE_DEVICE_TABLE(platform, spi_driver_ids);
 #ifdef CONFIG_PM_SLEEP
 static int rspi_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rspi_data *rspi = platform_get_drvdata(pdev);
+	struct rspi_data *rspi = dev_get_drvdata(dev);
 
 	return spi_master_suspend(rspi->master);
 }
 
 static int rspi_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rspi_data *rspi = platform_get_drvdata(pdev);
+	struct rspi_data *rspi = dev_get_drvdata(dev);
 
 	return spi_master_resume(rspi->master);
 }
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index adf3843..d14b407 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -977,7 +977,7 @@ static int sh_msiof_transfer_one(struct spi_master *master,
 			return 0;
 	}
 
-	if (bits <= 8 && len > 15 && !(len & 3)) {
+	if (bits <= 8 && len > 15) {
 		bits = 32;
 		swab = true;
 	} else {
@@ -1038,6 +1038,14 @@ static int sh_msiof_transfer_one(struct spi_master *master,
 		if (rx_buf)
 			rx_buf += n * bytes_per_word;
 		words -= n;
+
+		if (words == 0 && (len % bytes_per_word)) {
+			words = len % bytes_per_word;
+			bits = t->bits_per_word;
+			bytes_per_word = 1;
+			tx_fifo = sh_msiof_spi_write_fifo_8;
+			rx_fifo = sh_msiof_spi_read_fifo_8;
+		}
 	}
 
 	return 0;
@@ -1426,16 +1434,14 @@ MODULE_DEVICE_TABLE(platform, spi_driver_ids);
 #ifdef CONFIG_PM_SLEEP
 static int sh_msiof_spi_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev);
+	struct sh_msiof_spi_priv *p = dev_get_drvdata(dev);
 
 	return spi_master_suspend(p->master);
 }
 
 static int sh_msiof_spi_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev);
+	struct sh_msiof_spi_priv *p = dev_get_drvdata(dev);
 
 	return spi_master_resume(p->master);
 }
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index cc4d310..9f83e1b 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -960,8 +960,7 @@ static int __maybe_unused zynqmp_qspi_resume(struct device *dev)
  */
 static int __maybe_unused zynqmp_runtime_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_master *master = dev_get_drvdata(dev);
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
 
 	clk_disable(xqspi->refclk);
@@ -980,8 +979,7 @@ static int __maybe_unused zynqmp_runtime_suspend(struct device *dev)
  */
 static int __maybe_unused zynqmp_runtime_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_master *master = dev_get_drvdata(dev);
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
 	int ret;
 
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 6ca5940..9a7def7 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1,10 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SPI init/core code
- *
- * Copyright (C) 2005 David Brownell
- * Copyright (C) 2008 Secret Lab Technologies Ltd.
- */
+// SPI init/core code
+//
+// Copyright (C) 2005 David Brownell
+// Copyright (C) 2008 Secret Lab Technologies Ltd.
 
 #include <linux/kernel.h>
 #include <linux/device.h>
@@ -1037,6 +1035,42 @@ static int spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 	return __spi_map_msg(ctlr, msg);
 }
 
+static int spi_transfer_wait(struct spi_controller *ctlr,
+			     struct spi_message *msg,
+			     struct spi_transfer *xfer)
+{
+	struct spi_statistics *statm = &ctlr->statistics;
+	struct spi_statistics *stats = &msg->spi->statistics;
+	unsigned long long ms = 1;
+
+	if (spi_controller_is_slave(ctlr)) {
+		if (wait_for_completion_interruptible(&ctlr->xfer_completion)) {
+			dev_dbg(&msg->spi->dev, "SPI transfer interrupted\n");
+			return -EINTR;
+		}
+	} else {
+		ms = 8LL * 1000LL * xfer->len;
+		do_div(ms, xfer->speed_hz);
+		ms += ms + 200; /* some tolerance */
+
+		if (ms > UINT_MAX)
+			ms = UINT_MAX;
+
+		ms = wait_for_completion_timeout(&ctlr->xfer_completion,
+						 msecs_to_jiffies(ms));
+
+		if (ms == 0) {
+			SPI_STATISTICS_INCREMENT_FIELD(statm, timedout);
+			SPI_STATISTICS_INCREMENT_FIELD(stats, timedout);
+			dev_err(&msg->spi->dev,
+				"SPI transfer timed out\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * spi_transfer_one_message - Default implementation of transfer_one_message()
  *
@@ -1050,7 +1084,6 @@ static int spi_transfer_one_message(struct spi_controller *ctlr,
 	struct spi_transfer *xfer;
 	bool keep_cs = false;
 	int ret = 0;
-	unsigned long long ms = 1;
 	struct spi_statistics *statm = &ctlr->statistics;
 	struct spi_statistics *stats = &msg->spi->statistics;
 
@@ -1080,26 +1113,9 @@ static int spi_transfer_one_message(struct spi_controller *ctlr,
 			}
 
 			if (ret > 0) {
-				ret = 0;
-				ms = 8LL * 1000LL * xfer->len;
-				do_div(ms, xfer->speed_hz);
-				ms += ms + 200; /* some tolerance */
-
-				if (ms > UINT_MAX)
-					ms = UINT_MAX;
-
-				ms = wait_for_completion_timeout(&ctlr->xfer_completion,
-								 msecs_to_jiffies(ms));
-			}
-
-			if (ms == 0) {
-				SPI_STATISTICS_INCREMENT_FIELD(statm,
-							       timedout);
-				SPI_STATISTICS_INCREMENT_FIELD(stats,
-							       timedout);
-				dev_err(&msg->spi->dev,
-					"SPI transfer timed out\n");
-				msg->status = -ETIMEDOUT;
+				ret = spi_transfer_wait(ctlr, msg, xfer);
+				if (ret < 0)
+					msg->status = ret;
 			}
 		} else {
 			if (xfer->len)
@@ -1617,6 +1633,9 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi,
 		case 4:
 			spi->mode |= SPI_TX_QUAD;
 			break;
+		case 8:
+			spi->mode |= SPI_TX_OCTAL;
+			break;
 		default:
 			dev_warn(&ctlr->dev,
 				"spi-tx-bus-width %d not supported\n",
@@ -1635,6 +1654,9 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi,
 		case 4:
 			spi->mode |= SPI_RX_QUAD;
 			break;
+		case 8:
+			spi->mode |= SPI_RX_OCTAL;
+			break;
 		default:
 			dev_warn(&ctlr->dev,
 				"spi-rx-bus-width %d not supported\n",
@@ -1644,7 +1666,7 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi,
 	}
 
 	if (spi_controller_is_slave(ctlr)) {
-		if (strcmp(nc->name, "slave")) {
+		if (!of_node_name_eq(nc, "slave")) {
 			dev_err(&ctlr->dev, "%pOF is not called 'slave'\n",
 				nc);
 			return -EINVAL;
@@ -2823,7 +2845,8 @@ int spi_setup(struct spi_device *spi)
 	/* if it is SPI_3WIRE mode, DUAL and QUAD should be forbidden
 	 */
 	if ((spi->mode & SPI_3WIRE) && (spi->mode &
-		(SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD)))
+		(SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL |
+		 SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL)))
 		return -EINVAL;
 	/* help drivers fail *cleanly* when they need options
 	 * that aren't supported with their current controller
@@ -2832,7 +2855,8 @@ int spi_setup(struct spi_device *spi)
 	 */
 	bad_bits = spi->mode & ~(spi->controller->mode_bits | SPI_CS_WORD);
 	ugly_bits = bad_bits &
-		    (SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD);
+		    (SPI_TX_DUAL | SPI_TX_QUAD | SPI_TX_OCTAL |
+		     SPI_RX_DUAL | SPI_RX_QUAD | SPI_RX_OCTAL);
 	if (ugly_bits) {
 		dev_warn(&spi->dev,
 			 "setup: ignoring unsupported mode bits %x\n",
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 9ec4c14..b0674e3 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -25,6 +25,7 @@ struct dma_chan;
 struct pxa2xx_spi_master {
 	u16 num_chipselect;
 	u8 enable_dma;
+	bool is_slave;
 
 	/* DMA engine specific config */
 	bool (*dma_filter)(struct dma_chan *chan, void *param);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 6be77fa..314d922 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -154,7 +154,10 @@ struct spi_device {
 #define	SPI_TX_QUAD	0x200			/* transmit with 4 wires */
 #define	SPI_RX_DUAL	0x400			/* receive with 2 wires */
 #define	SPI_RX_QUAD	0x800			/* receive with 4 wires */
-#define SPI_CS_WORD	0x1000			/* toggle cs after each word */
+#define	SPI_CS_WORD	0x1000			/* toggle cs after each word */
+#define	SPI_TX_OCTAL	0x2000			/* transmit with 8 wires */
+#define	SPI_RX_OCTAL	0x4000			/* receive with 8 wires */
+#define	SPI_3WIRE_HIZ	0x8000			/* high impedance turnaround */
 	int			irq;
 	void			*controller_state;
 	void			*controller_data;