Merge branch 'topic/pl330' into for-linus
diff --git a/Documentation/devicetree/bindings/dma/owl-dma.txt b/Documentation/devicetree/bindings/dma/owl-dma.txt
new file mode 100644
index 0000000..03e9bb12
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/owl-dma.txt
@@ -0,0 +1,47 @@
+* Actions Semi Owl SoCs DMA controller
+
+This binding follows the generic DMA bindings defined in dma.txt.
+
+Required properties:
+- compatible: Should be "actions,s900-dma".
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain 4 interrupts shared by all channel.
+- #dma-cells: Must be <1>. Used to represent the number of integer
+              cells in the dmas property of client device.
+- dma-channels: Physical channels supported.
+- dma-requests: Number of DMA request signals supported by the controller.
+                Refer to Documentation/devicetree/bindings/dma/dma.txt
+- clocks: Phandle and Specifier of the clock feeding the DMA controller.
+
+Example:
+
+Controller:
+                dma: dma-controller@e0260000 {
+                        compatible = "actions,s900-dma";
+                        reg = <0x0 0xe0260000 0x0 0x1000>;
+                        interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+                                     <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>,
+                                     <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>,
+                                     <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>;
+                        #dma-cells = <1>;
+                        dma-channels = <12>;
+                        dma-requests = <46>;
+                        clocks = <&clock CLK_DMAC>;
+                };
+
+Client:
+
+DMA clients connected to the Actions Semi Owl SoCs DMA controller must
+use the format described in the dma.txt file, using a two-cell specifier
+for each channel.
+
+The two cells in order are:
+1. A phandle pointing to the DMA controller.
+2. The channel id.
+
+uart5: serial@e012a000 {
+        ...
+        dma-names = "tx", "rx";
+        dmas = <&dma 26>, <&dma 27>;
+        ...
+};
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
index b1ba6395..946229c 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
@@ -29,6 +29,7 @@
 		- "renesas,dmac-r8a77965" (R-Car M3-N)
 		- "renesas,dmac-r8a77970" (R-Car V3M)
 		- "renesas,dmac-r8a77980" (R-Car V3H)
+		- "renesas,dmac-r8a77990" (R-Car E3)
 		- "renesas,dmac-r8a77995" (R-Car D3)
 
 - reg: base address and length of the registers block for the DMAC
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
index a2b8bfa..174af2c 100644
--- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
+++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
@@ -66,6 +66,8 @@
 Optional child node properties for VDMA:
 - xlnx,genlock-mode: Tells Genlock synchronization is
 	enabled/disabled in hardware.
+- xlnx,enable-vert-flip: Tells vertical flip is
+	enabled/disabled in hardware(S2MM path).
 Optional child node properties for AXI DMA:
 -dma-channels: Number of dma channels in child node.
 
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 7c1bb3d..43681ca 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -240,6 +240,7 @@
   devm_of_clk_add_hw_provider()
 
 DMA
+  dmaenginem_async_device_register()
   dmam_alloc_coherent()
   dmam_alloc_attrs()
   dmam_declare_coherent_memory()
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 56bd612..80dc567 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -42,6 +42,8 @@ static struct page *pq_scribble_page;
 #define P(b, d) (b[d-2])
 #define Q(b, d) (b[d-1])
 
+#define MAX_DISKS 255
+
 /**
  * do_async_gen_syndrome - asynchronously calculate P and/or Q
  */
@@ -184,7 +186,7 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dmaengine_unmap_data *unmap = NULL;
 
-	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
+	BUG_ON(disks > MAX_DISKS || !(P(blocks, disks) || Q(blocks, disks)));
 
 	if (device)
 		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
@@ -196,7 +198,7 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
 	    is_dma_pq_aligned(device, offset, 0, len)) {
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = 0;
-		unsigned char coefs[src_cnt];
+		unsigned char coefs[MAX_DISKS];
 		int i, j;
 
 		/* run the p+q asynchronously */
@@ -299,11 +301,11 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
 	struct dma_chan *chan = pq_val_chan(submit, blocks, disks, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx;
-	unsigned char coefs[disks-2];
+	unsigned char coefs[MAX_DISKS];
 	enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
 	struct dmaengine_unmap_data *unmap = NULL;
 
-	BUG_ON(disks < 4);
+	BUG_ON(disks < 4 || disks > MAX_DISKS);
 
 	if (device)
 		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index dad95f4..a5edaab 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -81,11 +81,13 @@ static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, stru
 			init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
 			tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
 		} else {
-			struct page *blocks[disks];
+			struct page *blocks[NDISKS];
 			struct page *dest;
 			int count = 0;
 			int i;
 
+			BUG_ON(disks > NDISKS);
+
 			/* data+Q failure.  Reconstruct data from P,
 			 * then rebuild syndrome
 			 */
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index ca1680a..dacf3f4 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -250,6 +250,7 @@
 	tristate "i.MX SDMA support"
 	depends on ARCH_MXC
 	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
 	help
 	  Support the i.MX SDMA engine. This engine is integrated into
 	  Freescale i.MX25/31/35/51/53/6 chips.
@@ -413,6 +414,14 @@
 	help
 	  Support for "Type-AXI" NBPF DMA IPs from Renesas
 
+config OWL_DMA
+	tristate "Actions Semi Owl SoCs DMA support"
+	depends on ARCH_ACTIONS
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the Actions Semi Owl SoCs DMA controller.
+
 config PCH_DMA
 	tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA"
 	depends on PCI && (X86_32 || COMPILE_TEST)
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 203a99d..c91702d 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -52,6 +52,7 @@
 obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
+obj-$(CONFIG_OWL_DMA) += owl-dma.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 08ba847..272bed6 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -500,12 +500,8 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
 	caps->max_burst = device->max_burst;
 	caps->residue_granularity = device->residue_granularity;
 	caps->descriptor_reuse = device->descriptor_reuse;
-
-	/*
-	 * Some devices implement only pause (e.g. to get residuum) but no
-	 * resume. However cmd_pause is advertised as pause AND resume.
-	 */
-	caps->cmd_pause = !!(device->device_pause && device->device_resume);
+	caps->cmd_pause = !!device->device_pause;
+	caps->cmd_resume = !!device->device_resume;
 	caps->cmd_terminate = !!device->device_terminate_all;
 
 	return 0;
@@ -774,8 +770,14 @@ struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask)
 		return ERR_PTR(-ENODEV);
 
 	chan = __dma_request_channel(mask, NULL, NULL);
-	if (!chan)
-		chan = ERR_PTR(-ENODEV);
+	if (!chan) {
+		mutex_lock(&dma_list_mutex);
+		if (list_empty(&dma_device_list))
+			chan = ERR_PTR(-EPROBE_DEFER);
+		else
+			chan = ERR_PTR(-ENODEV);
+		mutex_unlock(&dma_list_mutex);
+	}
 
 	return chan;
 }
@@ -1139,6 +1141,41 @@ void dma_async_device_unregister(struct dma_device *device)
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
+static void dmam_device_release(struct device *dev, void *res)
+{
+	struct dma_device *device;
+
+	device = *(struct dma_device **)res;
+	dma_async_device_unregister(device);
+}
+
+/**
+ * dmaenginem_async_device_register - registers DMA devices found
+ * @device: &dma_device
+ *
+ * The operation is managed and will be undone on driver detach.
+ */
+int dmaenginem_async_device_register(struct dma_device *device)
+{
+	void *p;
+	int ret;
+
+	p = devres_alloc(dmam_device_release, sizeof(void *), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	ret = dma_async_device_register(device);
+	if (!ret) {
+		*(struct dma_device **)p = device;
+		devres_add(device->dev, p);
+	} else {
+		devres_free(p);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(dmaenginem_async_device_register);
+
 struct dmaengine_unmap_pool {
 	struct kmem_cache *cache;
 	const char *name;
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index 29d04ca..202ffa9 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -413,6 +413,13 @@ static void hsu_dma_free_chan_resources(struct dma_chan *chan)
 	vchan_free_chan_resources(to_virt_chan(chan));
 }
 
+static void hsu_dma_synchronize(struct dma_chan *chan)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+
+	vchan_synchronize(&hsuc->vchan);
+}
+
 int hsu_dma_probe(struct hsu_dma_chip *chip)
 {
 	struct hsu_dma *hsu;
@@ -459,6 +466,7 @@ int hsu_dma_probe(struct hsu_dma_chip *chip)
 	hsu->dma.device_pause = hsu_dma_pause;
 	hsu->dma.device_resume = hsu_dma_resume;
 	hsu->dma.device_terminate_all = hsu_dma_terminate_all;
+	hsu->dma.device_synchronize = hsu_dma_synchronize;
 
 	hsu->dma.src_addr_widths = HSU_DMA_BUSWIDTHS;
 	hsu->dma.dst_addr_widths = HSU_DMA_BUSWIDTHS;
diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c
index e5c9112..1fbf9cb 100644
--- a/drivers/dma/idma64.c
+++ b/drivers/dma/idma64.c
@@ -496,6 +496,13 @@ static int idma64_terminate_all(struct dma_chan *chan)
 	return 0;
 }
 
+static void idma64_synchronize(struct dma_chan *chan)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+	vchan_synchronize(&idma64c->vchan);
+}
+
 static int idma64_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct idma64_chan *idma64c = to_idma64_chan(chan);
@@ -583,6 +590,7 @@ static int idma64_probe(struct idma64_chip *chip)
 	idma64->dma.device_pause = idma64_pause;
 	idma64->dma.device_resume = idma64_resume;
 	idma64->dma.device_terminate_all = idma64_terminate_all;
+	idma64->dma.device_synchronize = idma64_synchronize;
 
 	idma64->dma.src_addr_widths = IDMA64_BUSWIDTHS;
 	idma64->dma.dst_addr_widths = IDMA64_BUSWIDTHS;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index f0779926..b4ec2d2 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -24,6 +24,7 @@
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
@@ -41,6 +42,7 @@
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
 
 #include "dmaengine.h"
+#include "virt-dma.h"
 
 /* SDMA registers */
 #define SDMA_H_C0PTR		0x000
@@ -183,6 +185,7 @@
  * Mode/Count of data node descriptors - IPCv2
  */
 struct sdma_mode_count {
+#define SDMA_BD_MAX_CNT	0xffff
 	u32 count   : 16; /* size of the buffer pointed by this BD */
 	u32 status  :  8; /* E,R,I,C,W,D status bits stored here */
 	u32 command :  8; /* command mostly used for channel 0 */
@@ -200,9 +203,9 @@ struct sdma_buffer_descriptor {
 /**
  * struct sdma_channel_control - Channel control Block
  *
- * @current_bd_ptr	current buffer descriptor processed
- * @base_bd_ptr		first element of buffer descriptor array
- * @unused		padding. The SDMA engine expects an array of 128 byte
+ * @current_bd_ptr:	current buffer descriptor processed
+ * @base_bd_ptr:	first element of buffer descriptor array
+ * @unused:		padding. The SDMA engine expects an array of 128 byte
  *			control blocks
  */
 struct sdma_channel_control {
@@ -215,10 +218,13 @@ struct sdma_channel_control {
  * struct sdma_state_registers - SDMA context for a channel
  *
  * @pc:		program counter
+ * @unused1:	unused
  * @t:		test bit: status of arithmetic & test instruction
  * @rpc:	return program counter
+ * @unused0:	unused
  * @sf:		source fault while loading data
  * @spc:	loop start program counter
+ * @unused2:	unused
  * @df:		destination fault while storing data
  * @epc:	loop end program counter
  * @lm:		loop mode
@@ -256,6 +262,14 @@ struct sdma_state_registers {
  * @dsa:		dedicated core source address register
  * @ds:			dedicated core status register
  * @dd:			dedicated core data register
+ * @scratch0:		1st word of dedicated ram for context switch
+ * @scratch1:		2nd word of dedicated ram for context switch
+ * @scratch2:		3rd word of dedicated ram for context switch
+ * @scratch3:		4th word of dedicated ram for context switch
+ * @scratch4:		5th word of dedicated ram for context switch
+ * @scratch5:		6th word of dedicated ram for context switch
+ * @scratch6:		7th word of dedicated ram for context switch
+ * @scratch7:		8th word of dedicated ram for context switch
  */
 struct sdma_context_data {
 	struct sdma_state_registers  channel_state;
@@ -284,25 +298,67 @@ struct sdma_context_data {
 	u32  scratch7;
 } __attribute__ ((packed));
 
-#define NUM_BD (int)(PAGE_SIZE / sizeof(struct sdma_buffer_descriptor))
 
 struct sdma_engine;
 
 /**
+ * struct sdma_desc - descriptor structor for one transfer
+ * @vd:			descriptor for virt dma
+ * @num_bd:		number of descriptors currently handling
+ * @bd_phys:		physical address of bd
+ * @buf_tail:		ID of the buffer that was processed
+ * @buf_ptail:		ID of the previous buffer that was processed
+ * @period_len:		period length, used in cyclic.
+ * @chn_real_count:	the real count updated from bd->mode.count
+ * @chn_count:		the transfer count set
+ * @sdmac:		sdma_channel pointer
+ * @bd:			pointer of allocate bd
+ */
+struct sdma_desc {
+	struct virt_dma_desc	vd;
+	unsigned int		num_bd;
+	dma_addr_t		bd_phys;
+	unsigned int		buf_tail;
+	unsigned int		buf_ptail;
+	unsigned int		period_len;
+	unsigned int		chn_real_count;
+	unsigned int		chn_count;
+	struct sdma_channel	*sdmac;
+	struct sdma_buffer_descriptor *bd;
+};
+
+/**
  * struct sdma_channel - housekeeping for a SDMA channel
  *
- * @sdma		pointer to the SDMA engine for this channel
- * @channel		the channel number, matches dmaengine chan_id + 1
- * @direction		transfer type. Needed for setting SDMA script
- * @peripheral_type	Peripheral type. Needed for setting SDMA script
- * @event_id0		aka dma request line
- * @event_id1		for channels that use 2 events
- * @word_size		peripheral access size
- * @buf_tail		ID of the buffer that was processed
- * @buf_ptail		ID of the previous buffer that was processed
- * @num_bd		max NUM_BD. number of descriptors currently handling
+ * @vc:			virt_dma base structure
+ * @desc:		sdma description including vd and other special member
+ * @sdma:		pointer to the SDMA engine for this channel
+ * @channel:		the channel number, matches dmaengine chan_id + 1
+ * @direction:		transfer type. Needed for setting SDMA script
+ * @peripheral_type:	Peripheral type. Needed for setting SDMA script
+ * @event_id0:		aka dma request line
+ * @event_id1:		for channels that use 2 events
+ * @word_size:		peripheral access size
+ * @pc_from_device:	script address for those device_2_memory
+ * @pc_to_device:	script address for those memory_2_device
+ * @device_to_device:	script address for those device_2_device
+ * @pc_to_pc:		script address for those memory_2_memory
+ * @flags:		loop mode or not
+ * @per_address:	peripheral source or destination address in common case
+ *                      destination address in p_2_p case
+ * @per_address2:	peripheral source address in p_2_p case
+ * @event_mask:		event mask used in p_2_p script
+ * @watermark_level:	value for gReg[7], some script will extend it from
+ *			basic watermark such as p_2_p
+ * @shp_addr:		value for gReg[6]
+ * @per_addr:		value for gReg[2]
+ * @status:		status of dma channel
+ * @data:		specific sdma interface structure
+ * @bd_pool:		dma_pool for bd
  */
 struct sdma_channel {
+	struct virt_dma_chan		vc;
+	struct sdma_desc		*desc;
 	struct sdma_engine		*sdma;
 	unsigned int			channel;
 	enum dma_transfer_direction		direction;
@@ -310,28 +366,17 @@ struct sdma_channel {
 	unsigned int			event_id0;
 	unsigned int			event_id1;
 	enum dma_slave_buswidth		word_size;
-	unsigned int			buf_tail;
-	unsigned int			buf_ptail;
-	unsigned int			num_bd;
-	unsigned int			period_len;
-	struct sdma_buffer_descriptor	*bd;
-	dma_addr_t			bd_phys;
 	unsigned int			pc_from_device, pc_to_device;
 	unsigned int			device_to_device;
+	unsigned int                    pc_to_pc;
 	unsigned long			flags;
 	dma_addr_t			per_address, per_address2;
 	unsigned long			event_mask[2];
 	unsigned long			watermark_level;
 	u32				shp_addr, per_addr;
-	struct dma_chan			chan;
-	spinlock_t			lock;
-	struct dma_async_tx_descriptor	desc;
 	enum dma_status			status;
-	unsigned int			chn_count;
-	unsigned int			chn_real_count;
-	struct tasklet_struct		tasklet;
 	struct imx_dma_data		data;
-	bool				enabled;
+	struct dma_pool			*bd_pool;
 };
 
 #define IMX_DMA_SG_LOOP		BIT(0)
@@ -346,15 +391,15 @@ struct sdma_channel {
 /**
  * struct sdma_firmware_header - Layout of the firmware image
  *
- * @magic		"SDMA"
- * @version_major	increased whenever layout of struct sdma_script_start_addrs
- *			changes.
- * @version_minor	firmware minor version (for binary compatible changes)
- * @script_addrs_start	offset of struct sdma_script_start_addrs in this image
- * @num_script_addrs	Number of script addresses in this image
- * @ram_code_start	offset of SDMA ram image in this firmware image
- * @ram_code_size	size of SDMA ram image
- * @script_addrs	Stores the start address of the SDMA scripts
+ * @magic:		"SDMA"
+ * @version_major:	increased whenever layout of struct
+ *			sdma_script_start_addrs changes.
+ * @version_minor:	firmware minor version (for binary compatible changes)
+ * @script_addrs_start:	offset of struct sdma_script_start_addrs in this image
+ * @num_script_addrs:	Number of script addresses in this image
+ * @ram_code_start:	offset of SDMA ram image in this firmware image
+ * @ram_code_size:	size of SDMA ram image
+ * @script_addrs:	Stores the start address of the SDMA scripts
  *			(in SDMA memory space)
  */
 struct sdma_firmware_header {
@@ -391,6 +436,8 @@ struct sdma_engine {
 	u32				spba_start_addr;
 	u32				spba_end_addr;
 	unsigned int			irq;
+	dma_addr_t			bd0_phys;
+	struct sdma_buffer_descriptor	*bd0;
 };
 
 static struct sdma_driver_data sdma_imx31 = {
@@ -590,14 +637,7 @@ static int sdma_config_ownership(struct sdma_channel *sdmac,
 
 static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
 {
-	unsigned long flags;
-	struct sdma_channel *sdmac = &sdma->channel[channel];
-
 	writel(BIT(channel), sdma->regs + SDMA_H_START);
-
-	spin_lock_irqsave(&sdmac->lock, flags);
-	sdmac->enabled = true;
-	spin_unlock_irqrestore(&sdmac->lock, flags);
 }
 
 /*
@@ -625,7 +665,7 @@ static int sdma_run_channel0(struct sdma_engine *sdma)
 static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
 		u32 address)
 {
-	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
+	struct sdma_buffer_descriptor *bd0 = sdma->bd0;
 	void *buf_virt;
 	dma_addr_t buf_phys;
 	int ret;
@@ -681,26 +721,49 @@ static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
 	writel_relaxed(val, sdma->regs + chnenbl);
 }
 
+static struct sdma_desc *to_sdma_desc(struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct sdma_desc, vd.tx);
+}
+
+static void sdma_start_desc(struct sdma_channel *sdmac)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&sdmac->vc);
+	struct sdma_desc *desc;
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	if (!vd) {
+		sdmac->desc = NULL;
+		return;
+	}
+	sdmac->desc = desc = to_sdma_desc(&vd->tx);
+	/*
+	 * Do not delete the node in desc_issued list in cyclic mode, otherwise
+	 * the desc allocated will never be freed in vchan_dma_desc_free_list
+	 */
+	if (!(sdmac->flags & IMX_DMA_SG_LOOP))
+		list_del(&vd->node);
+
+	sdma->channel_control[channel].base_bd_ptr = desc->bd_phys;
+	sdma->channel_control[channel].current_bd_ptr = desc->bd_phys;
+	sdma_enable_channel(sdma, sdmac->channel);
+}
+
 static void sdma_update_channel_loop(struct sdma_channel *sdmac)
 {
 	struct sdma_buffer_descriptor *bd;
 	int error = 0;
 	enum dma_status	old_status = sdmac->status;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sdmac->lock, flags);
-	if (!sdmac->enabled) {
-		spin_unlock_irqrestore(&sdmac->lock, flags);
-		return;
-	}
-	spin_unlock_irqrestore(&sdmac->lock, flags);
 
 	/*
 	 * loop mode. Iterate over descriptors, re-setup them and
 	 * call callback function.
 	 */
-	while (1) {
-		bd = &sdmac->bd[sdmac->buf_tail];
+	while (sdmac->desc) {
+		struct sdma_desc *desc = sdmac->desc;
+
+		bd = &desc->bd[desc->buf_tail];
 
 		if (bd->mode.status & BD_DONE)
 			break;
@@ -716,11 +779,11 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
 		* the number of bytes present in the current buffer descriptor.
 		*/
 
-		sdmac->chn_real_count = bd->mode.count;
+		desc->chn_real_count = bd->mode.count;
 		bd->mode.status |= BD_DONE;
-		bd->mode.count = sdmac->period_len;
-		sdmac->buf_ptail = sdmac->buf_tail;
-		sdmac->buf_tail = (sdmac->buf_tail + 1) % sdmac->num_bd;
+		bd->mode.count = desc->period_len;
+		desc->buf_ptail = desc->buf_tail;
+		desc->buf_tail = (desc->buf_tail + 1) % desc->num_bd;
 
 		/*
 		 * The callback is called from the interrupt context in order
@@ -728,41 +791,38 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
 		 * SDMA transaction status by the time the client tasklet is
 		 * executed.
 		 */
-
-		dmaengine_desc_get_callback_invoke(&sdmac->desc, NULL);
+		spin_unlock(&sdmac->vc.lock);
+		dmaengine_desc_get_callback_invoke(&desc->vd.tx, NULL);
+		spin_lock(&sdmac->vc.lock);
 
 		if (error)
 			sdmac->status = old_status;
 	}
 }
 
-static void mxc_sdma_handle_channel_normal(unsigned long data)
+static void mxc_sdma_handle_channel_normal(struct sdma_channel *data)
 {
 	struct sdma_channel *sdmac = (struct sdma_channel *) data;
 	struct sdma_buffer_descriptor *bd;
 	int i, error = 0;
 
-	sdmac->chn_real_count = 0;
+	sdmac->desc->chn_real_count = 0;
 	/*
 	 * non loop mode. Iterate over all descriptors, collect
 	 * errors and call callback function
 	 */
-	for (i = 0; i < sdmac->num_bd; i++) {
-		bd = &sdmac->bd[i];
+	for (i = 0; i < sdmac->desc->num_bd; i++) {
+		bd = &sdmac->desc->bd[i];
 
 		 if (bd->mode.status & (BD_DONE | BD_RROR))
 			error = -EIO;
-		 sdmac->chn_real_count += bd->mode.count;
+		 sdmac->desc->chn_real_count += bd->mode.count;
 	}
 
 	if (error)
 		sdmac->status = DMA_ERROR;
 	else
 		sdmac->status = DMA_COMPLETE;
-
-	dma_cookie_complete(&sdmac->desc);
-
-	dmaengine_desc_get_callback_invoke(&sdmac->desc, NULL);
 }
 
 static irqreturn_t sdma_int_handler(int irq, void *dev_id)
@@ -778,12 +838,21 @@ static irqreturn_t sdma_int_handler(int irq, void *dev_id)
 	while (stat) {
 		int channel = fls(stat) - 1;
 		struct sdma_channel *sdmac = &sdma->channel[channel];
+		struct sdma_desc *desc;
 
-		if (sdmac->flags & IMX_DMA_SG_LOOP)
-			sdma_update_channel_loop(sdmac);
-		else
-			tasklet_schedule(&sdmac->tasklet);
+		spin_lock(&sdmac->vc.lock);
+		desc = sdmac->desc;
+		if (desc) {
+			if (sdmac->flags & IMX_DMA_SG_LOOP) {
+				sdma_update_channel_loop(sdmac);
+			} else {
+				mxc_sdma_handle_channel_normal(sdmac);
+				vchan_cookie_complete(&desc->vd);
+				sdma_start_desc(sdmac);
+			}
+		}
 
+		spin_unlock(&sdmac->vc.lock);
 		__clear_bit(channel, &stat);
 	}
 
@@ -802,14 +871,16 @@ static void sdma_get_pc(struct sdma_channel *sdmac,
 	 * These are needed once we start to support transfers between
 	 * two peripherals or memory-to-memory transfers
 	 */
-	int per_2_per = 0;
+	int per_2_per = 0, emi_2_emi = 0;
 
 	sdmac->pc_from_device = 0;
 	sdmac->pc_to_device = 0;
 	sdmac->device_to_device = 0;
+	sdmac->pc_to_pc = 0;
 
 	switch (peripheral_type) {
 	case IMX_DMATYPE_MEMORY:
+		emi_2_emi = sdma->script_addrs->ap_2_ap_addr;
 		break;
 	case IMX_DMATYPE_DSP:
 		emi_2_per = sdma->script_addrs->bp_2_ap_addr;
@@ -882,6 +953,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac,
 	sdmac->pc_from_device = per_2_emi;
 	sdmac->pc_to_device = emi_2_per;
 	sdmac->device_to_device = per_2_per;
+	sdmac->pc_to_pc = emi_2_emi;
 }
 
 static int sdma_load_context(struct sdma_channel *sdmac)
@@ -890,7 +962,7 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 	int channel = sdmac->channel;
 	int load_address;
 	struct sdma_context_data *context = sdma->context;
-	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
+	struct sdma_buffer_descriptor *bd0 = sdma->bd0;
 	int ret;
 	unsigned long flags;
 
@@ -898,6 +970,8 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 		load_address = sdmac->pc_from_device;
 	else if (sdmac->direction == DMA_DEV_TO_DEV)
 		load_address = sdmac->device_to_device;
+	else if (sdmac->direction == DMA_MEM_TO_MEM)
+		load_address = sdmac->pc_to_pc;
 	else
 		load_address = sdmac->pc_to_device;
 
@@ -939,7 +1013,7 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 
 static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
 {
-	return container_of(chan, struct sdma_channel, chan);
+	return container_of(chan, struct sdma_channel, vc.chan);
 }
 
 static int sdma_disable_channel(struct dma_chan *chan)
@@ -947,21 +1021,25 @@ static int sdma_disable_channel(struct dma_chan *chan)
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
-	unsigned long flags;
 
 	writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
 	sdmac->status = DMA_ERROR;
 
-	spin_lock_irqsave(&sdmac->lock, flags);
-	sdmac->enabled = false;
-	spin_unlock_irqrestore(&sdmac->lock, flags);
-
 	return 0;
 }
 
 static int sdma_disable_channel_with_delay(struct dma_chan *chan)
 {
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
 	sdma_disable_channel(chan);
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
+	vchan_get_all_descriptors(&sdmac->vc, &head);
+	sdmac->desc = NULL;
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
+	vchan_dma_desc_free_list(&sdmac->vc, &head);
 
 	/*
 	 * According to NXP R&D team a delay of one BD SDMA cost time
@@ -1090,52 +1168,81 @@ static int sdma_set_channel_priority(struct sdma_channel *sdmac,
 	return 0;
 }
 
-static int sdma_request_channel(struct sdma_channel *sdmac)
+static int sdma_request_channel0(struct sdma_engine *sdma)
 {
-	struct sdma_engine *sdma = sdmac->sdma;
-	int channel = sdmac->channel;
 	int ret = -EBUSY;
 
-	sdmac->bd = dma_zalloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys,
-					GFP_KERNEL);
-	if (!sdmac->bd) {
+	sdma->bd0 = dma_zalloc_coherent(NULL, PAGE_SIZE, &sdma->bd0_phys,
+					GFP_NOWAIT);
+	if (!sdma->bd0) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys;
-	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
+	sdma->channel_control[0].base_bd_ptr = sdma->bd0_phys;
+	sdma->channel_control[0].current_bd_ptr = sdma->bd0_phys;
 
-	sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY);
+	sdma_set_channel_priority(&sdma->channel[0], MXC_SDMA_DEFAULT_PRIORITY);
 	return 0;
 out:
 
 	return ret;
 }
 
-static dma_cookie_t sdma_tx_submit(struct dma_async_tx_descriptor *tx)
+
+static int sdma_alloc_bd(struct sdma_desc *desc)
 {
-	unsigned long flags;
-	struct sdma_channel *sdmac = to_sdma_chan(tx->chan);
-	dma_cookie_t cookie;
+	int ret = 0;
 
-	spin_lock_irqsave(&sdmac->lock, flags);
+	desc->bd = dma_pool_alloc(desc->sdmac->bd_pool, GFP_NOWAIT,
+				  &desc->bd_phys);
+	if (!desc->bd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+out:
+	return ret;
+}
 
-	cookie = dma_cookie_assign(tx);
+static void sdma_free_bd(struct sdma_desc *desc)
+{
+	dma_pool_free(desc->sdmac->bd_pool, desc->bd, desc->bd_phys);
+}
 
-	spin_unlock_irqrestore(&sdmac->lock, flags);
+static void sdma_desc_free(struct virt_dma_desc *vd)
+{
+	struct sdma_desc *desc = container_of(vd, struct sdma_desc, vd);
 
-	return cookie;
+	sdma_free_bd(desc);
+	kfree(desc);
 }
 
 static int sdma_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct imx_dma_data *data = chan->private;
+	struct imx_dma_data mem_data;
 	int prio, ret;
 
-	if (!data)
-		return -EINVAL;
+	/*
+	 * MEMCPY may never setup chan->private by filter function such as
+	 * dmatest, thus create 'struct imx_dma_data mem_data' for this case.
+	 * Please note in any other slave case, you have to setup chan->private
+	 * with 'struct imx_dma_data' in your own filter function if you want to
+	 * request dma channel by dma_request_channel() rather than
+	 * dma_request_slave_channel(). Othwise, 'MEMCPY in case?' will appear
+	 * to warn you to correct your filter function.
+	 */
+	if (!data) {
+		dev_dbg(sdmac->sdma->dev, "MEMCPY in case?\n");
+		mem_data.priority = 2;
+		mem_data.peripheral_type = IMX_DMATYPE_MEMORY;
+		mem_data.dma_request = 0;
+		mem_data.dma_request2 = 0;
+		data = &mem_data;
+
+		sdma_get_pc(sdmac, IMX_DMATYPE_MEMORY);
+	}
 
 	switch (data->priority) {
 	case DMA_PRIO_HIGH:
@@ -1161,18 +1268,13 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan)
 	if (ret)
 		goto disable_clk_ipg;
 
-	ret = sdma_request_channel(sdmac);
-	if (ret)
-		goto disable_clk_ahb;
-
 	ret = sdma_set_channel_priority(sdmac, prio);
 	if (ret)
 		goto disable_clk_ahb;
 
-	dma_async_tx_descriptor_init(&sdmac->desc, chan);
-	sdmac->desc.tx_submit = sdma_tx_submit;
-	/* txd.flags will be overwritten in prep funcs */
-	sdmac->desc.flags = DMA_CTRL_ACK;
+	sdmac->bd_pool = dma_pool_create("bd_pool", chan->device->dev,
+				sizeof(struct sdma_buffer_descriptor),
+				32, 0);
 
 	return 0;
 
@@ -1188,7 +1290,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
 
-	sdma_disable_channel(chan);
+	sdma_disable_channel_with_delay(chan);
 
 	if (sdmac->event_id0)
 		sdma_event_disable(sdmac, sdmac->event_id0);
@@ -1200,10 +1302,105 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 
 	sdma_set_channel_priority(sdmac, 0);
 
-	dma_free_coherent(NULL, PAGE_SIZE, sdmac->bd, sdmac->bd_phys);
-
 	clk_disable(sdma->clk_ipg);
 	clk_disable(sdma->clk_ahb);
+
+	dma_pool_destroy(sdmac->bd_pool);
+	sdmac->bd_pool = NULL;
+}
+
+static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac,
+				enum dma_transfer_direction direction, u32 bds)
+{
+	struct sdma_desc *desc;
+
+	desc = kzalloc((sizeof(*desc)), GFP_NOWAIT);
+	if (!desc)
+		goto err_out;
+
+	sdmac->status = DMA_IN_PROGRESS;
+	sdmac->direction = direction;
+	sdmac->flags = 0;
+
+	desc->chn_count = 0;
+	desc->chn_real_count = 0;
+	desc->buf_tail = 0;
+	desc->buf_ptail = 0;
+	desc->sdmac = sdmac;
+	desc->num_bd = bds;
+
+	if (sdma_alloc_bd(desc))
+		goto err_desc_out;
+
+	/* No slave_config called in MEMCPY case, so do here */
+	if (direction == DMA_MEM_TO_MEM)
+		sdma_config_ownership(sdmac, false, true, false);
+
+	if (sdma_load_context(sdmac))
+		goto err_desc_out;
+
+	return desc;
+
+err_desc_out:
+	kfree(desc);
+err_out:
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_memcpy(
+		struct dma_chan *chan, dma_addr_t dma_dst,
+		dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	size_t count;
+	int i = 0, param;
+	struct sdma_buffer_descriptor *bd;
+	struct sdma_desc *desc;
+
+	if (!chan || !len)
+		return NULL;
+
+	dev_dbg(sdma->dev, "memcpy: %pad->%pad, len=%zu, channel=%d.\n",
+		&dma_src, &dma_dst, len, channel);
+
+	desc = sdma_transfer_init(sdmac, DMA_MEM_TO_MEM,
+					len / SDMA_BD_MAX_CNT + 1);
+	if (!desc)
+		return NULL;
+
+	do {
+		count = min_t(size_t, len, SDMA_BD_MAX_CNT);
+		bd = &desc->bd[i];
+		bd->buffer_addr = dma_src;
+		bd->ext_buffer_addr = dma_dst;
+		bd->mode.count = count;
+		desc->chn_count += count;
+		bd->mode.command = 0;
+
+		dma_src += count;
+		dma_dst += count;
+		len -= count;
+		i++;
+
+		param = BD_DONE | BD_EXTD | BD_CONT;
+		/* last bd */
+		if (!len) {
+			param |= BD_INTR;
+			param |= BD_LAST;
+			param &= ~BD_CONT;
+		}
+
+		dev_dbg(sdma->dev, "entry %d: count: %zd dma: 0x%x %s%s\n",
+				i, count, bd->buffer_addr,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+	} while (len);
+
+	return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
 }
 
 static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
@@ -1213,75 +1410,54 @@ static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
-	int ret, i, count;
+	int i, count;
 	int channel = sdmac->channel;
 	struct scatterlist *sg;
+	struct sdma_desc *desc;
 
-	if (sdmac->status == DMA_IN_PROGRESS)
-		return NULL;
-	sdmac->status = DMA_IN_PROGRESS;
-
-	sdmac->flags = 0;
-
-	sdmac->buf_tail = 0;
-	sdmac->buf_ptail = 0;
-	sdmac->chn_real_count = 0;
+	desc = sdma_transfer_init(sdmac, direction, sg_len);
+	if (!desc)
+		goto err_out;
 
 	dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n",
 			sg_len, channel);
 
-	sdmac->direction = direction;
-	ret = sdma_load_context(sdmac);
-	if (ret)
-		goto err_out;
-
-	if (sg_len > NUM_BD) {
-		dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n",
-				channel, sg_len, NUM_BD);
-		ret = -EINVAL;
-		goto err_out;
-	}
-
-	sdmac->chn_count = 0;
 	for_each_sg(sgl, sg, sg_len, i) {
-		struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
+		struct sdma_buffer_descriptor *bd = &desc->bd[i];
 		int param;
 
 		bd->buffer_addr = sg->dma_address;
 
 		count = sg_dma_len(sg);
 
-		if (count > 0xffff) {
+		if (count > SDMA_BD_MAX_CNT) {
 			dev_err(sdma->dev, "SDMA channel %d: maximum bytes for sg entry exceeded: %d > %d\n",
-					channel, count, 0xffff);
-			ret = -EINVAL;
-			goto err_out;
+					channel, count, SDMA_BD_MAX_CNT);
+			goto err_bd_out;
 		}
 
 		bd->mode.count = count;
-		sdmac->chn_count += count;
+		desc->chn_count += count;
 
-		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES) {
-			ret =  -EINVAL;
-			goto err_out;
-		}
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
+			goto err_bd_out;
 
 		switch (sdmac->word_size) {
 		case DMA_SLAVE_BUSWIDTH_4_BYTES:
 			bd->mode.command = 0;
 			if (count & 3 || sg->dma_address & 3)
-				return NULL;
+				goto err_bd_out;
 			break;
 		case DMA_SLAVE_BUSWIDTH_2_BYTES:
 			bd->mode.command = 2;
 			if (count & 1 || sg->dma_address & 1)
-				return NULL;
+				goto err_bd_out;
 			break;
 		case DMA_SLAVE_BUSWIDTH_1_BYTE:
 			bd->mode.command = 1;
 			break;
 		default:
-			return NULL;
+			goto err_bd_out;
 		}
 
 		param = BD_DONE | BD_EXTD | BD_CONT;
@@ -1300,10 +1476,10 @@ static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
 		bd->mode.status = param;
 	}
 
-	sdmac->num_bd = sg_len;
-	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
-
-	return &sdmac->desc;
+	return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
+err_bd_out:
+	sdma_free_bd(desc);
+	kfree(desc);
 err_out:
 	sdmac->status = DMA_ERROR;
 	return NULL;
@@ -1318,40 +1494,27 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 	struct sdma_engine *sdma = sdmac->sdma;
 	int num_periods = buf_len / period_len;
 	int channel = sdmac->channel;
-	int ret, i = 0, buf = 0;
+	int i = 0, buf = 0;
+	struct sdma_desc *desc;
 
 	dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel);
 
-	if (sdmac->status == DMA_IN_PROGRESS)
-		return NULL;
+	desc = sdma_transfer_init(sdmac, direction, num_periods);
+	if (!desc)
+		goto err_out;
 
-	sdmac->status = DMA_IN_PROGRESS;
-
-	sdmac->buf_tail = 0;
-	sdmac->buf_ptail = 0;
-	sdmac->chn_real_count = 0;
-	sdmac->period_len = period_len;
+	desc->period_len = period_len;
 
 	sdmac->flags |= IMX_DMA_SG_LOOP;
-	sdmac->direction = direction;
-	ret = sdma_load_context(sdmac);
-	if (ret)
-		goto err_out;
 
-	if (num_periods > NUM_BD) {
-		dev_err(sdma->dev, "SDMA channel %d: maximum number of sg exceeded: %d > %d\n",
-				channel, num_periods, NUM_BD);
-		goto err_out;
-	}
-
-	if (period_len > 0xffff) {
+	if (period_len > SDMA_BD_MAX_CNT) {
 		dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %zu > %d\n",
-				channel, period_len, 0xffff);
-		goto err_out;
+				channel, period_len, SDMA_BD_MAX_CNT);
+		goto err_bd_out;
 	}
 
 	while (buf < buf_len) {
-		struct sdma_buffer_descriptor *bd = &sdmac->bd[i];
+		struct sdma_buffer_descriptor *bd = &desc->bd[i];
 		int param;
 
 		bd->buffer_addr = dma_addr;
@@ -1359,7 +1522,7 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		bd->mode.count = period_len;
 
 		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
-			goto err_out;
+			goto err_bd_out;
 		if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES)
 			bd->mode.command = 0;
 		else
@@ -1382,10 +1545,10 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		i++;
 	}
 
-	sdmac->num_bd = num_periods;
-	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
-
-	return &sdmac->desc;
+	return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
+err_bd_out:
+	sdma_free_bd(desc);
+	kfree(desc);
 err_out:
 	sdmac->status = DMA_ERROR;
 	return NULL;
@@ -1424,13 +1587,31 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 				      struct dma_tx_state *txstate)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_desc *desc;
 	u32 residue;
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
 
-	if (sdmac->flags & IMX_DMA_SG_LOOP)
-		residue = (sdmac->num_bd - sdmac->buf_ptail) *
-			   sdmac->period_len - sdmac->chn_real_count;
-	else
-		residue = sdmac->chn_count - sdmac->chn_real_count;
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
+	vd = vchan_find_desc(&sdmac->vc, cookie);
+	if (vd) {
+		desc = to_sdma_desc(&vd->tx);
+		if (sdmac->flags & IMX_DMA_SG_LOOP)
+			residue = (desc->num_bd - desc->buf_ptail) *
+				desc->period_len - desc->chn_real_count;
+		else
+			residue = desc->chn_count - desc->chn_real_count;
+	} else if (sdmac->desc && sdmac->desc->vd.tx.cookie == cookie) {
+		residue = sdmac->desc->chn_count - sdmac->desc->chn_real_count;
+	} else {
+		residue = 0;
+	}
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 
 	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
 			 residue);
@@ -1441,10 +1622,12 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 static void sdma_issue_pending(struct dma_chan *chan)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
-	struct sdma_engine *sdma = sdmac->sdma;
+	unsigned long flags;
 
-	if (sdmac->status == DMA_IN_PROGRESS)
-		sdma_enable_channel(sdma, sdmac->channel);
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
+	if (vchan_issue_pending(&sdmac->vc) && !sdmac->desc)
+		sdma_start_desc(sdmac);
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 }
 
 #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1	34
@@ -1650,7 +1833,7 @@ static int sdma_init(struct sdma_engine *sdma)
 	for (i = 0; i < MAX_DMA_CHANNELS; i++)
 		writel_relaxed(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
 
-	ret = sdma_request_channel(&sdma->channel[0]);
+	ret = sdma_request_channel0(sdma);
 	if (ret)
 		goto err_dma_alloc;
 
@@ -1805,6 +1988,7 @@ static int sdma_probe(struct platform_device *pdev)
 
 	dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask);
 	dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, sdma->dma_device.cap_mask);
 
 	INIT_LIST_HEAD(&sdma->dma_device.channels);
 	/* Initialize channel parameters */
@@ -1812,22 +1996,16 @@ static int sdma_probe(struct platform_device *pdev)
 		struct sdma_channel *sdmac = &sdma->channel[i];
 
 		sdmac->sdma = sdma;
-		spin_lock_init(&sdmac->lock);
 
-		sdmac->chan.device = &sdma->dma_device;
-		dma_cookie_init(&sdmac->chan);
 		sdmac->channel = i;
-
-		tasklet_init(&sdmac->tasklet, mxc_sdma_handle_channel_normal,
-			     (unsigned long) sdmac);
+		sdmac->vc.desc_free = sdma_desc_free;
 		/*
 		 * Add the channel to the DMAC list. Do not add channel 0 though
 		 * because we need it internally in the SDMA driver. This also means
 		 * that channel 0 in dmaengine counting matches sdma channel 1.
 		 */
 		if (i)
-			list_add_tail(&sdmac->chan.device_node,
-					&sdma->dma_device.channels);
+			vchan_init(&sdmac->vc, &sdma->dma_device);
 	}
 
 	ret = sdma_init(sdma);
@@ -1877,9 +2055,10 @@ static int sdma_probe(struct platform_device *pdev)
 	sdma->dma_device.dst_addr_widths = SDMA_DMA_BUSWIDTHS;
 	sdma->dma_device.directions = SDMA_DMA_DIRECTIONS;
 	sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	sdma->dma_device.device_prep_dma_memcpy = sdma_prep_memcpy;
 	sdma->dma_device.device_issue_pending = sdma_issue_pending;
 	sdma->dma_device.dev->dma_parms = &sdma->dma_parms;
-	dma_set_max_seg_size(sdma->dma_device.dev, 65535);
+	dma_set_max_seg_size(sdma->dma_device.dev, SDMA_BD_MAX_CNT);
 
 	platform_set_drvdata(pdev, sdma);
 
@@ -1932,7 +2111,8 @@ static int sdma_remove(struct platform_device *pdev)
 	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
 		struct sdma_channel *sdmac = &sdma->channel[i];
 
-		tasklet_kill(&sdmac->tasklet);
+		tasklet_kill(&sdmac->vc.task);
+		sdma_free_chan_resources(&sdmac->vc.chan);
 	}
 
 	platform_set_drvdata(pdev, NULL);
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 8b5b23a..23fb2fa 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -688,6 +688,12 @@ static void ioat_restart_channel(struct ioatdma_chan *ioat_chan)
 {
 	u64 phys_complete;
 
+	/* set the completion address register again */
+	writel(lower_32_bits(ioat_chan->completion_dma),
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+	writel(upper_32_bits(ioat_chan->completion_dma),
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
 	ioat_quiesce(ioat_chan, 0);
 	if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
 		__cleanup(ioat_chan, phys_complete);
diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c
index 68dd797..b76cb17 100644
--- a/drivers/dma/mic_x100_dma.c
+++ b/drivers/dma/mic_x100_dma.c
@@ -470,11 +470,6 @@ static void mic_dma_chan_destroy(struct mic_dma_chan *ch)
 	mic_dma_chan_mask_intr(ch);
 }
 
-static void mic_dma_unregister_dma_device(struct mic_dma_device *mic_dma_dev)
-{
-	dma_async_device_unregister(&mic_dma_dev->dma_dev);
-}
-
 static int mic_dma_setup_irq(struct mic_dma_chan *ch)
 {
 	ch->cookie =
@@ -630,7 +625,7 @@ static int mic_dma_register_dma_device(struct mic_dma_device *mic_dma_dev,
 		list_add_tail(&mic_dma_dev->mic_ch[i].api_ch.device_node,
 			      &mic_dma_dev->dma_dev.channels);
 	}
-	return dma_async_device_register(&mic_dma_dev->dma_dev);
+	return dmaenginem_async_device_register(&mic_dma_dev->dma_dev);
 }
 
 /*
@@ -678,7 +673,6 @@ static struct mic_dma_device *mic_dma_dev_reg(struct mbus_device *mbdev,
 
 static void mic_dma_dev_unreg(struct mic_dma_device *mic_dma_dev)
 {
-	mic_dma_unregister_dma_device(mic_dma_dev);
 	mic_dma_uninit(mic_dma_dev);
 	kfree(mic_dma_dev);
 }
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
index c6589cc..8dc0aa4 100644
--- a/drivers/dma/mv_xor_v2.c
+++ b/drivers/dma/mv_xor_v2.c
@@ -174,6 +174,7 @@ struct mv_xor_v2_device {
 	int desc_size;
 	unsigned int npendings;
 	unsigned int hw_queue_idx;
+	struct msi_desc *msi_desc;
 };
 
 /**
@@ -588,11 +589,9 @@ static void mv_xor_v2_tasklet(unsigned long data)
 			 */
 			dma_cookie_complete(&next_pending_sw_desc->async_tx);
 
-			if (next_pending_sw_desc->async_tx.callback)
-				next_pending_sw_desc->async_tx.callback(
-				next_pending_sw_desc->async_tx.callback_param);
-
 			dma_descriptor_unmap(&next_pending_sw_desc->async_tx);
+			dmaengine_desc_get_callback_invoke(
+					&next_pending_sw_desc->async_tx, NULL);
 		}
 
 		dma_run_dependencies(&next_pending_sw_desc->async_tx);
@@ -643,9 +642,9 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
 	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_SIZE_OFF);
 
 	/* write the DESQ address to the DMA enngine*/
-	writel(xor_dev->hw_desq & 0xFFFFFFFF,
+	writel(lower_32_bits(xor_dev->hw_desq),
 	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BALR_OFF);
-	writel((xor_dev->hw_desq & 0xFFFF00000000) >> 32,
+	writel(upper_32_bits(xor_dev->hw_desq),
 	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF);
 
 	/*
@@ -780,6 +779,7 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 	msi_desc = first_msi_entry(&pdev->dev);
 	if (!msi_desc)
 		goto free_msi_irqs;
+	xor_dev->msi_desc = msi_desc;
 
 	ret = devm_request_irq(&pdev->dev, msi_desc->irq,
 			       mv_xor_v2_interrupt_handler, 0,
@@ -897,8 +897,12 @@ static int mv_xor_v2_remove(struct platform_device *pdev)
 			  xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
 			  xor_dev->hw_desq_virt, xor_dev->hw_desq);
 
+	devm_free_irq(&pdev->dev, xor_dev->msi_desc->irq, xor_dev);
+
 	platform_msi_domain_free_irqs(&pdev->dev);
 
+	tasklet_kill(&xor_dev->irq_tasklet);
+
 	clk_disable_unprepare(xor_dev->clk);
 
 	return 0;
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
index 2f9974d..8c7b2e8 100644
--- a/drivers/dma/nbpfaxi.c
+++ b/drivers/dma/nbpfaxi.c
@@ -479,6 +479,7 @@ static size_t nbpf_xfer_size(struct nbpf_device *nbpf,
 
 	default:
 		pr_warn("%s(): invalid bus width %u\n", __func__, width);
+		/* fall through */
 	case DMA_SLAVE_BUSWIDTH_1_BYTE:
 		size = burst;
 	}
diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c
new file mode 100644
index 0000000..7812a63
--- /dev/null
+++ b/drivers/dma/owl-dma.c
@@ -0,0 +1,971 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Actions Semi Owl SoCs DMA driver
+//
+// Copyright (c) 2014 Actions Semi Inc.
+// Author: David Liu <liuwei@actions-semi.com>
+//
+// Copyright (c) 2018 Linaro Ltd.
+// Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include "virt-dma.h"
+
+#define OWL_DMA_FRAME_MAX_LENGTH		0xfffff
+
+/* Global DMA Controller Registers */
+#define OWL_DMA_IRQ_PD0				0x00
+#define OWL_DMA_IRQ_PD1				0x04
+#define OWL_DMA_IRQ_PD2				0x08
+#define OWL_DMA_IRQ_PD3				0x0C
+#define OWL_DMA_IRQ_EN0				0x10
+#define OWL_DMA_IRQ_EN1				0x14
+#define OWL_DMA_IRQ_EN2				0x18
+#define OWL_DMA_IRQ_EN3				0x1C
+#define OWL_DMA_SECURE_ACCESS_CTL		0x20
+#define OWL_DMA_NIC_QOS				0x24
+#define OWL_DMA_DBGSEL				0x28
+#define OWL_DMA_IDLE_STAT			0x2C
+
+/* Channel Registers */
+#define OWL_DMA_CHAN_BASE(i)			(0x100 + (i) * 0x100)
+#define OWL_DMAX_MODE				0x00
+#define OWL_DMAX_SOURCE				0x04
+#define OWL_DMAX_DESTINATION			0x08
+#define OWL_DMAX_FRAME_LEN			0x0C
+#define OWL_DMAX_FRAME_CNT			0x10
+#define OWL_DMAX_REMAIN_FRAME_CNT		0x14
+#define OWL_DMAX_REMAIN_CNT			0x18
+#define OWL_DMAX_SOURCE_STRIDE			0x1C
+#define OWL_DMAX_DESTINATION_STRIDE		0x20
+#define OWL_DMAX_START				0x24
+#define OWL_DMAX_PAUSE				0x28
+#define OWL_DMAX_CHAINED_CTL			0x2C
+#define OWL_DMAX_CONSTANT			0x30
+#define OWL_DMAX_LINKLIST_CTL			0x34
+#define OWL_DMAX_NEXT_DESCRIPTOR		0x38
+#define OWL_DMAX_CURRENT_DESCRIPTOR_NUM		0x3C
+#define OWL_DMAX_INT_CTL			0x40
+#define OWL_DMAX_INT_STATUS			0x44
+#define OWL_DMAX_CURRENT_SOURCE_POINTER		0x48
+#define OWL_DMAX_CURRENT_DESTINATION_POINTER	0x4C
+
+/* OWL_DMAX_MODE Bits */
+#define OWL_DMA_MODE_TS(x)			(((x) & GENMASK(5, 0)) << 0)
+#define OWL_DMA_MODE_ST(x)			(((x) & GENMASK(1, 0)) << 8)
+#define	OWL_DMA_MODE_ST_DEV			OWL_DMA_MODE_ST(0)
+#define	OWL_DMA_MODE_ST_DCU			OWL_DMA_MODE_ST(2)
+#define	OWL_DMA_MODE_ST_SRAM			OWL_DMA_MODE_ST(3)
+#define OWL_DMA_MODE_DT(x)			(((x) & GENMASK(1, 0)) << 10)
+#define	OWL_DMA_MODE_DT_DEV			OWL_DMA_MODE_DT(0)
+#define	OWL_DMA_MODE_DT_DCU			OWL_DMA_MODE_DT(2)
+#define	OWL_DMA_MODE_DT_SRAM			OWL_DMA_MODE_DT(3)
+#define OWL_DMA_MODE_SAM(x)			(((x) & GENMASK(1, 0)) << 16)
+#define	OWL_DMA_MODE_SAM_CONST			OWL_DMA_MODE_SAM(0)
+#define	OWL_DMA_MODE_SAM_INC			OWL_DMA_MODE_SAM(1)
+#define	OWL_DMA_MODE_SAM_STRIDE			OWL_DMA_MODE_SAM(2)
+#define OWL_DMA_MODE_DAM(x)			(((x) & GENMASK(1, 0)) << 18)
+#define	OWL_DMA_MODE_DAM_CONST			OWL_DMA_MODE_DAM(0)
+#define	OWL_DMA_MODE_DAM_INC			OWL_DMA_MODE_DAM(1)
+#define	OWL_DMA_MODE_DAM_STRIDE			OWL_DMA_MODE_DAM(2)
+#define OWL_DMA_MODE_PW(x)			(((x) & GENMASK(2, 0)) << 20)
+#define OWL_DMA_MODE_CB				BIT(23)
+#define OWL_DMA_MODE_NDDBW(x)			(((x) & 0x1) << 28)
+#define	OWL_DMA_MODE_NDDBW_32BIT		OWL_DMA_MODE_NDDBW(0)
+#define	OWL_DMA_MODE_NDDBW_8BIT			OWL_DMA_MODE_NDDBW(1)
+#define OWL_DMA_MODE_CFE			BIT(29)
+#define OWL_DMA_MODE_LME			BIT(30)
+#define OWL_DMA_MODE_CME			BIT(31)
+
+/* OWL_DMAX_LINKLIST_CTL Bits */
+#define OWL_DMA_LLC_SAV(x)			(((x) & GENMASK(1, 0)) << 8)
+#define	OWL_DMA_LLC_SAV_INC			OWL_DMA_LLC_SAV(0)
+#define	OWL_DMA_LLC_SAV_LOAD_NEXT		OWL_DMA_LLC_SAV(1)
+#define	OWL_DMA_LLC_SAV_LOAD_PREV		OWL_DMA_LLC_SAV(2)
+#define OWL_DMA_LLC_DAV(x)			(((x) & GENMASK(1, 0)) << 10)
+#define	OWL_DMA_LLC_DAV_INC			OWL_DMA_LLC_DAV(0)
+#define	OWL_DMA_LLC_DAV_LOAD_NEXT		OWL_DMA_LLC_DAV(1)
+#define	OWL_DMA_LLC_DAV_LOAD_PREV		OWL_DMA_LLC_DAV(2)
+#define OWL_DMA_LLC_SUSPEND			BIT(16)
+
+/* OWL_DMAX_INT_CTL Bits */
+#define OWL_DMA_INTCTL_BLOCK			BIT(0)
+#define OWL_DMA_INTCTL_SUPER_BLOCK		BIT(1)
+#define OWL_DMA_INTCTL_FRAME			BIT(2)
+#define OWL_DMA_INTCTL_HALF_FRAME		BIT(3)
+#define OWL_DMA_INTCTL_LAST_FRAME		BIT(4)
+
+/* OWL_DMAX_INT_STATUS Bits */
+#define OWL_DMA_INTSTAT_BLOCK			BIT(0)
+#define OWL_DMA_INTSTAT_SUPER_BLOCK		BIT(1)
+#define OWL_DMA_INTSTAT_FRAME			BIT(2)
+#define OWL_DMA_INTSTAT_HALF_FRAME		BIT(3)
+#define OWL_DMA_INTSTAT_LAST_FRAME		BIT(4)
+
+/* Pack shift and newshift in a single word */
+#define BIT_FIELD(val, width, shift, newshift)	\
+		((((val) >> (shift)) & ((BIT(width)) - 1)) << (newshift))
+
+/**
+ * struct owl_dma_lli_hw - Hardware link list for dma transfer
+ * @next_lli: physical address of the next link list
+ * @saddr: source physical address
+ * @daddr: destination physical address
+ * @flen: frame length
+ * @fcnt: frame count
+ * @src_stride: source stride
+ * @dst_stride: destination stride
+ * @ctrla: dma_mode and linklist ctrl config
+ * @ctrlb: interrupt config
+ * @const_num: data for constant fill
+ */
+struct owl_dma_lli_hw {
+	u32	next_lli;
+	u32	saddr;
+	u32	daddr;
+	u32	flen:20;
+	u32	fcnt:12;
+	u32	src_stride;
+	u32	dst_stride;
+	u32	ctrla;
+	u32	ctrlb;
+	u32	const_num;
+};
+
+/**
+ * struct owl_dma_lli - Link list for dma transfer
+ * @hw: hardware link list
+ * @phys: physical address of hardware link list
+ * @node: node for txd's lli_list
+ */
+struct owl_dma_lli {
+	struct  owl_dma_lli_hw	hw;
+	dma_addr_t		phys;
+	struct list_head	node;
+};
+
+/**
+ * struct owl_dma_txd - Wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @lli_list: link list of lli nodes
+ */
+struct owl_dma_txd {
+	struct virt_dma_desc	vd;
+	struct list_head	lli_list;
+};
+
+/**
+ * struct owl_dma_pchan - Holder for the physical channels
+ * @id: physical index to this channel
+ * @base: virtual memory base for the dma channel
+ * @vchan: the virtual channel currently being served by this physical channel
+ * @lock: a lock to use when altering an instance of this struct
+ */
+struct owl_dma_pchan {
+	u32			id;
+	void __iomem		*base;
+	struct owl_dma_vchan	*vchan;
+	spinlock_t		lock;
+};
+
+/**
+ * struct owl_dma_pchan - Wrapper for DMA ENGINE channel
+ * @vc: wrappped virtual channel
+ * @pchan: the physical channel utilized by this channel
+ * @txd: active transaction on this channel
+ */
+struct owl_dma_vchan {
+	struct virt_dma_chan	vc;
+	struct owl_dma_pchan	*pchan;
+	struct owl_dma_txd	*txd;
+};
+
+/**
+ * struct owl_dma - Holder for the Owl DMA controller
+ * @dma: dma engine for this instance
+ * @base: virtual memory base for the DMA controller
+ * @clk: clock for the DMA controller
+ * @lock: a lock to use when change DMA controller global register
+ * @lli_pool: a pool for the LLI descriptors
+ * @nr_pchans: the number of physical channels
+ * @pchans: array of data for the physical channels
+ * @nr_vchans: the number of physical channels
+ * @vchans: array of data for the physical channels
+ */
+struct owl_dma {
+	struct dma_device	dma;
+	void __iomem		*base;
+	struct clk		*clk;
+	spinlock_t		lock;
+	struct dma_pool		*lli_pool;
+	int			irq;
+
+	unsigned int		nr_pchans;
+	struct owl_dma_pchan	*pchans;
+
+	unsigned int		nr_vchans;
+	struct owl_dma_vchan	*vchans;
+};
+
+static void pchan_update(struct owl_dma_pchan *pchan, u32 reg,
+			 u32 val, bool state)
+{
+	u32 regval;
+
+	regval = readl(pchan->base + reg);
+
+	if (state)
+		regval |= val;
+	else
+		regval &= ~val;
+
+	writel(val, pchan->base + reg);
+}
+
+static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data)
+{
+	writel(data, pchan->base + reg);
+}
+
+static u32 pchan_readl(struct owl_dma_pchan *pchan, u32 reg)
+{
+	return readl(pchan->base + reg);
+}
+
+static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state)
+{
+	u32 regval;
+
+	regval = readl(od->base + reg);
+
+	if (state)
+		regval |= val;
+	else
+		regval &= ~val;
+
+	writel(val, od->base + reg);
+}
+
+static void dma_writel(struct owl_dma *od, u32 reg, u32 data)
+{
+	writel(data, od->base + reg);
+}
+
+static u32 dma_readl(struct owl_dma *od, u32 reg)
+{
+	return readl(od->base + reg);
+}
+
+static inline struct owl_dma *to_owl_dma(struct dma_device *dd)
+{
+	return container_of(dd, struct owl_dma, dma);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct owl_dma_vchan *to_owl_vchan(struct dma_chan *chan)
+{
+	return container_of(chan, struct owl_dma_vchan, vc.chan);
+}
+
+static inline struct owl_dma_txd *to_owl_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct owl_dma_txd, vd.tx);
+}
+
+static inline u32 llc_hw_ctrla(u32 mode, u32 llc_ctl)
+{
+	u32 ctl;
+
+	ctl = BIT_FIELD(mode, 4, 28, 28) |
+	      BIT_FIELD(mode, 8, 16, 20) |
+	      BIT_FIELD(mode, 4, 8, 16) |
+	      BIT_FIELD(mode, 6, 0, 10) |
+	      BIT_FIELD(llc_ctl, 2, 10, 8) |
+	      BIT_FIELD(llc_ctl, 2, 8, 6);
+
+	return ctl;
+}
+
+static inline u32 llc_hw_ctrlb(u32 int_ctl)
+{
+	u32 ctl;
+
+	ctl = BIT_FIELD(int_ctl, 7, 0, 18);
+
+	return ctl;
+}
+
+static void owl_dma_free_lli(struct owl_dma *od,
+			     struct owl_dma_lli *lli)
+{
+	list_del(&lli->node);
+	dma_pool_free(od->lli_pool, lli, lli->phys);
+}
+
+static struct owl_dma_lli *owl_dma_alloc_lli(struct owl_dma *od)
+{
+	struct owl_dma_lli *lli;
+	dma_addr_t phys;
+
+	lli = dma_pool_alloc(od->lli_pool, GFP_NOWAIT, &phys);
+	if (!lli)
+		return NULL;
+
+	INIT_LIST_HEAD(&lli->node);
+	lli->phys = phys;
+
+	return lli;
+}
+
+static struct owl_dma_lli *owl_dma_add_lli(struct owl_dma_txd *txd,
+					   struct owl_dma_lli *prev,
+					   struct owl_dma_lli *next)
+{
+	list_add_tail(&next->node, &txd->lli_list);
+
+	if (prev) {
+		prev->hw.next_lli = next->phys;
+		prev->hw.ctrla |= llc_hw_ctrla(OWL_DMA_MODE_LME, 0);
+	}
+
+	return next;
+}
+
+static inline int owl_dma_cfg_lli(struct owl_dma_vchan *vchan,
+				  struct owl_dma_lli *lli,
+				  dma_addr_t src, dma_addr_t dst,
+				  u32 len, enum dma_transfer_direction dir)
+{
+	struct owl_dma_lli_hw *hw = &lli->hw;
+	u32 mode;
+
+	mode = OWL_DMA_MODE_PW(0);
+
+	switch (dir) {
+	case DMA_MEM_TO_MEM:
+		mode |= OWL_DMA_MODE_TS(0) | OWL_DMA_MODE_ST_DCU |
+			OWL_DMA_MODE_DT_DCU | OWL_DMA_MODE_SAM_INC |
+			OWL_DMA_MODE_DAM_INC;
+
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	hw->next_lli = 0; /* One link list by default */
+	hw->saddr = src;
+	hw->daddr = dst;
+
+	hw->fcnt = 1; /* Frame count fixed as 1 */
+	hw->flen = len; /* Max frame length is 1MB */
+	hw->src_stride = 0;
+	hw->dst_stride = 0;
+	hw->ctrla = llc_hw_ctrla(mode,
+				 OWL_DMA_LLC_SAV_LOAD_NEXT |
+				 OWL_DMA_LLC_DAV_LOAD_NEXT);
+
+	hw->ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK);
+
+	return 0;
+}
+
+static struct owl_dma_pchan *owl_dma_get_pchan(struct owl_dma *od,
+					       struct owl_dma_vchan *vchan)
+{
+	struct owl_dma_pchan *pchan = NULL;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < od->nr_pchans; i++) {
+		pchan = &od->pchans[i];
+
+		spin_lock_irqsave(&pchan->lock, flags);
+		if (!pchan->vchan) {
+			pchan->vchan = vchan;
+			spin_unlock_irqrestore(&pchan->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&pchan->lock, flags);
+	}
+
+	return pchan;
+}
+
+static int owl_dma_pchan_busy(struct owl_dma *od, struct owl_dma_pchan *pchan)
+{
+	unsigned int val;
+
+	val = dma_readl(od, OWL_DMA_IDLE_STAT);
+
+	return !(val & (1 << pchan->id));
+}
+
+static void owl_dma_terminate_pchan(struct owl_dma *od,
+				    struct owl_dma_pchan *pchan)
+{
+	unsigned long flags;
+	u32 irq_pd;
+
+	pchan_writel(pchan, OWL_DMAX_START, 0);
+	pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+
+	spin_lock_irqsave(&od->lock, flags);
+	dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), false);
+
+	irq_pd = dma_readl(od, OWL_DMA_IRQ_PD0);
+	if (irq_pd & (1 << pchan->id)) {
+		dev_warn(od->dma.dev,
+			 "terminating pchan %d that still has pending irq\n",
+			 pchan->id);
+		dma_writel(od, OWL_DMA_IRQ_PD0, (1 << pchan->id));
+	}
+
+	pchan->vchan = NULL;
+
+	spin_unlock_irqrestore(&od->lock, flags);
+}
+
+static int owl_dma_start_next_txd(struct owl_dma_vchan *vchan)
+{
+	struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
+	struct virt_dma_desc *vd = vchan_next_desc(&vchan->vc);
+	struct owl_dma_pchan *pchan = vchan->pchan;
+	struct owl_dma_txd *txd = to_owl_txd(&vd->tx);
+	struct owl_dma_lli *lli;
+	unsigned long flags;
+	u32 int_ctl;
+
+	list_del(&vd->node);
+
+	vchan->txd = txd;
+
+	/* Wait for channel inactive */
+	while (owl_dma_pchan_busy(od, pchan))
+		cpu_relax();
+
+	lli = list_first_entry(&txd->lli_list,
+			       struct owl_dma_lli, node);
+
+	int_ctl = OWL_DMA_INTCTL_SUPER_BLOCK;
+
+	pchan_writel(pchan, OWL_DMAX_MODE, OWL_DMA_MODE_LME);
+	pchan_writel(pchan, OWL_DMAX_LINKLIST_CTL,
+		     OWL_DMA_LLC_SAV_LOAD_NEXT | OWL_DMA_LLC_DAV_LOAD_NEXT);
+	pchan_writel(pchan, OWL_DMAX_NEXT_DESCRIPTOR, lli->phys);
+	pchan_writel(pchan, OWL_DMAX_INT_CTL, int_ctl);
+
+	/* Clear IRQ status for this pchan */
+	pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+
+	spin_lock_irqsave(&od->lock, flags);
+
+	dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), true);
+
+	spin_unlock_irqrestore(&od->lock, flags);
+
+	dev_dbg(chan2dev(&vchan->vc.chan), "starting pchan %d\n", pchan->id);
+
+	/* Start DMA transfer for this pchan */
+	pchan_writel(pchan, OWL_DMAX_START, 0x1);
+
+	return 0;
+}
+
+static void owl_dma_phy_free(struct owl_dma *od, struct owl_dma_vchan *vchan)
+{
+	/* Ensure that the physical channel is stopped */
+	owl_dma_terminate_pchan(od, vchan->pchan);
+
+	vchan->pchan = NULL;
+}
+
+static irqreturn_t owl_dma_interrupt(int irq, void *dev_id)
+{
+	struct owl_dma *od = dev_id;
+	struct owl_dma_vchan *vchan;
+	struct owl_dma_pchan *pchan;
+	unsigned long pending;
+	int i;
+	unsigned int global_irq_pending, chan_irq_pending;
+
+	spin_lock(&od->lock);
+
+	pending = dma_readl(od, OWL_DMA_IRQ_PD0);
+
+	/* Clear IRQ status for each pchan */
+	for_each_set_bit(i, &pending, od->nr_pchans) {
+		pchan = &od->pchans[i];
+		pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+	}
+
+	/* Clear pending IRQ */
+	dma_writel(od, OWL_DMA_IRQ_PD0, pending);
+
+	/* Check missed pending IRQ */
+	for (i = 0; i < od->nr_pchans; i++) {
+		pchan = &od->pchans[i];
+		chan_irq_pending = pchan_readl(pchan, OWL_DMAX_INT_CTL) &
+				   pchan_readl(pchan, OWL_DMAX_INT_STATUS);
+
+		/* Dummy read to ensure OWL_DMA_IRQ_PD0 value is updated */
+		dma_readl(od, OWL_DMA_IRQ_PD0);
+
+		global_irq_pending = dma_readl(od, OWL_DMA_IRQ_PD0);
+
+		if (chan_irq_pending && !(global_irq_pending & BIT(i)))	{
+			dev_dbg(od->dma.dev,
+				"global and channel IRQ pending match err\n");
+
+			/* Clear IRQ status for this pchan */
+			pchan_update(pchan, OWL_DMAX_INT_STATUS,
+				     0xff, false);
+
+			/* Update global IRQ pending */
+			pending |= BIT(i);
+		}
+	}
+
+	spin_unlock(&od->lock);
+
+	for_each_set_bit(i, &pending, od->nr_pchans) {
+		struct owl_dma_txd *txd;
+
+		pchan = &od->pchans[i];
+
+		vchan = pchan->vchan;
+		if (!vchan) {
+			dev_warn(od->dma.dev, "no vchan attached on pchan %d\n",
+				 pchan->id);
+			continue;
+		}
+
+		spin_lock(&vchan->vc.lock);
+
+		txd = vchan->txd;
+		if (txd) {
+			vchan->txd = NULL;
+
+			vchan_cookie_complete(&txd->vd);
+
+			/*
+			 * Start the next descriptor (if any),
+			 * otherwise free this channel.
+			 */
+			if (vchan_next_desc(&vchan->vc))
+				owl_dma_start_next_txd(vchan);
+			else
+				owl_dma_phy_free(od, vchan);
+		}
+
+		spin_unlock(&vchan->vc.lock);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void owl_dma_free_txd(struct owl_dma *od, struct owl_dma_txd *txd)
+{
+	struct owl_dma_lli *lli, *_lli;
+
+	if (unlikely(!txd))
+		return;
+
+	list_for_each_entry_safe(lli, _lli, &txd->lli_list, node)
+		owl_dma_free_lli(od, lli);
+
+	kfree(txd);
+}
+
+static void owl_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct owl_dma *od = to_owl_dma(vd->tx.chan->device);
+	struct owl_dma_txd *txd = to_owl_txd(&vd->tx);
+
+	owl_dma_free_txd(od, txd);
+}
+
+static int owl_dma_terminate_all(struct dma_chan *chan)
+{
+	struct owl_dma *od = to_owl_dma(chan->device);
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	if (vchan->pchan)
+		owl_dma_phy_free(od, vchan);
+
+	if (vchan->txd) {
+		owl_dma_desc_free(&vchan->txd->vd);
+		vchan->txd = NULL;
+	}
+
+	vchan_get_all_descriptors(&vchan->vc, &head);
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	return 0;
+}
+
+static u32 owl_dma_getbytes_chan(struct owl_dma_vchan *vchan)
+{
+	struct owl_dma_pchan *pchan;
+	struct owl_dma_txd *txd;
+	struct owl_dma_lli *lli;
+	unsigned int next_lli_phy;
+	size_t bytes;
+
+	pchan = vchan->pchan;
+	txd = vchan->txd;
+
+	if (!pchan || !txd)
+		return 0;
+
+	/* Get remain count of current node in link list */
+	bytes = pchan_readl(pchan, OWL_DMAX_REMAIN_CNT);
+
+	/* Loop through the preceding nodes to get total remaining bytes */
+	if (pchan_readl(pchan, OWL_DMAX_MODE) & OWL_DMA_MODE_LME) {
+		next_lli_phy = pchan_readl(pchan, OWL_DMAX_NEXT_DESCRIPTOR);
+		list_for_each_entry(lli, &txd->lli_list, node) {
+			/* Start from the next active node */
+			if (lli->phys == next_lli_phy) {
+				list_for_each_entry(lli, &txd->lli_list, node)
+					bytes += lli->hw.flen;
+				break;
+			}
+		}
+	}
+
+	return bytes;
+}
+
+static enum dma_status owl_dma_tx_status(struct dma_chan *chan,
+					 dma_cookie_t cookie,
+					 struct dma_tx_state *state)
+{
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+	struct owl_dma_lli *lli;
+	struct virt_dma_desc *vd;
+	struct owl_dma_txd *txd;
+	enum dma_status ret;
+	unsigned long flags;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(chan, cookie, state);
+	if (ret == DMA_COMPLETE || !state)
+		return ret;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	vd = vchan_find_desc(&vchan->vc, cookie);
+	if (vd) {
+		txd = to_owl_txd(&vd->tx);
+		list_for_each_entry(lli, &txd->lli_list, node)
+			bytes += lli->hw.flen;
+	} else {
+		bytes = owl_dma_getbytes_chan(vchan);
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	dma_set_residue(state, bytes);
+
+	return ret;
+}
+
+static void owl_dma_phy_alloc_and_start(struct owl_dma_vchan *vchan)
+{
+	struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
+	struct owl_dma_pchan *pchan;
+
+	pchan = owl_dma_get_pchan(od, vchan);
+	if (!pchan)
+		return;
+
+	dev_dbg(od->dma.dev, "allocated pchan %d\n", pchan->id);
+
+	vchan->pchan = pchan;
+	owl_dma_start_next_txd(vchan);
+}
+
+static void owl_dma_issue_pending(struct dma_chan *chan)
+{
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+	if (vchan_issue_pending(&vchan->vc)) {
+		if (!vchan->pchan)
+			owl_dma_phy_alloc_and_start(vchan);
+	}
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor
+		*owl_dma_prep_memcpy(struct dma_chan *chan,
+				     dma_addr_t dst, dma_addr_t src,
+				     size_t len, unsigned long flags)
+{
+	struct owl_dma *od = to_owl_dma(chan->device);
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+	struct owl_dma_txd *txd;
+	struct owl_dma_lli *lli, *prev = NULL;
+	size_t offset, bytes;
+	int ret;
+
+	if (!len)
+		return NULL;
+
+	txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+	if (!txd)
+		return NULL;
+
+	INIT_LIST_HEAD(&txd->lli_list);
+
+	/* Process the transfer as frame by frame */
+	for (offset = 0; offset < len; offset += bytes) {
+		lli = owl_dma_alloc_lli(od);
+		if (!lli) {
+			dev_warn(chan2dev(chan), "failed to allocate lli\n");
+			goto err_txd_free;
+		}
+
+		bytes = min_t(size_t, (len - offset), OWL_DMA_FRAME_MAX_LENGTH);
+
+		ret = owl_dma_cfg_lli(vchan, lli, src + offset, dst + offset,
+				      bytes, DMA_MEM_TO_MEM);
+		if (ret) {
+			dev_warn(chan2dev(chan), "failed to config lli\n");
+			goto err_txd_free;
+		}
+
+		prev = owl_dma_add_lli(txd, prev, lli);
+	}
+
+	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_txd_free:
+	owl_dma_free_txd(od, txd);
+	return NULL;
+}
+
+static void owl_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+
+	/* Ensure all queued descriptors are freed */
+	vchan_free_chan_resources(&vchan->vc);
+}
+
+static inline void owl_dma_free(struct owl_dma *od)
+{
+	struct owl_dma_vchan *vchan = NULL;
+	struct owl_dma_vchan *next;
+
+	list_for_each_entry_safe(vchan,
+				 next, &od->dma.channels, vc.chan.device_node) {
+		list_del(&vchan->vc.chan.device_node);
+		tasklet_kill(&vchan->vc.task);
+	}
+}
+
+static int owl_dma_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct owl_dma *od;
+	struct resource *res;
+	int ret, i, nr_channels, nr_requests;
+
+	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	od->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(od->base))
+		return PTR_ERR(od->base);
+
+	ret = of_property_read_u32(np, "dma-channels", &nr_channels);
+	if (ret) {
+		dev_err(&pdev->dev, "can't get dma-channels\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "dma-requests", &nr_requests);
+	if (ret) {
+		dev_err(&pdev->dev, "can't get dma-requests\n");
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "dma-channels %d, dma-requests %d\n",
+		 nr_channels, nr_requests);
+
+	od->nr_pchans = nr_channels;
+	od->nr_vchans = nr_requests;
+
+	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+
+	platform_set_drvdata(pdev, od);
+	spin_lock_init(&od->lock);
+
+	dma_cap_set(DMA_MEMCPY, od->dma.cap_mask);
+
+	od->dma.dev = &pdev->dev;
+	od->dma.device_free_chan_resources = owl_dma_free_chan_resources;
+	od->dma.device_tx_status = owl_dma_tx_status;
+	od->dma.device_issue_pending = owl_dma_issue_pending;
+	od->dma.device_prep_dma_memcpy = owl_dma_prep_memcpy;
+	od->dma.device_terminate_all = owl_dma_terminate_all;
+	od->dma.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	od->dma.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	od->dma.directions = BIT(DMA_MEM_TO_MEM);
+	od->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	INIT_LIST_HEAD(&od->dma.channels);
+
+	od->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(od->clk)) {
+		dev_err(&pdev->dev, "unable to get clock\n");
+		return PTR_ERR(od->clk);
+	}
+
+	/*
+	 * Eventhough the DMA controller is capable of generating 4
+	 * IRQ's for DMA priority feature, we only use 1 IRQ for
+	 * simplification.
+	 */
+	od->irq = platform_get_irq(pdev, 0);
+	ret = devm_request_irq(&pdev->dev, od->irq, owl_dma_interrupt, 0,
+			       dev_name(&pdev->dev), od);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to request IRQ\n");
+		return ret;
+	}
+
+	/* Init physical channel */
+	od->pchans = devm_kcalloc(&pdev->dev, od->nr_pchans,
+				  sizeof(struct owl_dma_pchan), GFP_KERNEL);
+	if (!od->pchans)
+		return -ENOMEM;
+
+	for (i = 0; i < od->nr_pchans; i++) {
+		struct owl_dma_pchan *pchan = &od->pchans[i];
+
+		pchan->id = i;
+		pchan->base = od->base + OWL_DMA_CHAN_BASE(i);
+	}
+
+	/* Init virtual channel */
+	od->vchans = devm_kcalloc(&pdev->dev, od->nr_vchans,
+				  sizeof(struct owl_dma_vchan), GFP_KERNEL);
+	if (!od->vchans)
+		return -ENOMEM;
+
+	for (i = 0; i < od->nr_vchans; i++) {
+		struct owl_dma_vchan *vchan = &od->vchans[i];
+
+		vchan->vc.desc_free = owl_dma_desc_free;
+		vchan_init(&vchan->vc, &od->dma);
+	}
+
+	/* Create a pool of consistent memory blocks for hardware descriptors */
+	od->lli_pool = dma_pool_create(dev_name(od->dma.dev), od->dma.dev,
+				       sizeof(struct owl_dma_lli),
+				       __alignof__(struct owl_dma_lli),
+				       0);
+	if (!od->lli_pool) {
+		dev_err(&pdev->dev, "unable to allocate DMA descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	clk_prepare_enable(od->clk);
+
+	ret = dma_async_device_register(&od->dma);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register DMA engine device\n");
+		goto err_pool_free;
+	}
+
+	return 0;
+
+err_pool_free:
+	clk_disable_unprepare(od->clk);
+	dma_pool_destroy(od->lli_pool);
+
+	return ret;
+}
+
+static int owl_dma_remove(struct platform_device *pdev)
+{
+	struct owl_dma *od = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&od->dma);
+
+	/* Mask all interrupts for this execution environment */
+	dma_writel(od, OWL_DMA_IRQ_EN0, 0x0);
+
+	/* Make sure we won't have any further interrupts */
+	devm_free_irq(od->dma.dev, od->irq, od);
+
+	owl_dma_free(od);
+
+	clk_disable_unprepare(od->clk);
+
+	return 0;
+}
+
+static const struct of_device_id owl_dma_match[] = {
+	{ .compatible = "actions,s900-dma", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, owl_dma_match);
+
+static struct platform_driver owl_dma_driver = {
+	.probe	= owl_dma_probe,
+	.remove	= owl_dma_remove,
+	.driver = {
+		.name = "dma-owl",
+		.of_match_table = of_match_ptr(owl_dma_match),
+	},
+};
+
+static int owl_dma_init(void)
+{
+	return platform_driver_register(&owl_dma_driver);
+}
+subsys_initcall(owl_dma_init);
+
+static void __exit owl_dma_exit(void)
+{
+	platform_driver_unregister(&owl_dma_driver);
+}
+module_exit(owl_dma_exit);
+
+MODULE_AUTHOR("David Liu <liuwei@actions-semi.com>");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_DESCRIPTION("Actions Semi Owl SoCs DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 2a2ccd9..48ee35e 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Renesas R-Car Gen2 DMA Controller Driver
  *
  * Copyright (C) 2014 Renesas Electronics Inc.
  *
  * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
  */
 
 #include <linux/delay.h>
@@ -431,7 +428,8 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
 		chcr |= RCAR_DMACHCR_DPM_DISABLED | RCAR_DMACHCR_IE;
 	}
 
-	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr | RCAR_DMACHCR_DE);
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR,
+			     chcr | RCAR_DMACHCR_DE | RCAR_DMACHCR_CAIE);
 }
 
 static int rcar_dmac_init(struct rcar_dmac *dmac)
@@ -761,21 +759,15 @@ static void rcar_dmac_chcr_de_barrier(struct rcar_dmac_chan *chan)
 	dev_err(chan->chan.device->dev, "CHCR DE check error\n");
 }
 
-static void rcar_dmac_sync_tcr(struct rcar_dmac_chan *chan)
+static void rcar_dmac_clear_chcr_de(struct rcar_dmac_chan *chan)
 {
 	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
 
-	if (!(chcr & RCAR_DMACHCR_DE))
-		return;
-
 	/* set DE=0 and flush remaining data */
 	rcar_dmac_chan_write(chan, RCAR_DMACHCR, (chcr & ~RCAR_DMACHCR_DE));
 
 	/* make sure all remaining data was flushed */
 	rcar_dmac_chcr_de_barrier(chan);
-
-	/* back DE */
-	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr);
 }
 
 static void rcar_dmac_chan_halt(struct rcar_dmac_chan *chan)
@@ -783,7 +775,8 @@ static void rcar_dmac_chan_halt(struct rcar_dmac_chan *chan)
 	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
 
 	chcr &= ~(RCAR_DMACHCR_DSE | RCAR_DMACHCR_DSIE | RCAR_DMACHCR_IE |
-		  RCAR_DMACHCR_TE | RCAR_DMACHCR_DE);
+		  RCAR_DMACHCR_TE | RCAR_DMACHCR_DE |
+		  RCAR_DMACHCR_CAE | RCAR_DMACHCR_CAIE);
 	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr);
 	rcar_dmac_chcr_de_barrier(chan);
 }
@@ -812,12 +805,7 @@ static void rcar_dmac_chan_reinit(struct rcar_dmac_chan *chan)
 	}
 }
 
-static void rcar_dmac_stop(struct rcar_dmac *dmac)
-{
-	rcar_dmac_write(dmac, RCAR_DMAOR, 0);
-}
-
-static void rcar_dmac_abort(struct rcar_dmac *dmac)
+static void rcar_dmac_stop_all_chan(struct rcar_dmac *dmac)
 {
 	unsigned int i;
 
@@ -826,14 +814,24 @@ static void rcar_dmac_abort(struct rcar_dmac *dmac)
 		struct rcar_dmac_chan *chan = &dmac->channels[i];
 
 		/* Stop and reinitialize the channel. */
-		spin_lock(&chan->lock);
+		spin_lock_irq(&chan->lock);
 		rcar_dmac_chan_halt(chan);
-		spin_unlock(&chan->lock);
-
-		rcar_dmac_chan_reinit(chan);
+		spin_unlock_irq(&chan->lock);
 	}
 }
 
+static int rcar_dmac_chan_pause(struct dma_chan *chan)
+{
+	unsigned long flags;
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	spin_lock_irqsave(&rchan->lock, flags);
+	rcar_dmac_clear_chcr_de(rchan);
+	spin_unlock_irqrestore(&rchan->lock, flags);
+
+	return 0;
+}
+
 /* -----------------------------------------------------------------------------
  * Descriptors preparation
  */
@@ -1355,9 +1353,6 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
 		residue += chunk->size;
 	}
 
-	if (desc->direction == DMA_DEV_TO_MEM)
-		rcar_dmac_sync_tcr(chan);
-
 	/* Add the residue for the current chunk. */
 	residue += rcar_dmac_chan_read(chan, RCAR_DMATCRB) << desc->xfer_shift;
 
@@ -1522,11 +1517,26 @@ static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
 	u32 mask = RCAR_DMACHCR_DSE | RCAR_DMACHCR_TE;
 	struct rcar_dmac_chan *chan = dev;
 	irqreturn_t ret = IRQ_NONE;
+	bool reinit = false;
 	u32 chcr;
 
 	spin_lock(&chan->lock);
 
 	chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+	if (chcr & RCAR_DMACHCR_CAE) {
+		struct rcar_dmac *dmac = to_rcar_dmac(chan->chan.device);
+
+		/*
+		 * We don't need to call rcar_dmac_chan_halt()
+		 * because channel is already stopped in error case.
+		 * We need to clear register and check DE bit as recovery.
+		 */
+		rcar_dmac_write(dmac, RCAR_DMACHCLR, 1 << chan->index);
+		rcar_dmac_chcr_de_barrier(chan);
+		reinit = true;
+		goto spin_lock_end;
+	}
+
 	if (chcr & RCAR_DMACHCR_TE)
 		mask |= RCAR_DMACHCR_DE;
 	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr & ~mask);
@@ -1539,8 +1549,16 @@ static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
 	if (chcr & RCAR_DMACHCR_TE)
 		ret |= rcar_dmac_isr_transfer_end(chan);
 
+spin_lock_end:
 	spin_unlock(&chan->lock);
 
+	if (reinit) {
+		dev_err(chan->chan.device->dev, "Channel Address Error\n");
+
+		rcar_dmac_chan_reinit(chan);
+		ret = IRQ_HANDLED;
+	}
+
 	return ret;
 }
 
@@ -1597,24 +1615,6 @@ static irqreturn_t rcar_dmac_isr_channel_thread(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t rcar_dmac_isr_error(int irq, void *data)
-{
-	struct rcar_dmac *dmac = data;
-
-	if (!(rcar_dmac_read(dmac, RCAR_DMAOR) & RCAR_DMAOR_AE))
-		return IRQ_NONE;
-
-	/*
-	 * An unrecoverable error occurred on an unknown channel. Halt the DMAC,
-	 * abort transfers on all channels, and reinitialize the DMAC.
-	 */
-	rcar_dmac_stop(dmac);
-	rcar_dmac_abort(dmac);
-	rcar_dmac_init(dmac);
-
-	return IRQ_HANDLED;
-}
-
 /* -----------------------------------------------------------------------------
  * OF xlate and channel filter
  */
@@ -1784,8 +1784,6 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 	struct rcar_dmac *dmac;
 	struct resource *mem;
 	unsigned int i;
-	char *irqname;
-	int irq;
 	int ret;
 
 	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
@@ -1824,17 +1822,6 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 	if (IS_ERR(dmac->iomem))
 		return PTR_ERR(dmac->iomem);
 
-	irq = platform_get_irq_byname(pdev, "error");
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no error IRQ specified\n");
-		return -ENODEV;
-	}
-
-	irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:error",
-				 dev_name(dmac->dev));
-	if (!irqname)
-		return -ENOMEM;
-
 	/* Enable runtime PM and initialize the device. */
 	pm_runtime_enable(&pdev->dev);
 	ret = pm_runtime_get_sync(&pdev->dev);
@@ -1871,6 +1858,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 	engine->device_prep_slave_sg		= rcar_dmac_prep_slave_sg;
 	engine->device_prep_dma_cyclic		= rcar_dmac_prep_dma_cyclic;
 	engine->device_config			= rcar_dmac_device_config;
+	engine->device_pause			= rcar_dmac_chan_pause;
 	engine->device_terminate_all		= rcar_dmac_chan_terminate_all;
 	engine->device_tx_status		= rcar_dmac_tx_status;
 	engine->device_issue_pending		= rcar_dmac_issue_pending;
@@ -1885,14 +1873,6 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 			goto error;
 	}
 
-	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
-			       irqname, dmac);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
-			irq, ret);
-		return ret;
-	}
-
 	/* Register the DMAC as a DMA provider for DT. */
 	ret = of_dma_controller_register(pdev->dev.of_node, rcar_dmac_of_xlate,
 					 NULL);
@@ -1932,7 +1912,7 @@ static void rcar_dmac_shutdown(struct platform_device *pdev)
 {
 	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
 
-	rcar_dmac_stop(dmac);
+	rcar_dmac_stop_all_chan(dmac);
 }
 
 static const struct of_device_id rcar_dmac_of_ids[] = {
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 1bc149a..f4edfc5 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -555,6 +555,7 @@ struct d40_gen_dmac {
  * @reg_val_backup_v4: Backup of registers that only exits on dma40 v3 and
  * later
  * @reg_val_backup_chan: Backup data for standard channel parameter registers.
+ * @regs_interrupt: Scratch space for registers during interrupt.
  * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off.
  * @gen_dmac: the struct for generic registers values to represent u8500/8540
  * DMA controller
@@ -592,6 +593,7 @@ struct d40_base {
 	u32				  reg_val_backup[BACKUP_REGS_SZ];
 	u32				  reg_val_backup_v4[BACKUP_REGS_SZ_MAX];
 	u32				 *reg_val_backup_chan;
+	u32				 *regs_interrupt;
 	u16				  gcc_pwr_off_mask;
 	struct d40_gen_dmac		  gen_dmac;
 };
@@ -1637,7 +1639,7 @@ static irqreturn_t d40_handle_interrupt(int irq, void *data)
 	struct d40_chan *d40c;
 	unsigned long flags;
 	struct d40_base *base = data;
-	u32 regs[base->gen_dmac.il_size];
+	u32 *regs = base->regs_interrupt;
 	struct d40_interrupt_lookup *il = base->gen_dmac.il;
 	u32 il_size = base->gen_dmac.il_size;
 
@@ -3258,13 +3260,22 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	if (!base->lcla_pool.alloc_map)
 		goto free_backup_chan;
 
+	base->regs_interrupt = kmalloc_array(base->gen_dmac.il_size,
+					     sizeof(*base->regs_interrupt),
+					     GFP_KERNEL);
+	if (!base->regs_interrupt)
+		goto free_map;
+
 	base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc),
 					    0, SLAB_HWCACHE_ALIGN,
 					    NULL);
 	if (base->desc_slab == NULL)
-		goto free_map;
+		goto free_regs;
+
 
 	return base;
+ free_regs:
+	kfree(base->regs_interrupt);
  free_map:
 	kfree(base->lcla_pool.alloc_map);
  free_backup_chan:
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 8c58073..379e8d5 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -594,7 +594,7 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 
 	chan->busy = true;
 
-	dev_dbg(chan2dev(chan), "vchan %p: started\n", &chan->vchan);
+	dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan);
 }
 
 static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan)
@@ -693,7 +693,7 @@ static void stm32_dma_issue_pending(struct dma_chan *c)
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
 	if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
-		dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan);
+		dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan);
 		stm32_dma_start_transfer(chan);
 
 	}
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index 9dc450b..06dd172 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -1170,7 +1170,7 @@ static void stm32_mdma_start_transfer(struct stm32_mdma_chan *chan)
 
 	chan->busy = true;
 
-	dev_dbg(chan2dev(chan), "vchan %p: started\n", &chan->vchan);
+	dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan);
 }
 
 static void stm32_mdma_issue_pending(struct dma_chan *c)
@@ -1183,7 +1183,7 @@ static void stm32_mdma_issue_pending(struct dma_chan *c)
 	if (!vchan_issue_pending(&chan->vchan))
 		goto end;
 
-	dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan);
+	dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan);
 
 	if (!chan->desc && !chan->busy)
 		stm32_mdma_start_transfer(chan);
@@ -1203,7 +1203,7 @@ static int stm32_mdma_pause(struct dma_chan *c)
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
 
 	if (!ret)
-		dev_dbg(chan2dev(chan), "vchan %p: pause\n", &chan->vchan);
+		dev_dbg(chan2dev(chan), "vchan %pK: pause\n", &chan->vchan);
 
 	return ret;
 }
@@ -1240,7 +1240,7 @@ static int stm32_mdma_resume(struct dma_chan *c)
 
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
 
-	dev_dbg(chan2dev(chan), "vchan %p: resume\n", &chan->vchan);
+	dev_dbg(chan2dev(chan), "vchan %pK: resume\n", &chan->vchan);
 
 	return 0;
 }
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 27b5235..c124423 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -115,6 +115,9 @@
 #define XILINX_VDMA_REG_START_ADDRESS(n)	(0x000c + 4 * (n))
 #define XILINX_VDMA_REG_START_ADDRESS_64(n)	(0x000c + 8 * (n))
 
+#define XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP	0x00ec
+#define XILINX_VDMA_ENABLE_VERTICAL_FLIP	BIT(0)
+
 /* HW specific definitions */
 #define XILINX_DMA_MAX_CHANS_PER_DEVICE	0x20
 
@@ -340,6 +343,7 @@ struct xilinx_dma_tx_descriptor {
  * @start_transfer: Differentiate b/w DMA IP's transfer
  * @stop_transfer: Differentiate b/w DMA IP's quiesce
  * @tdest: TDEST value for mcdma
+ * @has_vflip: S2MM vertical flip
  */
 struct xilinx_dma_chan {
 	struct xilinx_dma_device *xdev;
@@ -376,6 +380,7 @@ struct xilinx_dma_chan {
 	void (*start_transfer)(struct xilinx_dma_chan *chan);
 	int (*stop_transfer)(struct xilinx_dma_chan *chan);
 	u16 tdest;
+	bool has_vflip;
 };
 
 /**
@@ -1092,6 +1097,14 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 				desc->async_tx.phys);
 
 	/* Configure the hardware using info in the config structure */
+	if (chan->has_vflip) {
+		reg = dma_read(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP);
+		reg &= ~XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+		reg |= config->vflip_en;
+		dma_write(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP,
+			  reg);
+	}
+
 	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
 
 	if (config->frm_cnt_en)
@@ -2105,6 +2118,8 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
 	}
 
 	chan->config.frm_cnt_en = cfg->frm_cnt_en;
+	chan->config.vflip_en = cfg->vflip_en;
+
 	if (cfg->park)
 		chan->config.park_frm = cfg->park_frm;
 	else
@@ -2428,6 +2443,13 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 		chan->direction = DMA_DEV_TO_MEM;
 		chan->id = chan_id;
 		chan->tdest = chan_id - xdev->nr_channels;
+		chan->has_vflip = of_property_read_bool(node,
+					"xlnx,enable-vert-flip");
+		if (chan->has_vflip) {
+			chan->config.vflip_en = dma_read(chan,
+				XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP) &
+				XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+		}
 
 		chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
 		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h
index 34b98f2..5b6e61e 100644
--- a/include/linux/dma/xilinx_dma.h
+++ b/include/linux/dma/xilinx_dma.h
@@ -27,6 +27,7 @@
  * @delay: Delay counter
  * @reset: Reset Channel
  * @ext_fsync: External Frame Sync source
+ * @vflip_en:  Vertical Flip enable
  */
 struct xilinx_vdma_config {
 	int frm_dly;
@@ -39,6 +40,7 @@ struct xilinx_vdma_config {
 	int delay;
 	int reset;
 	int ext_fsync;
+	bool vflip_en;
 };
 
 int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 861be5c..d49ec5c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -415,7 +415,9 @@ enum dma_residue_granularity {
  *	each type, the dma controller should set BIT(<TYPE>) and same
  *	should be checked by controller as well
  * @max_burst: max burst capability per-transfer
- * @cmd_pause: true, if pause and thereby resume is supported
+ * @cmd_pause: true, if pause is supported (i.e. for reading residue or
+ *	       for resume later)
+ * @cmd_resume: true, if resume is supported
  * @cmd_terminate: true, if terminate cmd is supported
  * @residue_granularity: granularity of the reported transfer residue
  * @descriptor_reuse: if a descriptor can be reused by client and
@@ -427,6 +429,7 @@ struct dma_slave_caps {
 	u32 directions;
 	u32 max_burst;
 	bool cmd_pause;
+	bool cmd_resume;
 	bool cmd_terminate;
 	enum dma_residue_granularity residue_granularity;
 	bool descriptor_reuse;
@@ -1403,6 +1406,7 @@ static inline int dmaengine_desc_free(struct dma_async_tx_descriptor *desc)
 /* --- DMA device --- */
 
 int dma_async_device_register(struct dma_device *device);
+int dmaenginem_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index 56a541b..76c46d7 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -156,7 +156,7 @@ static int dmaengine_pcm_set_runtime_hwparams(struct snd_pcm_substream *substrea
 
 	ret = dma_get_slave_caps(chan, &dma_caps);
 	if (ret == 0) {
-		if (dma_caps.cmd_pause)
+		if (dma_caps.cmd_pause && dma_caps.cmd_resume)
 			hw.info |= SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME;
 		if (dma_caps.residue_granularity <= DMA_RESIDUE_GRANULARITY_SEGMENT)
 			hw.info |= SNDRV_PCM_INFO_BATCH;