Merge branch 'topic/pxa' into for-linus
diff --git a/Documentation/devicetree/bindings/dma/dma.txt b/Documentation/devicetree/bindings/dma/dma.txt
index 8210427..6312fb0 100644
--- a/Documentation/devicetree/bindings/dma/dma.txt
+++ b/Documentation/devicetree/bindings/dma/dma.txt
@@ -31,6 +31,34 @@
 		dma-requests = <127>;
 	};
 
+* DMA router
+
+DMA routers are transparent IP blocks used to route DMA request lines from
+devices to the DMA controller. Some SoCs (like TI DRA7x) have more peripherals
+integrated with DMA requests than what the DMA controller can handle directly.
+
+Required property:
+- dma-masters:		phandle of the DMA controller or list of phandles for
+			the DMA controllers the router can direct the signal to.
+- #dma-cells: 		Must be at least 1. Used to provide DMA router specific
+			information. See DMA client binding below for more
+			details.
+
+Optional properties:
+- dma-requests: 	Number of incoming request lines the router can handle.
+- In the node pointed by the dma-masters:
+	- dma-requests:	The router driver might need to look for this in order
+			to configure the routing.
+
+Example:
+	sdma_xbar: dma-router@4a002b78 {
+		compatible = "ti,dra7-dma-crossbar";
+		reg = <0x4a002b78 0xfc>;
+		#dma-cells = <1>;
+		dma-requests = <205>;
+		ti,dma-safe-map = <0>;
+		dma-masters = <&sdma>;
+	};
 
 * DMA client
 
diff --git a/Documentation/devicetree/bindings/dma/mv-xor.txt b/Documentation/devicetree/bindings/dma/mv-xor.txt
index 7c6cb7f..cc29c35 100644
--- a/Documentation/devicetree/bindings/dma/mv-xor.txt
+++ b/Documentation/devicetree/bindings/dma/mv-xor.txt
@@ -1,7 +1,7 @@
 * Marvell XOR engines
 
 Required properties:
-- compatible: Should be "marvell,orion-xor"
+- compatible: Should be "marvell,orion-xor" or "marvell,armada-380-xor"
 - reg: Should contain registers location and length (two sets)
     the first set is the low registers, the second set the high
     registers for the XOR engine.
diff --git a/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt b/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt
index ecbc96a..ccd52d6 100644
--- a/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt
+++ b/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt
@@ -3,7 +3,8 @@
 See dma.txt first
 
 Required properties:
-- compatible: Should be "sirf,prima2-dmac" or "sirf,marco-dmac"
+- compatible: Should be "sirf,prima2-dmac", "sirf,atlas7-dmac" or
+  "sirf,atlas7-dmac-v2"
 - reg: Should contain DMA registers location and length.
 - interrupts: Should contain one interrupt shared by all channel
 - #dma-cells: must be <1>. used to represent the number of integer
diff --git a/Documentation/devicetree/bindings/dma/sun6i-dma.txt b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
index 9cdcba24d..d13c136 100644
--- a/Documentation/devicetree/bindings/dma/sun6i-dma.txt
+++ b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
@@ -4,7 +4,10 @@
 
 Required properties:
 
-- compatible:	Must be "allwinner,sun6i-a31-dma" or "allwinner,sun8i-a23-dma"
+- compatible:	Must be one of
+		  "allwinner,sun6i-a31-dma"
+		  "allwinner,sun8i-a23-dma"
+		  "allwinner,sun8i-h3-dma"
 - reg:		Should contain the registers base address and length
 - interrupts:	Should contain a reference to the interrupt used by this device
 - clocks:	Should contain a reference to the parent AHB clock
diff --git a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
new file mode 100644
index 0000000..63a4892
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
@@ -0,0 +1,52 @@
+Texas Instruments DMA Crossbar (DMA request router)
+
+Required properties:
+- compatible:	"ti,dra7-dma-crossbar" for DRA7xx DMA crossbar
+- reg:		Memory map for accessing module
+- #dma-cells:	Should be set to <1>.
+		Clients should use the crossbar request number (input)
+- dma-requests:	Number of DMA requests the crossbar can receive
+- dma-masters:	phandle pointing to the DMA controller
+
+The DMA controller node need to have the following poroperties:
+- dma-requests:	Number of DMA requests the controller can handle
+
+Optional properties:
+- ti,dma-safe-map: Safe routing value for unused request lines
+
+Example:
+
+/* DMA controller */
+sdma: dma-controller@4a056000 {
+	compatible = "ti,omap4430-sdma";
+	reg = <0x4a056000 0x1000>;
+	interrupts =	<GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>,
+			<GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+			<GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
+			<GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+	#dma-cells = <1>;
+	dma-channels = <32>;
+	dma-requests = <127>;
+};
+
+/* DMA crossbar */
+sdma_xbar: dma-router@4a002b78 {
+	compatible = "ti,dra7-dma-crossbar";
+	reg = <0x4a002b78 0xfc>;
+	#dma-cells = <1>;
+	dma-requests = <205>;
+	ti,dma-safe-map = <0>;
+	dma-masters = <&sdma>;
+};
+
+/* DMA client */
+uart1: serial@4806a000 {
+	compatible = "ti,omap4-uart";
+	reg = <0x4806a000 0x100>;
+	interrupts-extended = <&gic GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
+	ti,hwmods = "uart1";
+	clock-frequency = <48000000>;
+	status = "disabled";
+	dmas = <&sdma_xbar 49>, <&sdma_xbar 50>;
+	dma-names = "tx", "rx";
+};
diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
index 05d2280..ca67b0f 100644
--- a/Documentation/dmaengine/provider.txt
+++ b/Documentation/dmaengine/provider.txt
@@ -345,11 +345,12 @@
       that abstracts it away.
 
   * DMA_CTRL_ACK
-    - Undocumented feature
-    - No one really has an idea of what it's about, besides being
-      related to reusing the DMA transaction descriptors or having
-      additional transactions added to it in the async-tx API
-    - Useless in the case of the slave API
+    - If set, the transfer can be reused after being completed.
+    - There is a guarantee the transfer won't be freed until it is acked
+      by async_tx_ack().
+    - As a consequence, if a device driver wants to skip the dma_map_sg() and
+      dma_unmap_sg() in between 2 transfers, because the DMA'd data wasn't used,
+      it can resubmit the transfer right after its completion.
 
 General Design Notes
 --------------------
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index b37015e..88d474b 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -256,6 +256,9 @@
 	  Enable support for the TI EDMA controller. This DMA
 	  engine is found on TI DaVinci and AM33xx parts.
 
+config TI_DMA_CROSSBAR
+	bool
+
 config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 	bool
 
@@ -341,6 +344,7 @@
 	depends on ARCH_OMAP
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
+	select TI_DMA_CROSSBAR if SOC_DRA7XX
 
 config DMA_BCM2835
 	tristate "BCM2835 DMA engine support"
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index cdf4cbd..6a4d6f2 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -39,6 +39,7 @@
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
 obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o
 obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
 obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
 obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 49d396e..5de3cf4 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -474,7 +474,7 @@
 	u32 val = readl(ch->reg_config);
 
 	val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
-	         PL080_CONFIG_TC_IRQ_MASK);
+		 PL080_CONFIG_TC_IRQ_MASK);
 
 	writel(val, ch->reg_config);
 
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 57b2141..59892126 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -247,6 +247,10 @@
 	channel_writel(atchan, CTRLA, 0);
 	channel_writel(atchan, CTRLB, 0);
 	channel_writel(atchan, DSCR, first->txd.phys);
+	channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) |
+		       ATC_SPIP_BOUNDARY(first->boundary));
+	channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) |
+		       ATC_DPIP_BOUNDARY(first->boundary));
 	dma_writel(atdma, CHER, atchan->mask);
 
 	vdbg_dump_regs(atchan);
@@ -635,6 +639,104 @@
 }
 
 /**
+ * atc_prep_dma_interleaved - prepare memory to memory interleaved operation
+ * @chan: the channel to prepare operation on
+ * @xt: Interleaved transfer template
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_interleaved(struct dma_chan *chan,
+			 struct dma_interleaved_template *xt,
+			 unsigned long flags)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct data_chunk	*first = xt->sgl;
+	struct at_desc		*desc = NULL;
+	size_t			xfer_count;
+	unsigned int		dwidth;
+	u32			ctrla;
+	u32			ctrlb;
+	size_t			len = 0;
+	int			i;
+
+	dev_info(chan2dev(chan),
+		 "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n",
+		__func__, xt->src_start, xt->dst_start, xt->numf,
+		xt->frame_size, flags);
+
+	if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
+		return NULL;
+
+	/*
+	 * The controller can only "skip" X bytes every Y bytes, so we
+	 * need to make sure we are given a template that fit that
+	 * description, ie a template with chunks that always have the
+	 * same size, with the same ICGs.
+	 */
+	for (i = 0; i < xt->frame_size; i++) {
+		struct data_chunk *chunk = xt->sgl + i;
+
+		if ((chunk->size != xt->sgl->size) ||
+		    (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) ||
+		    (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) {
+			dev_err(chan2dev(chan),
+				"%s: the controller can transfer only identical chunks\n",
+				__func__);
+			return NULL;
+		}
+
+		len += chunk->size;
+	}
+
+	dwidth = atc_get_xfer_width(xt->src_start,
+				    xt->dst_start, len);
+
+	xfer_count = len >> dwidth;
+	if (xfer_count > ATC_BTSIZE_MAX) {
+		dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__);
+		return NULL;
+	}
+
+	ctrla = ATC_SRC_WIDTH(dwidth) |
+		ATC_DST_WIDTH(dwidth);
+
+	ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
+		| ATC_SRC_ADDR_MODE_INCR
+		| ATC_DST_ADDR_MODE_INCR
+		| ATC_SRC_PIP
+		| ATC_DST_PIP
+		| ATC_FC_MEM2MEM;
+
+	/* create the transfer */
+	desc = atc_desc_get(atchan);
+	if (!desc) {
+		dev_err(chan2dev(chan),
+			"%s: couldn't allocate our descriptor\n", __func__);
+		return NULL;
+	}
+
+	desc->lli.saddr = xt->src_start;
+	desc->lli.daddr = xt->dst_start;
+	desc->lli.ctrla = ctrla | xfer_count;
+	desc->lli.ctrlb = ctrlb;
+
+	desc->boundary = first->size >> dwidth;
+	desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1;
+	desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1;
+
+	desc->txd.cookie = -EBUSY;
+	desc->total_len = desc->len = len;
+	desc->tx_width = dwidth;
+
+	/* set end-of-link to the last link descriptor of list*/
+	set_desc_eol(desc);
+
+	desc->txd.flags = flags; /* client is in control of this ack */
+
+	return &desc->txd;
+}
+
+/**
  * atc_prep_dma_memcpy - prepare a memcpy operation
  * @chan: the channel to prepare operation on
  * @dest: operation virtual destination address
@@ -1609,6 +1711,7 @@
 	/* setup platform data for each SoC */
 	dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask);
 	dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask);
 	dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask);
 	dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask);
 	dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask);
@@ -1713,6 +1816,9 @@
 	atdma->dma_common.dev = &pdev->dev;
 
 	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask))
+		atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved;
+
 	if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask))
 		atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy;
 
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index 2727ca5..bc8d5eb 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -196,6 +196,11 @@
 	size_t				len;
 	u32				tx_width;
 	size_t				total_len;
+
+	/* Interleaved data */
+	size_t				boundary;
+	size_t				dst_hole;
+	size_t				src_hole;
 };
 
 static inline struct at_desc *
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 933e4b3..80e46e5 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -236,6 +236,10 @@
 	dma_addr_t	mbr_sa;		/* Source Address Member */
 	dma_addr_t	mbr_da;		/* Destination Address Member */
 	u32		mbr_cfg;	/* Configuration Register */
+	u32		mbr_bc;		/* Block Control Register */
+	u32		mbr_ds;		/* Data Stride Register */
+	u32		mbr_sus;	/* Source Microblock Stride Register */
+	u32		mbr_dus;	/* Destination Microblock Stride Register */
 };
 
 
@@ -359,6 +363,8 @@
 	if (at_xdmac_chan_is_cyclic(atchan)) {
 		reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
 		at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg);
+	} else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) {
+		reg = AT_XDMAC_CNDC_NDVIEW_NDV3;
 	} else {
 		/*
 		 * No need to write AT_XDMAC_CC reg, it will be done when the
@@ -465,6 +471,33 @@
 	return desc;
 }
 
+static void at_xdmac_queue_desc(struct dma_chan *chan,
+				struct at_xdmac_desc *prev,
+				struct at_xdmac_desc *desc)
+{
+	if (!prev || !desc)
+		return;
+
+	prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+	prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE;
+
+	dev_dbg(chan2dev(chan),	"%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+		__func__, prev, &prev->lld.mbr_nda);
+}
+
+static inline void at_xdmac_increment_block_count(struct dma_chan *chan,
+						  struct at_xdmac_desc *desc)
+{
+	if (!desc)
+		return;
+
+	desc->lld.mbr_bc++;
+
+	dev_dbg(chan2dev(chan),
+		"%s: incrementing the block count of the desc 0x%p\n",
+		__func__, desc);
+}
+
 static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
 				       struct of_dma *of_dma)
 {
@@ -621,19 +654,14 @@
 		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2			/* next descriptor view */
 			| AT_XDMAC_MBR_UBC_NDEN					/* next descriptor dst parameter update */
 			| AT_XDMAC_MBR_UBC_NSEN					/* next descriptor src parameter update */
-			| (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE)		/* descriptor fetch */
 			| (len >> fixed_dwidth);				/* microblock length */
 		dev_dbg(chan2dev(chan),
 			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
 			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
 
 		/* Chain lld. */
-		if (prev) {
-			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
-			dev_dbg(chan2dev(chan),
-				 "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
-				 __func__, prev, &prev->lld.mbr_nda);
-		}
+		if (prev)
+			at_xdmac_queue_desc(chan, prev, desc);
 
 		prev = desc;
 		if (!first)
@@ -708,7 +736,6 @@
 		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
 			| AT_XDMAC_MBR_UBC_NDEN
 			| AT_XDMAC_MBR_UBC_NSEN
-			| AT_XDMAC_MBR_UBC_NDE
 			| period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg);
 
 		dev_dbg(chan2dev(chan),
@@ -716,12 +743,8 @@
 			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
 
 		/* Chain lld. */
-		if (prev) {
-			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
-			dev_dbg(chan2dev(chan),
-				 "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
-				 __func__, prev, &prev->lld.mbr_nda);
-		}
+		if (prev)
+			at_xdmac_queue_desc(chan, prev, desc);
 
 		prev = desc;
 		if (!first)
@@ -743,6 +766,215 @@
 	return &first->tx_dma_desc;
 }
 
+static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr)
+{
+	u32 width;
+
+	/*
+	 * Check address alignment to select the greater data width we
+	 * can use.
+	 *
+	 * Some XDMAC implementations don't provide dword transfer, in
+	 * this case selecting dword has the same behavior as
+	 * selecting word transfers.
+	 */
+	if (!(addr & 7)) {
+		width = AT_XDMAC_CC_DWIDTH_DWORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
+	} else if (!(addr & 3)) {
+		width = AT_XDMAC_CC_DWIDTH_WORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
+	} else if (!(addr & 1)) {
+		width = AT_XDMAC_CC_DWIDTH_HALFWORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
+	} else {
+		width = AT_XDMAC_CC_DWIDTH_BYTE;
+		dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
+	}
+
+	return width;
+}
+
+static struct at_xdmac_desc *
+at_xdmac_interleaved_queue_desc(struct dma_chan *chan,
+				struct at_xdmac_chan *atchan,
+				struct at_xdmac_desc *prev,
+				dma_addr_t src, dma_addr_t dst,
+				struct dma_interleaved_template *xt,
+				struct data_chunk *chunk)
+{
+	struct at_xdmac_desc	*desc;
+	u32			dwidth;
+	unsigned long		flags;
+	size_t			ublen;
+	/*
+	 * WARNING: The channel configuration is set here since there is no
+	 * dmaengine_slave_config call in this case. Moreover we don't know the
+	 * direction, it involves we can't dynamically set the source and dest
+	 * interface so we have to use the same one. Only interface 0 allows EBI
+	 * access. Hopefully we can access DDR through both ports (at least on
+	 * SAMA5D4x), so we can use the same interface for source and dest,
+	 * that solves the fact we don't know the direction.
+	 */
+	u32			chan_cc = AT_XDMAC_CC_DIF(0)
+					| AT_XDMAC_CC_SIF(0)
+					| AT_XDMAC_CC_MBSIZE_SIXTEEN
+					| AT_XDMAC_CC_TYPE_MEM_TRAN;
+
+	dwidth = at_xdmac_align_width(chan, src | dst | chunk->size);
+	if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
+		dev_dbg(chan2dev(chan),
+			"%s: chunk too big (%d, max size %lu)...\n",
+			__func__, chunk->size,
+			AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth);
+		return NULL;
+	}
+
+	if (prev)
+		dev_dbg(chan2dev(chan),
+			"Adding items at the end of desc 0x%p\n", prev);
+
+	if (xt->src_inc) {
+		if (xt->src_sgl)
+			chan_cc |=  AT_XDMAC_CC_SAM_UBS_DS_AM;
+		else
+			chan_cc |=  AT_XDMAC_CC_SAM_INCREMENTED_AM;
+	}
+
+	if (xt->dst_inc) {
+		if (xt->dst_sgl)
+			chan_cc |=  AT_XDMAC_CC_DAM_UBS_DS_AM;
+		else
+			chan_cc |=  AT_XDMAC_CC_DAM_INCREMENTED_AM;
+	}
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	desc = at_xdmac_get_desc(atchan);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+	if (!desc) {
+		dev_err(chan2dev(chan), "can't get descriptor\n");
+		return NULL;
+	}
+
+	chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+	ublen = chunk->size >> dwidth;
+
+	desc->lld.mbr_sa = src;
+	desc->lld.mbr_da = dst;
+	desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk);
+	desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk);
+
+	desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
+		| AT_XDMAC_MBR_UBC_NDEN
+		| AT_XDMAC_MBR_UBC_NSEN
+		| ublen;
+	desc->lld.mbr_cfg = chan_cc;
+
+	dev_dbg(chan2dev(chan),
+		"%s: lld: mbr_sa=0x%08x, mbr_da=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+		__func__, desc->lld.mbr_sa, desc->lld.mbr_da,
+		desc->lld.mbr_ubc, desc->lld.mbr_cfg);
+
+	/* Chain lld. */
+	if (prev)
+		at_xdmac_queue_desc(chan, prev, desc);
+
+	return desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_interleaved(struct dma_chan *chan,
+			  struct dma_interleaved_template *xt,
+			  unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*prev = NULL, *first = NULL;
+	struct data_chunk	*chunk, *prev_chunk = NULL;
+	dma_addr_t		dst_addr, src_addr;
+	size_t			dst_skip, src_skip, len = 0;
+	size_t			prev_dst_icg = 0, prev_src_icg = 0;
+	int			i;
+
+	if (!xt || (xt->numf != 1) || (xt->dir != DMA_MEM_TO_MEM))
+		return NULL;
+
+	dev_dbg(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n",
+		__func__, xt->src_start, xt->dst_start,	xt->numf,
+		xt->frame_size, flags);
+
+	src_addr = xt->src_start;
+	dst_addr = xt->dst_start;
+
+	for (i = 0; i < xt->frame_size; i++) {
+		struct at_xdmac_desc *desc;
+		size_t src_icg, dst_icg;
+
+		chunk = xt->sgl + i;
+
+		dst_icg = dmaengine_get_dst_icg(xt, chunk);
+		src_icg = dmaengine_get_src_icg(xt, chunk);
+
+		src_skip = chunk->size + src_icg;
+		dst_skip = chunk->size + dst_icg;
+
+		dev_dbg(chan2dev(chan),
+			"%s: chunk size=%d, src icg=%d, dst icg=%d\n",
+			__func__, chunk->size, src_icg, dst_icg);
+
+		/*
+		 * Handle the case where we just have the same
+		 * transfer to setup, we can just increase the
+		 * block number and reuse the same descriptor.
+		 */
+		if (prev_chunk && prev &&
+		    (prev_chunk->size == chunk->size) &&
+		    (prev_src_icg == src_icg) &&
+		    (prev_dst_icg == dst_icg)) {
+			dev_dbg(chan2dev(chan),
+				"%s: same configuration that the previous chunk, merging the descriptors...\n",
+				__func__);
+			at_xdmac_increment_block_count(chan, prev);
+			continue;
+		}
+
+		desc = at_xdmac_interleaved_queue_desc(chan, atchan,
+						       prev,
+						       src_addr, dst_addr,
+						       xt, chunk);
+		if (!desc) {
+			list_splice_init(&first->descs_list,
+					 &atchan->free_descs_list);
+			return NULL;
+		}
+
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			__func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+
+		if (xt->src_sgl)
+			src_addr += src_skip;
+
+		if (xt->dst_sgl)
+			dst_addr += dst_skip;
+
+		len += chunk->size;
+		prev_chunk = chunk;
+		prev_dst_icg = dst_icg;
+		prev_src_icg = src_icg;
+		prev = desc;
+	}
+
+	first->tx_dma_desc.cookie = -EBUSY;
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = len;
+
+	return &first->tx_dma_desc;
+}
+
 static struct dma_async_tx_descriptor *
 at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 			 size_t len, unsigned long flags)
@@ -773,24 +1005,7 @@
 	if (unlikely(!len))
 		return NULL;
 
-	/*
-	 * Check address alignment to select the greater data width we can use.
-	 * Some XDMAC implementations don't provide dword transfer, in this
-	 * case selecting dword has the same behavior as selecting word transfers.
-	 */
-	if (!((src_addr | dst_addr) & 7)) {
-		dwidth = AT_XDMAC_CC_DWIDTH_DWORD;
-		dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
-	} else if (!((src_addr | dst_addr)  & 3)) {
-		dwidth = AT_XDMAC_CC_DWIDTH_WORD;
-		dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
-	} else if (!((src_addr | dst_addr) & 1)) {
-		dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD;
-		dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
-	} else {
-		dwidth = AT_XDMAC_CC_DWIDTH_BYTE;
-		dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
-	}
+	dwidth = at_xdmac_align_width(chan, src_addr | dst_addr);
 
 	/* Prepare descriptors. */
 	while (remaining_size) {
@@ -820,19 +1035,8 @@
 		dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size);
 
 		/* Check remaining length and change data width if needed. */
-		if (!((src_addr | dst_addr | xfer_size) & 7)) {
-			dwidth = AT_XDMAC_CC_DWIDTH_DWORD;
-			dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
-		} else if (!((src_addr | dst_addr | xfer_size)  & 3)) {
-			dwidth = AT_XDMAC_CC_DWIDTH_WORD;
-			dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
-		} else if (!((src_addr | dst_addr | xfer_size) & 1)) {
-			dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD;
-			dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
-		} else if ((src_addr | dst_addr | xfer_size) & 1) {
-			dwidth = AT_XDMAC_CC_DWIDTH_BYTE;
-			dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
-		}
+		dwidth = at_xdmac_align_width(chan,
+					      src_addr | dst_addr | xfer_size);
 		chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
 
 		ublen = xfer_size >> dwidth;
@@ -843,7 +1047,6 @@
 		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2
 			| AT_XDMAC_MBR_UBC_NDEN
 			| AT_XDMAC_MBR_UBC_NSEN
-			| (remaining_size ? AT_XDMAC_MBR_UBC_NDE : 0)
 			| ublen;
 		desc->lld.mbr_cfg = chan_cc;
 
@@ -852,12 +1055,8 @@
 			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg);
 
 		/* Chain lld. */
-		if (prev) {
-			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
-			dev_dbg(chan2dev(chan),
-				 "%s: chain lld: prev=0x%p, mbr_nda=0x%08x\n",
-				 __func__, prev, prev->lld.mbr_nda);
-		}
+		if (prev)
+			at_xdmac_queue_desc(chan, prev, desc);
 
 		prev = desc;
 		if (!first)
@@ -1398,6 +1597,7 @@
 	}
 
 	dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask);
 	dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask);
 	dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask);
 	/*
@@ -1411,6 +1611,7 @@
 	atxdmac->dma.device_tx_status			= at_xdmac_tx_status;
 	atxdmac->dma.device_issue_pending		= at_xdmac_issue_pending;
 	atxdmac->dma.device_prep_dma_cyclic		= at_xdmac_prep_dma_cyclic;
+	atxdmac->dma.device_prep_interleaved_dma	= at_xdmac_prep_interleaved;
 	atxdmac->dma.device_prep_dma_memcpy		= at_xdmac_prep_dma_memcpy;
 	atxdmac->dma.device_prep_slave_sg		= at_xdmac_prep_slave_sg;
 	atxdmac->dma.device_config			= at_xdmac_device_config;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 2890d74..052a2bd 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -267,6 +267,13 @@
 	/* This channel is not in use anymore, free it */
 	if (!chan->client_count && chan->device->device_free_chan_resources)
 		chan->device->device_free_chan_resources(chan);
+
+	/* If the channel is used via a DMA request router, free the mapping */
+	if (chan->router && chan->router->route_free) {
+		chan->router->route_free(chan->router->dev, chan->route_data);
+		chan->router = NULL;
+		chan->route_data = NULL;
+	}
 }
 
 enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
@@ -532,7 +539,7 @@
 }
 
 /**
- * dma_request_slave_channel - try to get specific channel exclusively
+ * dma_get_slave_channel - try to get specific channel exclusively
  * @chan: target channel
  */
 struct dma_chan *dma_get_slave_channel(struct dma_chan *chan)
@@ -644,7 +651,7 @@
 EXPORT_SYMBOL_GPL(__dma_request_channel);
 
 /**
- * dma_request_slave_channel - try to allocate an exclusive slave channel
+ * dma_request_slave_channel_reason - try to allocate an exclusive slave channel
  * @dev:	pointer to client device structure
  * @name:	slave channel name
  *
@@ -832,6 +839,8 @@
 		!device->device_prep_dma_pq);
 	BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
 		!device->device_prep_dma_pq_val);
+	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
+		!device->device_prep_dma_memset);
 	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
 		!device->device_prep_dma_interrupt);
 	BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 24e5290..57ff462 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -1364,7 +1364,7 @@
 	return ret;
 }
 
-static struct platform_device_id ep93xx_dma_driver_ids[] = {
+static const struct platform_device_id ep93xx_dma_driver_ids[] = {
 	{ "ep93xx-dma-m2p", 0 },
 	{ "ep93xx-dma-m2m", 1 },
 	{ },
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index eed4059..865501fc 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -193,7 +193,7 @@
 	int			 request;
 };
 
-static struct platform_device_id imx_dma_devtype[] = {
+static const struct platform_device_id imx_dma_devtype[] = {
 	{
 		.name = "imx1-dma",
 		.driver_data = IMX1_DMA,
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 62bbd79..77b6aab 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -420,7 +420,7 @@
 	.script_addrs = &sdma_script_imx6q,
 };
 
-static struct platform_device_id sdma_devtypes[] = {
+static const struct platform_device_id sdma_devtypes[] = {
 	{
 		.name = "imx25-sdma",
 		.driver_data = (unsigned long)&sdma_imx25,
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 1c56001..fbaf1ea 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -19,6 +19,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/memory.h>
 #include <linux/clk.h>
@@ -30,6 +31,11 @@
 #include "dmaengine.h"
 #include "mv_xor.h"
 
+enum mv_xor_mode {
+	XOR_MODE_IN_REG,
+	XOR_MODE_IN_DESC,
+};
+
 static void mv_xor_issue_pending(struct dma_chan *chan);
 
 #define to_mv_xor_chan(chan)		\
@@ -56,6 +62,24 @@
 	hw_desc->byte_count = byte_count;
 }
 
+static void mv_desc_set_mode(struct mv_xor_desc_slot *desc)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+	switch (desc->type) {
+	case DMA_XOR:
+	case DMA_INTERRUPT:
+		hw_desc->desc_command |= XOR_DESC_OPERATION_XOR;
+		break;
+	case DMA_MEMCPY:
+		hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY;
+		break;
+	default:
+		BUG();
+		return;
+	}
+}
+
 static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
 				  u32 next_desc_addr)
 {
@@ -64,12 +88,6 @@
 	hw_desc->phy_next_desc = next_desc_addr;
 }
 
-static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
-{
-	struct mv_xor_desc *hw_desc = desc->hw_desc;
-	hw_desc->phy_next_desc = 0;
-}
-
 static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
 				 int index, dma_addr_t addr)
 {
@@ -104,7 +122,7 @@
 	return intr_cause;
 }
 
-static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan)
 {
 	u32 val;
 
@@ -114,14 +132,14 @@
 	writel_relaxed(val, XOR_INTR_CAUSE(chan));
 }
 
-static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+static void mv_chan_clear_err_status(struct mv_xor_chan *chan)
 {
 	u32 val = 0xFFFF0000 >> (chan->idx * 16);
 	writel_relaxed(val, XOR_INTR_CAUSE(chan));
 }
 
-static void mv_set_mode(struct mv_xor_chan *chan,
-			       enum dma_transaction_type type)
+static void mv_chan_set_mode(struct mv_xor_chan *chan,
+			     enum dma_transaction_type type)
 {
 	u32 op_mode;
 	u32 config = readl_relaxed(XOR_CONFIG(chan));
@@ -144,6 +162,25 @@
 	config &= ~0x7;
 	config |= op_mode;
 
+	if (IS_ENABLED(__BIG_ENDIAN))
+		config |= XOR_DESCRIPTOR_SWAP;
+	else
+		config &= ~XOR_DESCRIPTOR_SWAP;
+
+	writel_relaxed(config, XOR_CONFIG(chan));
+	chan->current_type = type;
+}
+
+static void mv_chan_set_mode_to_desc(struct mv_xor_chan *chan)
+{
+	u32 op_mode;
+	u32 config = readl_relaxed(XOR_CONFIG(chan));
+
+	op_mode = XOR_OPERATION_MODE_IN_DESC;
+
+	config &= ~0x7;
+	config |= op_mode;
+
 #if defined(__BIG_ENDIAN)
 	config |= XOR_DESCRIPTOR_SWAP;
 #else
@@ -151,7 +188,6 @@
 #endif
 
 	writel_relaxed(config, XOR_CONFIG(chan));
-	chan->current_type = type;
 }
 
 static void mv_chan_activate(struct mv_xor_chan *chan)
@@ -171,28 +207,13 @@
 	return (state == 1) ? 1 : 0;
 }
 
-/**
- * mv_xor_free_slots - flags descriptor slots for reuse
- * @slot: Slot to free
- * Caller must hold &mv_chan->lock while calling this function
- */
-static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
-			      struct mv_xor_desc_slot *slot)
-{
-	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d slot %p\n",
-		__func__, __LINE__, slot);
-
-	slot->slot_used = 0;
-
-}
-
 /*
- * mv_xor_start_new_chain - program the engine to operate on new chain headed by
- * sw_desc
+ * mv_chan_start_new_chain - program the engine to operate on new
+ * chain headed by sw_desc
  * Caller must hold &mv_chan->lock while calling this function
  */
-static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
-				   struct mv_xor_desc_slot *sw_desc)
+static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan,
+				    struct mv_xor_desc_slot *sw_desc)
 {
 	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n",
 		__func__, __LINE__, sw_desc);
@@ -205,8 +226,9 @@
 }
 
 static dma_cookie_t
-mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
-	struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+				struct mv_xor_chan *mv_chan,
+				dma_cookie_t cookie)
 {
 	BUG_ON(desc->async_tx.cookie < 0);
 
@@ -230,93 +252,110 @@
 }
 
 static int
-mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan)
 {
 	struct mv_xor_desc_slot *iter, *_iter;
 
 	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
 	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
-				 completed_node) {
+				 node) {
 
-		if (async_tx_test_ack(&iter->async_tx)) {
-			list_del(&iter->completed_node);
-			mv_xor_free_slots(mv_chan, iter);
-		}
+		if (async_tx_test_ack(&iter->async_tx))
+			list_move_tail(&iter->node, &mv_chan->free_slots);
 	}
 	return 0;
 }
 
 static int
-mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
-	struct mv_xor_chan *mv_chan)
+mv_desc_clean_slot(struct mv_xor_desc_slot *desc,
+		   struct mv_xor_chan *mv_chan)
 {
 	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n",
 		__func__, __LINE__, desc, desc->async_tx.flags);
-	list_del(&desc->chain_node);
+
 	/* the client is allowed to attach dependent operations
 	 * until 'ack' is set
 	 */
-	if (!async_tx_test_ack(&desc->async_tx)) {
+	if (!async_tx_test_ack(&desc->async_tx))
 		/* move this slot to the completed_slots */
-		list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
-		return 0;
-	}
+		list_move_tail(&desc->node, &mv_chan->completed_slots);
+	else
+		list_move_tail(&desc->node, &mv_chan->free_slots);
 
-	mv_xor_free_slots(mv_chan, desc);
 	return 0;
 }
 
 /* This function must be called with the mv_xor_chan spinlock held */
-static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan)
 {
 	struct mv_xor_desc_slot *iter, *_iter;
 	dma_cookie_t cookie = 0;
 	int busy = mv_chan_is_busy(mv_chan);
 	u32 current_desc = mv_chan_get_current_desc(mv_chan);
-	int seen_current = 0;
+	int current_cleaned = 0;
+	struct mv_xor_desc *hw_desc;
 
 	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
 	dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc);
-	mv_xor_clean_completed_slots(mv_chan);
+	mv_chan_clean_completed_slots(mv_chan);
 
 	/* free completed slots from the chain starting with
 	 * the oldest descriptor
 	 */
 
 	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
-					chain_node) {
-		prefetch(_iter);
-		prefetch(&_iter->async_tx);
+				 node) {
 
-		/* do not advance past the current descriptor loaded into the
-		 * hardware channel, subsequent descriptors are either in
-		 * process or have not been submitted
-		 */
-		if (seen_current)
-			break;
+		/* clean finished descriptors */
+		hw_desc = iter->hw_desc;
+		if (hw_desc->status & XOR_DESC_SUCCESS) {
+			cookie = mv_desc_run_tx_complete_actions(iter, mv_chan,
+								 cookie);
 
-		/* stop the search if we reach the current descriptor and the
-		 * channel is busy
-		 */
-		if (iter->async_tx.phys == current_desc) {
-			seen_current = 1;
-			if (busy)
+			/* done processing desc, clean slot */
+			mv_desc_clean_slot(iter, mv_chan);
+
+			/* break if we did cleaned the current */
+			if (iter->async_tx.phys == current_desc) {
+				current_cleaned = 1;
 				break;
+			}
+		} else {
+			if (iter->async_tx.phys == current_desc) {
+				current_cleaned = 0;
+				break;
+			}
 		}
-
-		cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
-
-		if (mv_xor_clean_slot(iter, mv_chan))
-			break;
 	}
 
 	if ((busy == 0) && !list_empty(&mv_chan->chain)) {
-		struct mv_xor_desc_slot *chain_head;
-		chain_head = list_entry(mv_chan->chain.next,
-					struct mv_xor_desc_slot,
-					chain_node);
-
-		mv_xor_start_new_chain(mv_chan, chain_head);
+		if (current_cleaned) {
+			/*
+			 * current descriptor cleaned and removed, run
+			 * from list head
+			 */
+			iter = list_entry(mv_chan->chain.next,
+					  struct mv_xor_desc_slot,
+					  node);
+			mv_chan_start_new_chain(mv_chan, iter);
+		} else {
+			if (!list_is_last(&iter->node, &mv_chan->chain)) {
+				/*
+				 * descriptors are still waiting after
+				 * current, trigger them
+				 */
+				iter = list_entry(iter->node.next,
+						  struct mv_xor_desc_slot,
+						  node);
+				mv_chan_start_new_chain(mv_chan, iter);
+			} else {
+				/*
+				 * some descriptors are still waiting
+				 * to be cleaned
+				 */
+				tasklet_schedule(&mv_chan->irq_tasklet);
+			}
+		}
 	}
 
 	if (cookie > 0)
@@ -328,56 +367,35 @@
 	struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
 
 	spin_lock_bh(&chan->lock);
-	mv_xor_slot_cleanup(chan);
+	mv_chan_slot_cleanup(chan);
 	spin_unlock_bh(&chan->lock);
 }
 
 static struct mv_xor_desc_slot *
-mv_xor_alloc_slot(struct mv_xor_chan *mv_chan)
+mv_chan_alloc_slot(struct mv_xor_chan *mv_chan)
 {
-	struct mv_xor_desc_slot *iter, *_iter;
-	int retry = 0;
+	struct mv_xor_desc_slot *iter;
 
-	/* start search from the last allocated descrtiptor
-	 * if a contiguous allocation can not be found start searching
-	 * from the beginning of the list
-	 */
-retry:
-	if (retry == 0)
-		iter = mv_chan->last_used;
-	else
-		iter = list_entry(&mv_chan->all_slots,
-			struct mv_xor_desc_slot,
-			slot_node);
+	spin_lock_bh(&mv_chan->lock);
 
-	list_for_each_entry_safe_continue(
-		iter, _iter, &mv_chan->all_slots, slot_node) {
+	if (!list_empty(&mv_chan->free_slots)) {
+		iter = list_first_entry(&mv_chan->free_slots,
+					struct mv_xor_desc_slot,
+					node);
 
-		prefetch(_iter);
-		prefetch(&_iter->async_tx);
-		if (iter->slot_used) {
-			/* give up after finding the first busy slot
-			 * on the second pass through the list
-			 */
-			if (retry)
-				break;
-			continue;
-		}
+		list_move_tail(&iter->node, &mv_chan->allocated_slots);
+
+		spin_unlock_bh(&mv_chan->lock);
 
 		/* pre-ack descriptor */
 		async_tx_ack(&iter->async_tx);
-
-		iter->slot_used = 1;
-		INIT_LIST_HEAD(&iter->chain_node);
 		iter->async_tx.cookie = -EBUSY;
-		mv_chan->last_used = iter;
-		mv_desc_clear_next_desc(iter);
 
 		return iter;
 
 	}
-	if (!retry++)
-		goto retry;
+
+	spin_unlock_bh(&mv_chan->lock);
 
 	/* try to free some slots if the allocation fails */
 	tasklet_schedule(&mv_chan->irq_tasklet);
@@ -403,14 +421,14 @@
 	cookie = dma_cookie_assign(tx);
 
 	if (list_empty(&mv_chan->chain))
-		list_add_tail(&sw_desc->chain_node, &mv_chan->chain);
+		list_move_tail(&sw_desc->node, &mv_chan->chain);
 	else {
 		new_hw_chain = 0;
 
 		old_chain_tail = list_entry(mv_chan->chain.prev,
 					    struct mv_xor_desc_slot,
-					    chain_node);
-		list_add_tail(&sw_desc->chain_node, &mv_chan->chain);
+					    node);
+		list_move_tail(&sw_desc->node, &mv_chan->chain);
 
 		dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n",
 			&old_chain_tail->async_tx.phys);
@@ -431,7 +449,7 @@
 	}
 
 	if (new_hw_chain)
-		mv_xor_start_new_chain(mv_chan, sw_desc);
+		mv_chan_start_new_chain(mv_chan, sw_desc);
 
 	spin_unlock_bh(&mv_chan->lock);
 
@@ -463,26 +481,20 @@
 
 		dma_async_tx_descriptor_init(&slot->async_tx, chan);
 		slot->async_tx.tx_submit = mv_xor_tx_submit;
-		INIT_LIST_HEAD(&slot->chain_node);
-		INIT_LIST_HEAD(&slot->slot_node);
+		INIT_LIST_HEAD(&slot->node);
 		dma_desc = mv_chan->dma_desc_pool;
 		slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE;
 		slot->idx = idx++;
 
 		spin_lock_bh(&mv_chan->lock);
 		mv_chan->slots_allocated = idx;
-		list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+		list_add_tail(&slot->node, &mv_chan->free_slots);
 		spin_unlock_bh(&mv_chan->lock);
 	}
 
-	if (mv_chan->slots_allocated && !mv_chan->last_used)
-		mv_chan->last_used = list_entry(mv_chan->all_slots.next,
-					struct mv_xor_desc_slot,
-					slot_node);
-
 	dev_dbg(mv_chan_to_devp(mv_chan),
-		"allocated %d descriptor slots last_used: %p\n",
-		mv_chan->slots_allocated, mv_chan->last_used);
+		"allocated %d descriptor slots\n",
+		mv_chan->slots_allocated);
 
 	return mv_chan->slots_allocated ? : -ENOMEM;
 }
@@ -503,16 +515,17 @@
 		"%s src_cnt: %d len: %u dest %pad flags: %ld\n",
 		__func__, src_cnt, len, &dest, flags);
 
-	spin_lock_bh(&mv_chan->lock);
-	sw_desc = mv_xor_alloc_slot(mv_chan);
+	sw_desc = mv_chan_alloc_slot(mv_chan);
 	if (sw_desc) {
 		sw_desc->type = DMA_XOR;
 		sw_desc->async_tx.flags = flags;
 		mv_desc_init(sw_desc, dest, len, flags);
+		if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
+			mv_desc_set_mode(sw_desc);
 		while (src_cnt--)
 			mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]);
 	}
-	spin_unlock_bh(&mv_chan->lock);
+
 	dev_dbg(mv_chan_to_devp(mv_chan),
 		"%s sw_desc %p async_tx %p \n",
 		__func__, sw_desc, &sw_desc->async_tx);
@@ -556,25 +569,29 @@
 
 	spin_lock_bh(&mv_chan->lock);
 
-	mv_xor_slot_cleanup(mv_chan);
+	mv_chan_slot_cleanup(mv_chan);
 
 	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
-					chain_node) {
+					node) {
 		in_use_descs++;
-		list_del(&iter->chain_node);
+		list_move_tail(&iter->node, &mv_chan->free_slots);
 	}
 	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
-				 completed_node) {
+				 node) {
 		in_use_descs++;
-		list_del(&iter->completed_node);
+		list_move_tail(&iter->node, &mv_chan->free_slots);
+	}
+	list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots,
+				 node) {
+		in_use_descs++;
+		list_move_tail(&iter->node, &mv_chan->free_slots);
 	}
 	list_for_each_entry_safe_reverse(
-		iter, _iter, &mv_chan->all_slots, slot_node) {
-		list_del(&iter->slot_node);
+		iter, _iter, &mv_chan->free_slots, node) {
+		list_del(&iter->node);
 		kfree(iter);
 		mv_chan->slots_allocated--;
 	}
-	mv_chan->last_used = NULL;
 
 	dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n",
 		__func__, mv_chan->slots_allocated);
@@ -603,13 +620,13 @@
 		return ret;
 
 	spin_lock_bh(&mv_chan->lock);
-	mv_xor_slot_cleanup(mv_chan);
+	mv_chan_slot_cleanup(mv_chan);
 	spin_unlock_bh(&mv_chan->lock);
 
 	return dma_cookie_status(chan, cookie, txstate);
 }
 
-static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+static void mv_chan_dump_regs(struct mv_xor_chan *chan)
 {
 	u32 val;
 
@@ -632,8 +649,8 @@
 	dev_err(mv_chan_to_devp(chan), "error addr   0x%08x\n", val);
 }
 
-static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
-					 u32 intr_cause)
+static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan,
+					  u32 intr_cause)
 {
 	if (intr_cause & XOR_INT_ERR_DECODE) {
 		dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n");
@@ -643,7 +660,7 @@
 	dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n",
 		chan->idx, intr_cause);
 
-	mv_dump_xor_regs(chan);
+	mv_chan_dump_regs(chan);
 	WARN_ON(1);
 }
 
@@ -655,11 +672,11 @@
 	dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause);
 
 	if (intr_cause & XOR_INTR_ERRORS)
-		mv_xor_err_interrupt_handler(chan, intr_cause);
+		mv_chan_err_interrupt_handler(chan, intr_cause);
 
 	tasklet_schedule(&chan->irq_tasklet);
 
-	mv_xor_device_clear_eoc_cause(chan);
+	mv_chan_clear_eoc_cause(chan);
 
 	return IRQ_HANDLED;
 }
@@ -678,7 +695,7 @@
  * Perform a transaction to verify the HW works.
  */
 
-static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan)
+static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 {
 	int i, ret;
 	void *src, *dest;
@@ -787,7 +804,7 @@
 
 #define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
 static int
-mv_xor_xor_self_test(struct mv_xor_chan *mv_chan)
+mv_chan_xor_self_test(struct mv_xor_chan *mv_chan)
 {
 	int i, src_idx, ret;
 	struct page *dest;
@@ -951,7 +968,7 @@
 static struct mv_xor_chan *
 mv_xor_channel_add(struct mv_xor_device *xordev,
 		   struct platform_device *pdev,
-		   int idx, dma_cap_mask_t cap_mask, int irq)
+		   int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc)
 {
 	int ret = 0;
 	struct mv_xor_chan *mv_chan;
@@ -963,6 +980,7 @@
 
 	mv_chan->idx = idx;
 	mv_chan->irq = irq;
+	mv_chan->op_in_desc = op_in_desc;
 
 	dma_dev = &mv_chan->dmadev;
 
@@ -1014,7 +1032,7 @@
 		     mv_chan);
 
 	/* clear errors before enabling interrupts */
-	mv_xor_device_clear_err_status(mv_chan);
+	mv_chan_clear_err_status(mv_chan);
 
 	ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler,
 			  0, dev_name(&pdev->dev), mv_chan);
@@ -1023,32 +1041,37 @@
 
 	mv_chan_unmask_interrupts(mv_chan);
 
-	mv_set_mode(mv_chan, DMA_XOR);
+	if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
+		mv_chan_set_mode_to_desc(mv_chan);
+	else
+		mv_chan_set_mode(mv_chan, DMA_XOR);
 
 	spin_lock_init(&mv_chan->lock);
 	INIT_LIST_HEAD(&mv_chan->chain);
 	INIT_LIST_HEAD(&mv_chan->completed_slots);
-	INIT_LIST_HEAD(&mv_chan->all_slots);
+	INIT_LIST_HEAD(&mv_chan->free_slots);
+	INIT_LIST_HEAD(&mv_chan->allocated_slots);
 	mv_chan->dmachan.device = dma_dev;
 	dma_cookie_init(&mv_chan->dmachan);
 
 	list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels);
 
 	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
-		ret = mv_xor_memcpy_self_test(mv_chan);
+		ret = mv_chan_memcpy_self_test(mv_chan);
 		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
 		if (ret)
 			goto err_free_irq;
 	}
 
 	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
-		ret = mv_xor_xor_self_test(mv_chan);
+		ret = mv_chan_xor_self_test(mv_chan);
 		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
 		if (ret)
 			goto err_free_irq;
 	}
 
-	dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s)\n",
+	dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n",
+		 mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode",
 		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
 		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
 		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
@@ -1097,6 +1120,13 @@
 	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
 }
 
+static const struct of_device_id mv_xor_dt_ids[] = {
+	{ .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG },
+	{ .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mv_xor_dt_ids);
+
 static int mv_xor_probe(struct platform_device *pdev)
 {
 	const struct mbus_dram_target_info *dram;
@@ -1104,6 +1134,7 @@
 	struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *res;
 	int i, ret;
+	int op_in_desc;
 
 	dev_notice(&pdev->dev, "Marvell shared XOR driver\n");
 
@@ -1148,11 +1179,15 @@
 	if (pdev->dev.of_node) {
 		struct device_node *np;
 		int i = 0;
+		const struct of_device_id *of_id =
+			of_match_device(mv_xor_dt_ids,
+					&pdev->dev);
 
 		for_each_child_of_node(pdev->dev.of_node, np) {
 			struct mv_xor_chan *chan;
 			dma_cap_mask_t cap_mask;
 			int irq;
+			op_in_desc = (int)of_id->data;
 
 			dma_cap_zero(cap_mask);
 			if (of_property_read_bool(np, "dmacap,memcpy"))
@@ -1169,7 +1204,7 @@
 			}
 
 			chan = mv_xor_channel_add(xordev, pdev, i,
-						  cap_mask, irq);
+						  cap_mask, irq, op_in_desc);
 			if (IS_ERR(chan)) {
 				ret = PTR_ERR(chan);
 				irq_dispose_mapping(irq);
@@ -1198,7 +1233,8 @@
 			}
 
 			chan = mv_xor_channel_add(xordev, pdev, i,
-						  cd->cap_mask, irq);
+						  cd->cap_mask, irq,
+						  XOR_MODE_IN_REG);
 			if (IS_ERR(chan)) {
 				ret = PTR_ERR(chan);
 				goto err_channel_add;
@@ -1244,14 +1280,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct of_device_id mv_xor_dt_ids[] = {
-       { .compatible = "marvell,orion-xor", },
-       {},
-};
-MODULE_DEVICE_TABLE(of, mv_xor_dt_ids);
-#endif
-
 static struct platform_driver mv_xor_driver = {
 	.probe		= mv_xor_probe,
 	.remove		= mv_xor_remove,
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index 91958db..b7455b4 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -19,7 +19,7 @@
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 
-#define MV_XOR_POOL_SIZE		PAGE_SIZE
+#define MV_XOR_POOL_SIZE		(MV_XOR_SLOT_SIZE * 3072)
 #define MV_XOR_SLOT_SIZE		64
 #define MV_XOR_THRESHOLD		1
 #define MV_XOR_MAX_CHANNELS             2
@@ -30,7 +30,13 @@
 /* Values for the XOR_CONFIG register */
 #define XOR_OPERATION_MODE_XOR		0
 #define XOR_OPERATION_MODE_MEMCPY	2
+#define XOR_OPERATION_MODE_IN_DESC      7
 #define XOR_DESCRIPTOR_SWAP		BIT(14)
+#define XOR_DESC_SUCCESS		0x40000000
+
+#define XOR_DESC_OPERATION_XOR          (0 << 24)
+#define XOR_DESC_OPERATION_CRC32C       (1 << 24)
+#define XOR_DESC_OPERATION_MEMCPY       (2 << 24)
 
 #define XOR_DESC_DMA_OWNED		BIT(31)
 #define XOR_DESC_EOD_INT_EN		BIT(31)
@@ -88,13 +94,14 @@
  * @mmr_base: memory mapped register base
  * @idx: the index of the xor channel
  * @chain: device chain view of the descriptors
+ * @free_slots: free slots usable by the channel
+ * @allocated_slots: slots allocated by the driver
  * @completed_slots: slots completed by HW but still need to be acked
  * @device: parent device
  * @common: common dmaengine channel object members
- * @last_used: place holder for allocation to continue from where it left off
- * @all_slots: complete domain of slots usable by the channel
  * @slots_allocated: records the actual size of the descriptor slot pool
  * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ * @op_in_desc: new mode of driver, each op is writen to descriptor.
  */
 struct mv_xor_chan {
 	int			pending;
@@ -105,16 +112,17 @@
 	int                     irq;
 	enum dma_transaction_type	current_type;
 	struct list_head	chain;
+	struct list_head	free_slots;
+	struct list_head	allocated_slots;
 	struct list_head	completed_slots;
 	dma_addr_t		dma_desc_pool;
 	void			*dma_desc_pool_virt;
 	size_t                  pool_size;
 	struct dma_device	dmadev;
 	struct dma_chan		dmachan;
-	struct mv_xor_desc_slot	*last_used;
-	struct list_head	all_slots;
 	int			slots_allocated;
 	struct tasklet_struct	irq_tasklet;
+	int                     op_in_desc;
 	char			dummy_src[MV_XOR_MIN_BYTE_COUNT];
 	char			dummy_dst[MV_XOR_MIN_BYTE_COUNT];
 	dma_addr_t		dummy_src_addr, dummy_dst_addr;
@@ -122,9 +130,7 @@
 
 /**
  * struct mv_xor_desc_slot - software descriptor
- * @slot_node: node on the mv_xor_chan.all_slots list
- * @chain_node: node on the mv_xor_chan.chain list
- * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @node: node on the mv_xor_chan lists
  * @hw_desc: virtual address of the hardware descriptor chain
  * @phys: hardware address of the hardware descriptor chain
  * @slot_used: slot in use or not
@@ -133,12 +139,9 @@
  * @async_tx: support for the async_tx api
  */
 struct mv_xor_desc_slot {
-	struct list_head	slot_node;
-	struct list_head	chain_node;
-	struct list_head	completed_node;
+	struct list_head	node;
 	enum dma_transaction_type	type;
 	void			*hw_desc;
-	u16			slot_used;
 	u16			idx;
 	struct dma_async_tx_descriptor	async_tx;
 };
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 829ec68..60de352 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -170,7 +170,7 @@
 	}
 };
 
-static struct platform_device_id mxs_dma_ids[] = {
+static const struct platform_device_id mxs_dma_ids[] = {
 	{
 		.name = "imx23-dma-apbh",
 		.driver_data = (kernel_ulong_t) &mxs_dma_types[0],
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
index 88b77c9..2b5a198 100644
--- a/drivers/dma/nbpfaxi.c
+++ b/drivers/dma/nbpfaxi.c
@@ -1455,7 +1455,7 @@
 	return 0;
 }
 
-static struct platform_device_id nbpf_ids[] = {
+static const struct platform_device_id nbpf_ids[] = {
 	{"nbpfaxi64dmac1b4",	(kernel_ulong_t)&nbpf_cfg[NBPF1B4]},
 	{"nbpfaxi64dmac1b8",	(kernel_ulong_t)&nbpf_cfg[NBPF1B8]},
 	{"nbpfaxi64dmac1b16",	(kernel_ulong_t)&nbpf_cfg[NBPF1B16]},
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index cbd4a8a..1e1f298 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -45,6 +45,50 @@
 }
 
 /**
+ * of_dma_router_xlate - translation function for router devices
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data (router information)
+ *
+ * The function creates new dma_spec to be passed to the router driver's
+ * of_dma_route_allocate() function to prepare a dma_spec which will be used
+ * to request channel from the real DMA controller.
+ */
+static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec,
+					    struct of_dma *ofdma)
+{
+	struct dma_chan		*chan;
+	struct of_dma		*ofdma_target;
+	struct of_phandle_args	dma_spec_target;
+	void			*route_data;
+
+	/* translate the request for the real DMA controller */
+	memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target));
+	route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma);
+	if (IS_ERR(route_data))
+		return NULL;
+
+	ofdma_target = of_dma_find_controller(&dma_spec_target);
+	if (!ofdma_target)
+		return NULL;
+
+	chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target);
+	if (chan) {
+		chan->router = ofdma->dma_router;
+		chan->route_data = route_data;
+	} else {
+		ofdma->dma_router->route_free(ofdma->dma_router->dev,
+					      route_data);
+	}
+
+	/*
+	 * Need to put the node back since the ofdma->of_dma_route_allocate
+	 * has taken it for generating the new, translated dma_spec
+	 */
+	of_node_put(dma_spec_target.np);
+	return chan;
+}
+
+/**
  * of_dma_controller_register - Register a DMA controller to DT DMA helpers
  * @np:			device node of DMA controller
  * @of_dma_xlate:	translation function which converts a phandle
@@ -110,6 +154,51 @@
 EXPORT_SYMBOL_GPL(of_dma_controller_free);
 
 /**
+ * of_dma_router_register - Register a DMA router to DT DMA helpers as a
+ *			    controller
+ * @np:				device node of DMA router
+ * @of_dma_route_allocate:	setup function for the router which need to
+ *				modify the dma_spec for the DMA controller to
+ *				use and to set up the requested route.
+ * @dma_router:			pointer to dma_router structure to be used when
+ *				the route need to be free up.
+ *
+ * Returns 0 on success or appropriate errno value on error.
+ *
+ * Allocated memory should be freed with appropriate of_dma_controller_free()
+ * call.
+ */
+int of_dma_router_register(struct device_node *np,
+			   void *(*of_dma_route_allocate)
+			   (struct of_phandle_args *, struct of_dma *),
+			   struct dma_router *dma_router)
+{
+	struct of_dma	*ofdma;
+
+	if (!np || !of_dma_route_allocate || !dma_router) {
+		pr_err("%s: not enough information provided\n", __func__);
+		return -EINVAL;
+	}
+
+	ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL);
+	if (!ofdma)
+		return -ENOMEM;
+
+	ofdma->of_node = np;
+	ofdma->of_dma_xlate = of_dma_router_xlate;
+	ofdma->of_dma_route_allocate = of_dma_route_allocate;
+	ofdma->dma_router = dma_router;
+
+	/* Now queue of_dma controller structure in list */
+	mutex_lock(&of_dma_lock);
+	list_add_tail(&ofdma->of_dma_controllers, &of_dma_list);
+	mutex_unlock(&of_dma_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_dma_router_register);
+
+/**
  * of_dma_match_channel - Check if a DMA specifier matches name
  * @np:		device node to look for DMA channels
  * @name:	channel name to be matched
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 167dbaf..249445c 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -22,6 +22,9 @@
 
 #include "virt-dma.h"
 
+#define OMAP_SDMA_REQUESTS	127
+#define OMAP_SDMA_CHANNELS	32
+
 struct omap_dmadev {
 	struct dma_device ddev;
 	spinlock_t lock;
@@ -31,9 +34,10 @@
 	const struct omap_dma_reg *reg_map;
 	struct omap_system_dma_plat_info *plat;
 	bool legacy;
+	unsigned dma_requests;
 	spinlock_t irq_lock;
 	uint32_t irq_enable_mask;
-	struct omap_chan *lch_map[32];
+	struct omap_chan *lch_map[OMAP_SDMA_CHANNELS];
 };
 
 struct omap_chan {
@@ -362,7 +366,7 @@
 	struct omap_sg *sg = d->sg + idx;
 	unsigned cxsa, cxei, cxfi;
 
-	if (d->dir == DMA_DEV_TO_MEM) {
+	if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) {
 		cxsa = CDSA;
 		cxei = CDEI;
 		cxfi = CDFI;
@@ -408,7 +412,7 @@
 	if (dma_omap1())
 		omap_dma_chan_write(c, CCR2, d->ccr >> 16);
 
-	if (d->dir == DMA_DEV_TO_MEM) {
+	if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) {
 		cxsa = CSSA;
 		cxei = CSEI;
 		cxfi = CSFI;
@@ -589,6 +593,7 @@
 	omap_free_dma(c->dma_ch);
 
 	dev_dbg(od->ddev.dev, "freeing channel for %u\n", c->dma_sig);
+	c->dma_sig = 0;
 }
 
 static size_t omap_dma_sg_size(struct omap_sg *sg)
@@ -948,6 +953,51 @@
 	return vchan_tx_prep(&c->vc, &d->vd, flags);
 }
 
+static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+	size_t len, unsigned long tx_flags)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	struct omap_desc *d;
+	uint8_t data_type;
+
+	d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
+	if (!d)
+		return NULL;
+
+	data_type = __ffs((src | dest | len));
+	if (data_type > CSDP_DATA_TYPE_32)
+		data_type = CSDP_DATA_TYPE_32;
+
+	d->dir = DMA_MEM_TO_MEM;
+	d->dev_addr = src;
+	d->fi = 0;
+	d->es = data_type;
+	d->sg[0].en = len / BIT(data_type);
+	d->sg[0].fn = 1;
+	d->sg[0].addr = dest;
+	d->sglen = 1;
+	d->ccr = c->ccr;
+	d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC;
+
+	d->cicr = CICR_DROP_IE;
+	if (tx_flags & DMA_PREP_INTERRUPT)
+		d->cicr |= CICR_FRAME_IE;
+
+	d->csdp = data_type;
+
+	if (dma_omap1()) {
+		d->cicr |= CICR_TOUT_IE;
+		d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF;
+	} else {
+		d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED;
+		d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+		d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64;
+	}
+
+	return vchan_tx_prep(&c->vc, &d->vd, tx_flags);
+}
+
 static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg)
 {
 	struct omap_chan *c = to_omap_dma_chan(chan);
@@ -1037,7 +1087,7 @@
 	return 0;
 }
 
-static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig)
+static int omap_dma_chan_init(struct omap_dmadev *od)
 {
 	struct omap_chan *c;
 
@@ -1046,7 +1096,6 @@
 		return -ENOMEM;
 
 	c->reg_map = od->reg_map;
-	c->dma_sig = dma_sig;
 	c->vc.desc_free = omap_dma_desc_free;
 	vchan_init(&c->vc, &od->ddev);
 	INIT_LIST_HEAD(&c->node);
@@ -1094,12 +1143,14 @@
 
 	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
 	dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
+	dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask);
 	od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources;
 	od->ddev.device_free_chan_resources = omap_dma_free_chan_resources;
 	od->ddev.device_tx_status = omap_dma_tx_status;
 	od->ddev.device_issue_pending = omap_dma_issue_pending;
 	od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg;
 	od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic;
+	od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy;
 	od->ddev.device_config = omap_dma_slave_config;
 	od->ddev.device_pause = omap_dma_pause;
 	od->ddev.device_resume = omap_dma_resume;
@@ -1116,8 +1167,17 @@
 
 	tasklet_init(&od->task, omap_dma_sched, (unsigned long)od);
 
-	for (i = 0; i < 127; i++) {
-		rc = omap_dma_chan_init(od, i);
+	od->dma_requests = OMAP_SDMA_REQUESTS;
+	if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+						      "dma-requests",
+						      &od->dma_requests)) {
+		dev_info(&pdev->dev,
+			 "Missing dma-requests property, using %u.\n",
+			 OMAP_SDMA_REQUESTS);
+	}
+
+	for (i = 0; i < OMAP_SDMA_CHANNELS; i++) {
+		rc = omap_dma_chan_init(od);
 		if (rc) {
 			omap_dma_free(od);
 			return rc;
@@ -1208,10 +1268,14 @@
 bool omap_dma_filter_fn(struct dma_chan *chan, void *param)
 {
 	if (chan->device->dev->driver == &omap_dma_driver.driver) {
+		struct omap_dmadev *od = to_omap_dma_dev(chan->device);
 		struct omap_chan *c = to_omap_dma_chan(chan);
 		unsigned req = *(unsigned *)param;
 
-		return req == c->dma_sig;
+		if (req <= od->dma_requests) {
+			c->dma_sig = req;
+			return true;
+		}
 	}
 	return false;
 }
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index a7d9d30..3781f32 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1424,8 +1424,8 @@
 		goto xfer_exit;
 
 	if (ret > pl330->mcbufsz / 2) {
-		dev_info(pl330->ddma.dev, "%s:%d Trying increasing mcbufsz\n",
-				__func__, __LINE__);
+		dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n",
+				__func__, __LINE__, ret, pl330->mcbufsz / 2);
 		ret = -ENOMEM;
 		goto xfer_exit;
 	}
@@ -2581,12 +2581,14 @@
 {
 	struct dma_pl330_desc *desc;
 	struct dma_pl330_chan *pch = to_pchan(chan);
-	struct pl330_dmac *pl330 = pch->dmac;
+	struct pl330_dmac *pl330;
 	int burst;
 
 	if (unlikely(!pch || !len))
 		return NULL;
 
+	pl330 = pch->dmac;
+
 	desc = __pl330_prep_dma_memcpy(pch, dst, src, len);
 	if (!desc)
 		return NULL;
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 01dcaf2..17ccdfd 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -1168,7 +1168,7 @@
 	.has_clocks = true,
 };
 
-static struct platform_device_id s3c24xx_dma_driver_ids[] = {
+static const struct platform_device_id s3c24xx_dma_driver_ids[] = {
 	{
 		.name		= "s3c2410-dma",
 		.driver_data	= (kernel_ulong_t)&soc_s3c2410,
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index a18d16c..d5423a6 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -183,7 +183,7 @@
 	unsigned int n_channels;
 	struct rcar_dmac_chan *channels;
 
-	unsigned long modules[256 / BITS_PER_LONG];
+	DECLARE_BITMAP(modules, 256);
 };
 
 #define to_rcar_dmac(d)		container_of(d, struct rcar_dmac, engine)
diff --git a/drivers/dma/sh/shdma-r8a73a4.c b/drivers/dma/sh/shdma-r8a73a4.c
index 4fb9997..96ea382 100644
--- a/drivers/dma/sh/shdma-r8a73a4.c
+++ b/drivers/dma/sh/shdma-r8a73a4.c
@@ -11,7 +11,7 @@
 
 #include "shdma-arm.h"
 
-const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT;
+static const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT;
 
 static const struct sh_dmae_slave_config dma_slaves[] = {
 	{
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index a1afda4..8c5186c 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -23,8 +23,13 @@
 
 #include "dmaengine.h"
 
+#define SIRFSOC_DMA_VER_A7V1                    1
+#define SIRFSOC_DMA_VER_A7V2                    2
+#define SIRFSOC_DMA_VER_A6                      4
+
 #define SIRFSOC_DMA_DESCRIPTORS                 16
 #define SIRFSOC_DMA_CHANNELS                    16
+#define SIRFSOC_DMA_TABLE_NUM                   256
 
 #define SIRFSOC_DMA_CH_ADDR                     0x00
 #define SIRFSOC_DMA_CH_XLEN                     0x04
@@ -35,15 +40,44 @@
 #define SIRFSOC_DMA_CH_VALID                    0x140
 #define SIRFSOC_DMA_CH_INT                      0x144
 #define SIRFSOC_DMA_INT_EN                      0x148
-#define SIRFSOC_DMA_INT_EN_CLR			0x14C
+#define SIRFSOC_DMA_INT_EN_CLR                  0x14C
 #define SIRFSOC_DMA_CH_LOOP_CTRL                0x150
-#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR            0x15C
+#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR            0x154
+#define SIRFSOC_DMA_WIDTH_ATLAS7                0x10
+#define SIRFSOC_DMA_VALID_ATLAS7                0x14
+#define SIRFSOC_DMA_INT_ATLAS7                  0x18
+#define SIRFSOC_DMA_INT_EN_ATLAS7               0x1c
+#define SIRFSOC_DMA_LOOP_CTRL_ATLAS7            0x20
+#define SIRFSOC_DMA_CUR_DATA_ADDR               0x34
+#define SIRFSOC_DMA_MUL_ATLAS7                  0x38
+#define SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7         0x158
+#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7     0x15C
+#define SIRFSOC_DMA_IOBG_SCMD_EN		0x800
+#define SIRFSOC_DMA_EARLY_RESP_SET		0x818
+#define SIRFSOC_DMA_EARLY_RESP_CLR		0x81C
 
 #define SIRFSOC_DMA_MODE_CTRL_BIT               4
 #define SIRFSOC_DMA_DIR_CTRL_BIT                5
+#define SIRFSOC_DMA_MODE_CTRL_BIT_ATLAS7        2
+#define SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7       3
+#define SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7         4
+#define SIRFSOC_DMA_TAB_NUM_ATLAS7              7
+#define SIRFSOC_DMA_CHAIN_INT_BIT_ATLAS7        5
+#define SIRFSOC_DMA_CHAIN_FLAG_SHIFT_ATLAS7     25
+#define SIRFSOC_DMA_CHAIN_ADDR_SHIFT            32
+
+#define SIRFSOC_DMA_INT_FINI_INT_ATLAS7         BIT(0)
+#define SIRFSOC_DMA_INT_CNT_INT_ATLAS7          BIT(1)
+#define SIRFSOC_DMA_INT_PAU_INT_ATLAS7          BIT(2)
+#define SIRFSOC_DMA_INT_LOOP_INT_ATLAS7         BIT(3)
+#define SIRFSOC_DMA_INT_INV_INT_ATLAS7          BIT(4)
+#define SIRFSOC_DMA_INT_END_INT_ATLAS7          BIT(5)
+#define SIRFSOC_DMA_INT_ALL_ATLAS7              0x3F
 
 /* xlen and dma_width register is in 4 bytes boundary */
 #define SIRFSOC_DMA_WORD_LEN			4
+#define SIRFSOC_DMA_XLEN_MAX_V1         0x800
+#define SIRFSOC_DMA_XLEN_MAX_V2         0x1000
 
 struct sirfsoc_dma_desc {
 	struct dma_async_tx_descriptor	desc;
@@ -56,7 +90,9 @@
 	int             width;          /* DMA width */
 	int             dir;
 	bool            cyclic;         /* is loop DMA? */
+	bool            chain;          /* is chain DMA? */
 	u32             addr;		/* DMA buffer address */
+	u64 chain_table[SIRFSOC_DMA_TABLE_NUM]; /* chain tbl */
 };
 
 struct sirfsoc_dma_chan {
@@ -87,10 +123,25 @@
 	void __iomem			*base;
 	int				irq;
 	struct clk			*clk;
-	bool				is_marco;
+	int				type;
+	void (*exec_desc)(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base);
 	struct sirfsoc_dma_regs		regs_save;
 };
 
+struct sirfsoc_dmadata {
+	void (*exec)(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base);
+	int type;
+};
+
+enum sirfsoc_dma_chain_flag {
+	SIRFSOC_DMA_CHAIN_NORMAL = 0x01,
+	SIRFSOC_DMA_CHAIN_PAUSE = 0x02,
+	SIRFSOC_DMA_CHAIN_LOOP = 0x03,
+	SIRFSOC_DMA_CHAIN_END = 0x04
+};
+
 #define DRV_NAME	"sirfsoc_dma"
 
 static int sirfsoc_dma_runtime_suspend(struct device *dev);
@@ -109,48 +160,105 @@
 	return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]);
 }
 
+static void sirfsoc_dma_execute_hw_a7v2(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base)
+{
+	if (sdesc->chain) {
+		/* DMA v2 HW chain mode */
+		writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) |
+			       (sdesc->chain <<
+				SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) |
+			       (0x8 << SIRFSOC_DMA_TAB_NUM_ATLAS7) | 0x3,
+			       base + SIRFSOC_DMA_CH_CTRL);
+	} else {
+		/* DMA v2 legacy mode */
+		writel_relaxed(sdesc->xlen, base + SIRFSOC_DMA_CH_XLEN);
+		writel_relaxed(sdesc->ylen, base + SIRFSOC_DMA_CH_YLEN);
+		writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_ATLAS7);
+		writel_relaxed((sdesc->width*((sdesc->ylen+1)>>1)),
+				base + SIRFSOC_DMA_MUL_ATLAS7);
+		writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) |
+			       (sdesc->chain <<
+				SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) |
+			       0x3, base + SIRFSOC_DMA_CH_CTRL);
+	}
+	writel_relaxed(sdesc->chain ? SIRFSOC_DMA_INT_END_INT_ATLAS7 :
+		       (SIRFSOC_DMA_INT_FINI_INT_ATLAS7 |
+			SIRFSOC_DMA_INT_LOOP_INT_ATLAS7),
+		       base + SIRFSOC_DMA_INT_EN_ATLAS7);
+	writel(sdesc->addr, base + SIRFSOC_DMA_CH_ADDR);
+	if (sdesc->cyclic)
+		writel(0x10001, base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
+}
+
+static void sirfsoc_dma_execute_hw_a7v1(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base)
+{
+	writel_relaxed(1, base + SIRFSOC_DMA_IOBG_SCMD_EN);
+	writel_relaxed((1 << cid), base + SIRFSOC_DMA_EARLY_RESP_SET);
+	writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4);
+	writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+		       (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+		       base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN);
+	writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN);
+	writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) |
+		       (1 << cid), base + SIRFSOC_DMA_INT_EN);
+	writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+	if (sdesc->cyclic) {
+		writel((1 << cid) | 1 << (cid + 16) |
+		       readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7),
+		       base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7);
+	}
+
+}
+
+static void sirfsoc_dma_execute_hw_a6(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base)
+{
+	writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4);
+	writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+		       (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+		       base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN);
+	writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN);
+	writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) |
+		       (1 << cid), base + SIRFSOC_DMA_INT_EN);
+	writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+	if (sdesc->cyclic) {
+		writel((1 << cid) | 1 << (cid + 16) |
+		       readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL),
+		       base + SIRFSOC_DMA_CH_LOOP_CTRL);
+	}
+
+}
+
 /* Execute all queued DMA descriptors */
 static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan)
 {
 	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
 	int cid = schan->chan.chan_id;
 	struct sirfsoc_dma_desc *sdesc = NULL;
+	void __iomem *base;
 
 	/*
 	 * lock has been held by functions calling this, so we don't hold
 	 * lock again
 	 */
-
+	base = sdma->base;
 	sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc,
-		node);
+				 node);
 	/* Move the first queued descriptor to active list */
 	list_move_tail(&sdesc->node, &schan->active);
 
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2)
+		cid = 0;
+
 	/* Start the DMA transfer */
-	writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 +
-		cid * 4);
-	writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
-		(sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
-		sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
-	writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 +
-		SIRFSOC_DMA_CH_XLEN);
-	writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 +
-		SIRFSOC_DMA_CH_YLEN);
-	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) |
-		(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+	sdma->exec_desc(sdesc, cid, schan->mode, base);
 
-	/*
-	 * writel has an implict memory write barrier to make sure data is
-	 * flushed into memory before starting DMA
-	 */
-	writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
-
-	if (sdesc->cyclic) {
-		writel((1 << cid) | 1 << (cid + 16) |
-			readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL),
-			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+	if (sdesc->cyclic)
 		schan->happened_cyclic = schan->completed_cyclic = 0;
-	}
 }
 
 /* Interrupt handler */
@@ -160,27 +268,65 @@
 	struct sirfsoc_dma_chan *schan;
 	struct sirfsoc_dma_desc *sdesc = NULL;
 	u32 is;
+	bool chain;
 	int ch;
+	void __iomem *reg;
 
-	is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
-	while ((ch = fls(is) - 1) >= 0) {
-		is &= ~(1 << ch);
-		writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT);
-		schan = &sdma->channels[ch];
+	switch (sdma->type) {
+	case SIRFSOC_DMA_VER_A6:
+	case SIRFSOC_DMA_VER_A7V1:
+		is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
+		reg = sdma->base + SIRFSOC_DMA_CH_INT;
+		while ((ch = fls(is) - 1) >= 0) {
+			is &= ~(1 << ch);
+			writel_relaxed(1 << ch, reg);
+			schan = &sdma->channels[ch];
+			spin_lock(&schan->lock);
+			sdesc = list_first_entry(&schan->active,
+						 struct sirfsoc_dma_desc, node);
+			if (!sdesc->cyclic) {
+				/* Execute queued descriptors */
+				list_splice_tail_init(&schan->active,
+						      &schan->completed);
+				dma_cookie_complete(&sdesc->desc);
+				if (!list_empty(&schan->queued))
+					sirfsoc_dma_execute(schan);
+			} else
+				schan->happened_cyclic++;
+			spin_unlock(&schan->lock);
+		}
+		break;
 
+	case SIRFSOC_DMA_VER_A7V2:
+		is = readl(sdma->base + SIRFSOC_DMA_INT_ATLAS7);
+
+		reg = sdma->base + SIRFSOC_DMA_INT_ATLAS7;
+		writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, reg);
+		schan = &sdma->channels[0];
 		spin_lock(&schan->lock);
-
-		sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
-			node);
+		sdesc = list_first_entry(&schan->active,
+					 struct sirfsoc_dma_desc, node);
 		if (!sdesc->cyclic) {
-			/* Execute queued descriptors */
-			list_splice_tail_init(&schan->active, &schan->completed);
-			if (!list_empty(&schan->queued))
-				sirfsoc_dma_execute(schan);
-		} else
+			chain = sdesc->chain;
+			if ((chain && (is & SIRFSOC_DMA_INT_END_INT_ATLAS7)) ||
+				(!chain &&
+				(is & SIRFSOC_DMA_INT_FINI_INT_ATLAS7))) {
+				/* Execute queued descriptors */
+				list_splice_tail_init(&schan->active,
+						      &schan->completed);
+				dma_cookie_complete(&sdesc->desc);
+				if (!list_empty(&schan->queued))
+					sirfsoc_dma_execute(schan);
+			}
+		} else if (sdesc->cyclic && (is &
+					SIRFSOC_DMA_INT_LOOP_INT_ATLAS7))
 			schan->happened_cyclic++;
 
 		spin_unlock(&schan->lock);
+		break;
+
+	default:
+		break;
 	}
 
 	/* Schedule tasklet */
@@ -227,16 +373,15 @@
 			schan->chan.completed_cookie = last_cookie;
 			spin_unlock_irqrestore(&schan->lock, flags);
 		} else {
-			/* for cyclic channel, desc is always in active list */
-			sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
-				node);
-
-			if (!sdesc || (sdesc && !sdesc->cyclic)) {
-				/* without active cyclic DMA */
+			if (list_empty(&schan->active)) {
 				spin_unlock_irqrestore(&schan->lock, flags);
 				continue;
 			}
 
+			/* for cyclic channel, desc is always in active list */
+			sdesc = list_first_entry(&schan->active,
+				struct sirfsoc_dma_desc, node);
+
 			/* cyclic DMA */
 			happened_cyclic = schan->happened_cyclic;
 			spin_unlock_irqrestore(&schan->lock, flags);
@@ -307,20 +452,32 @@
 
 	spin_lock_irqsave(&schan->lock, flags);
 
-	if (!sdma->is_marco) {
-		writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
-			~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
-		writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL)
-			& ~((1 << cid) | 1 << (cid + 16)),
-			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
-	} else {
+	switch (sdma->type) {
+	case SIRFSOC_DMA_VER_A7V1:
 		writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR);
 		writel_relaxed((1 << cid) | 1 << (cid + 16),
-			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR);
+			       sdma->base +
+			       SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7);
+		writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+		break;
+	case SIRFSOC_DMA_VER_A7V2:
+		writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7);
+		writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
+		writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A6:
+		writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
+			       ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+		writel_relaxed(readl_relaxed(sdma->base +
+					     SIRFSOC_DMA_CH_LOOP_CTRL) &
+			       ~((1 << cid) | 1 << (cid + 16)),
+			       sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+		break;
+	default:
+		break;
 	}
 
-	writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
-
 	list_splice_tail_init(&schan->active, &schan->free);
 	list_splice_tail_init(&schan->queued, &schan->free);
 
@@ -338,13 +495,25 @@
 
 	spin_lock_irqsave(&schan->lock, flags);
 
-	if (!sdma->is_marco)
-		writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL)
-			& ~((1 << cid) | 1 << (cid + 16)),
-			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
-	else
+	switch (sdma->type) {
+	case SIRFSOC_DMA_VER_A7V1:
 		writel_relaxed((1 << cid) | 1 << (cid + 16),
-			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR);
+			       sdma->base +
+			       SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A7V2:
+		writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A6:
+		writel_relaxed(readl_relaxed(sdma->base +
+					     SIRFSOC_DMA_CH_LOOP_CTRL) &
+			       ~((1 << cid) | 1 << (cid + 16)),
+			       sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		break;
+
+	default:
+		break;
+	}
 
 	spin_unlock_irqrestore(&schan->lock, flags);
 
@@ -359,14 +528,25 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&schan->lock, flags);
-
-	if (!sdma->is_marco)
-		writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL)
-			| ((1 << cid) | 1 << (cid + 16)),
-			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
-	else
+	switch (sdma->type) {
+	case SIRFSOC_DMA_VER_A7V1:
 		writel_relaxed((1 << cid) | 1 << (cid + 16),
-			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+			       sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A7V2:
+		writel_relaxed(0x10001,
+			       sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A6:
+		writel_relaxed(readl_relaxed(sdma->base +
+					     SIRFSOC_DMA_CH_LOOP_CTRL) |
+			       ((1 << cid) | 1 << (cid + 16)),
+			       sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		break;
+
+	default:
+		break;
+	}
 
 	spin_unlock_irqrestore(&schan->lock, flags);
 
@@ -473,14 +653,31 @@
 
 	spin_lock_irqsave(&schan->lock, flags);
 
-	sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
-			node);
-	dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) *
-		(sdesc->width * SIRFSOC_DMA_WORD_LEN);
+	if (list_empty(&schan->active)) {
+		ret = dma_cookie_status(chan, cookie, txstate);
+		dma_set_residue(txstate, 0);
+		spin_unlock_irqrestore(&schan->lock, flags);
+		return ret;
+	}
+	sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, node);
+	if (sdesc->cyclic)
+		dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) *
+			(sdesc->width * SIRFSOC_DMA_WORD_LEN);
+	else
+		dma_request_bytes = sdesc->xlen * SIRFSOC_DMA_WORD_LEN;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	dma_pos = readl_relaxed(sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR)
-		<< 2;
+
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2)
+		cid = 0;
+
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
+		dma_pos = readl_relaxed(sdma->base + SIRFSOC_DMA_CUR_DATA_ADDR);
+	} else {
+		dma_pos = readl_relaxed(
+			sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) << 2;
+	}
+
 	residue = dma_request_bytes - (dma_pos - sdesc->addr);
 	dma_set_residue(txstate, residue);
 
@@ -647,6 +844,7 @@
 	struct dma_device *dma;
 	struct sirfsoc_dma *sdma;
 	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dmadata *data;
 	struct resource res;
 	ulong regs_start, regs_size;
 	u32 id;
@@ -657,9 +855,11 @@
 		dev_err(dev, "Memory exhausted!\n");
 		return -ENOMEM;
 	}
-
-	if (of_device_is_compatible(dn, "sirf,marco-dmac"))
-		sdma->is_marco = true;
+	data = (struct sirfsoc_dmadata *)
+		(of_match_device(op->dev.driver->of_match_table,
+				 &op->dev)->data);
+	sdma->exec_desc = data->exec;
+	sdma->type = data->type;
 
 	if (of_property_read_u32(dn, "cell-index", &id)) {
 		dev_err(dev, "Fail to get DMAC index\n");
@@ -816,6 +1016,8 @@
 	struct sirfsoc_dma_chan *schan;
 	int ch;
 	int ret;
+	int count;
+	u32 int_offset;
 
 	/*
 	 * if we were runtime-suspended before, resume to enable clock
@@ -827,11 +1029,19 @@
 			return ret;
 	}
 
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
+		count = 1;
+		int_offset = SIRFSOC_DMA_INT_EN_ATLAS7;
+	} else {
+		count = SIRFSOC_DMA_CHANNELS;
+		int_offset = SIRFSOC_DMA_INT_EN;
+	}
+
 	/*
 	 * DMA controller will lose all registers while suspending
 	 * so we need to save registers for active channels
 	 */
-	for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) {
+	for (ch = 0; ch < count; ch++) {
 		schan = &sdma->channels[ch];
 		if (list_empty(&schan->active))
 			continue;
@@ -841,7 +1051,7 @@
 		save->ctrl[ch] = readl_relaxed(sdma->base +
 			ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
 	}
-	save->interrupt_en = readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN);
+	save->interrupt_en = readl_relaxed(sdma->base + int_offset);
 
 	/* Disable clock */
 	sirfsoc_dma_runtime_suspend(dev);
@@ -857,14 +1067,27 @@
 	struct sirfsoc_dma_chan *schan;
 	int ch;
 	int ret;
+	int count;
+	u32 int_offset;
+	u32 width_offset;
 
 	/* Enable clock before accessing register */
 	ret = sirfsoc_dma_runtime_resume(dev);
 	if (ret < 0)
 		return ret;
 
-	writel_relaxed(save->interrupt_en, sdma->base + SIRFSOC_DMA_INT_EN);
-	for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) {
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
+		count = 1;
+		int_offset = SIRFSOC_DMA_INT_EN_ATLAS7;
+		width_offset = SIRFSOC_DMA_WIDTH_ATLAS7;
+	} else {
+		count = SIRFSOC_DMA_CHANNELS;
+		int_offset = SIRFSOC_DMA_INT_EN;
+		width_offset = SIRFSOC_DMA_WIDTH_0;
+	}
+
+	writel_relaxed(save->interrupt_en, sdma->base + int_offset);
+	for (ch = 0; ch < count; ch++) {
 		schan = &sdma->channels[ch];
 		if (list_empty(&schan->active))
 			continue;
@@ -872,15 +1095,21 @@
 			struct sirfsoc_dma_desc,
 			node);
 		writel_relaxed(sdesc->width,
-			sdma->base + SIRFSOC_DMA_WIDTH_0 + ch * 4);
+			sdma->base + width_offset + ch * 4);
 		writel_relaxed(sdesc->xlen,
 			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN);
 		writel_relaxed(sdesc->ylen,
 			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN);
 		writel_relaxed(save->ctrl[ch],
 			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
-		writel_relaxed(sdesc->addr >> 2,
-			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR);
+		if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
+			writel_relaxed(sdesc->addr,
+				sdma->base + SIRFSOC_DMA_CH_ADDR);
+		} else {
+			writel_relaxed(sdesc->addr >> 2,
+				sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR);
+
+		}
 	}
 
 	/* if we were runtime-suspended before, suspend again */
@@ -896,9 +1125,25 @@
 	SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume)
 };
 
+struct sirfsoc_dmadata sirfsoc_dmadata_a6 = {
+	.exec = sirfsoc_dma_execute_hw_a6,
+	.type = SIRFSOC_DMA_VER_A6,
+};
+
+struct sirfsoc_dmadata sirfsoc_dmadata_a7v1 = {
+	.exec = sirfsoc_dma_execute_hw_a7v1,
+	.type = SIRFSOC_DMA_VER_A7V1,
+};
+
+struct sirfsoc_dmadata sirfsoc_dmadata_a7v2 = {
+	.exec = sirfsoc_dma_execute_hw_a7v2,
+	.type = SIRFSOC_DMA_VER_A7V2,
+};
+
 static const struct of_device_id sirfsoc_dma_match[] = {
-	{ .compatible = "sirf,prima2-dmac", },
-	{ .compatible = "sirf,marco-dmac", },
+	{ .compatible = "sirf,prima2-dmac", .data = &sirfsoc_dmadata_a6,},
+	{ .compatible = "sirf,atlas7-dmac", .data = &sirfsoc_dmadata_a7v1,},
+	{ .compatible = "sirf,atlas7-dmac-v2", .data = &sirfsoc_dmadata_a7v2,},
 	{},
 };
 
@@ -925,7 +1170,7 @@
 subsys_initcall(sirfsoc_dma_init);
 module_exit(sirfsoc_dma_exit);
 
-MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, "
-	"Barry Song <baohua.song@csr.com>");
+MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>");
+MODULE_AUTHOR("Barry Song <baohua.song@csr.com>");
 MODULE_DESCRIPTION("SIRFSOC DMA control driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
index 11e5365..842ff97 100644
--- a/drivers/dma/sun6i-dma.c
+++ b/drivers/dma/sun6i-dma.c
@@ -891,9 +891,21 @@
 	.nr_max_vchans   = 37,
 };
 
+/*
+ * The H3 has 12 physical channels, a maximum DRQ port id of 27,
+ * and a total of 34 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_h3_dma_cfg = {
+	.nr_max_channels = 12,
+	.nr_max_requests = 27,
+	.nr_max_vchans   = 34,
+};
+
 static const struct of_device_id sun6i_dma_match[] = {
 	{ .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
 	{ .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
+	{ .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg },
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c
new file mode 100644
index 0000000..24f5ca2
--- /dev/null
+++ b/drivers/dma/ti-dma-crossbar.c
@@ -0,0 +1,188 @@
+/*
+ *  Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/io.h>
+#include <linux/idr.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#define TI_XBAR_OUTPUTS	127
+#define TI_XBAR_INPUTS	256
+
+static DEFINE_IDR(map_idr);
+
+struct ti_dma_xbar_data {
+	void __iomem *iomem;
+
+	struct dma_router dmarouter;
+
+	u16 safe_val; /* Value to rest the crossbar lines */
+	u32 xbar_requests; /* number of DMA requests connected to XBAR */
+	u32 dma_requests; /* number of DMA requests forwarded to DMA */
+};
+
+struct ti_dma_xbar_map {
+	u16 xbar_in;
+	int xbar_out;
+};
+
+static inline void ti_dma_xbar_write(void __iomem *iomem, int xbar, u16 val)
+{
+	writew_relaxed(val, iomem + (xbar * 2));
+}
+
+static void ti_dma_xbar_free(struct device *dev, void *route_data)
+{
+	struct ti_dma_xbar_data *xbar = dev_get_drvdata(dev);
+	struct ti_dma_xbar_map *map = route_data;
+
+	dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n",
+		map->xbar_in, map->xbar_out);
+
+	ti_dma_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val);
+	idr_remove(&map_idr, map->xbar_out);
+	kfree(map);
+}
+
+static void *ti_dma_xbar_route_allocate(struct of_phandle_args *dma_spec,
+					struct of_dma *ofdma)
+{
+	struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+	struct ti_dma_xbar_data *xbar = platform_get_drvdata(pdev);
+	struct ti_dma_xbar_map *map;
+
+	if (dma_spec->args[0] >= xbar->xbar_requests) {
+		dev_err(&pdev->dev, "Invalid XBAR request number: %d\n",
+			dma_spec->args[0]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* The of_node_put() will be done in the core for the node */
+	dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
+	if (!dma_spec->np) {
+		dev_err(&pdev->dev, "Can't get DMA master\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map) {
+		of_node_put(dma_spec->np);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	map->xbar_out = idr_alloc(&map_idr, NULL, 0, xbar->dma_requests,
+				  GFP_KERNEL);
+	map->xbar_in = (u16)dma_spec->args[0];
+
+	/* The DMA request is 1 based in sDMA */
+	dma_spec->args[0] = map->xbar_out + 1;
+
+	dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n",
+		map->xbar_in, map->xbar_out);
+
+	ti_dma_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in);
+
+	return map;
+}
+
+static int ti_dma_xbar_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct device_node *dma_node;
+	struct ti_dma_xbar_data *xbar;
+	struct resource *res;
+	u32 safe_val;
+	void __iomem *iomem;
+	int i, ret;
+
+	if (!node)
+		return -ENODEV;
+
+	xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL);
+	if (!xbar)
+		return -ENOMEM;
+
+	dma_node = of_parse_phandle(node, "dma-masters", 0);
+	if (!dma_node) {
+		dev_err(&pdev->dev, "Can't get DMA master node\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_u32(dma_node, "dma-requests",
+				 &xbar->dma_requests)) {
+		dev_info(&pdev->dev,
+			 "Missing XBAR output information, using %u.\n",
+			 TI_XBAR_OUTPUTS);
+		xbar->dma_requests = TI_XBAR_OUTPUTS;
+	}
+	of_node_put(dma_node);
+
+	if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) {
+		dev_info(&pdev->dev,
+			 "Missing XBAR input information, using %u.\n",
+			 TI_XBAR_INPUTS);
+		xbar->xbar_requests = TI_XBAR_INPUTS;
+	}
+
+	if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val))
+		xbar->safe_val = (u16)safe_val;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	iomem = devm_ioremap_resource(&pdev->dev, res);
+	if (!iomem)
+		return -ENOMEM;
+
+	xbar->iomem = iomem;
+
+	xbar->dmarouter.dev = &pdev->dev;
+	xbar->dmarouter.route_free = ti_dma_xbar_free;
+
+	platform_set_drvdata(pdev, xbar);
+
+	/* Reset the crossbar */
+	for (i = 0; i < xbar->dma_requests; i++)
+		ti_dma_xbar_write(xbar->iomem, i, xbar->safe_val);
+
+	ret = of_dma_router_register(node, ti_dma_xbar_route_allocate,
+				     &xbar->dmarouter);
+	if (ret) {
+		/* Restore the defaults for the crossbar */
+		for (i = 0; i < xbar->dma_requests; i++)
+			ti_dma_xbar_write(xbar->iomem, i, i);
+	}
+
+	return ret;
+}
+
+static const struct of_device_id ti_dma_xbar_match[] = {
+	{ .compatible = "ti,dra7-dma-crossbar" },
+	{},
+};
+
+static struct platform_driver ti_dma_xbar_driver = {
+	.driver = {
+		.name = "ti-dma-crossbar",
+		.of_match_table = of_match_ptr(ti_dma_xbar_match),
+	},
+	.probe	= ti_dma_xbar_probe,
+};
+
+int omap_dmaxbar_init(void)
+{
+	return platform_driver_register(&ti_dma_xbar_driver);
+}
+arch_initcall(omap_dmaxbar_init);
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 6f80432..7d2c17d 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -29,7 +29,7 @@
 	spin_lock_irqsave(&vc->lock, flags);
 	cookie = dma_cookie_assign(tx);
 
-	list_add_tail(&vd->node, &vc->desc_submitted);
+	list_move_tail(&vd->node, &vc->desc_submitted);
 	spin_unlock_irqrestore(&vc->lock, flags);
 
 	dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n",
@@ -83,8 +83,10 @@
 		cb_data = vd->tx.callback_param;
 
 		list_del(&vd->node);
-
-		vc->desc_free(vd);
+		if (async_tx_test_ack(&vd->tx))
+			list_add(&vd->node, &vc->desc_allocated);
+		else
+			vc->desc_free(vd);
 
 		if (cb)
 			cb(cb_data);
@@ -96,9 +98,13 @@
 	while (!list_empty(head)) {
 		struct virt_dma_desc *vd = list_first_entry(head,
 			struct virt_dma_desc, node);
-		list_del(&vd->node);
-		dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
-		vc->desc_free(vd);
+		if (async_tx_test_ack(&vd->tx)) {
+			list_move_tail(&vd->node, &vc->desc_allocated);
+		} else {
+			dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
+			list_del(&vd->node);
+			vc->desc_free(vd);
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
@@ -108,6 +114,7 @@
 	dma_cookie_init(&vc->chan);
 
 	spin_lock_init(&vc->lock);
+	INIT_LIST_HEAD(&vc->desc_allocated);
 	INIT_LIST_HEAD(&vc->desc_submitted);
 	INIT_LIST_HEAD(&vc->desc_issued);
 	INIT_LIST_HEAD(&vc->desc_completed);
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index 181b952..189e75d 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -29,6 +29,7 @@
 	spinlock_t lock;
 
 	/* protected by vc.lock */
+	struct list_head desc_allocated;
 	struct list_head desc_submitted;
 	struct list_head desc_issued;
 	struct list_head desc_completed;
@@ -55,11 +56,16 @@
 	struct virt_dma_desc *vd, unsigned long tx_flags)
 {
 	extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *);
+	unsigned long flags;
 
 	dma_async_tx_descriptor_init(&vd->tx, &vc->chan);
 	vd->tx.flags = tx_flags;
 	vd->tx.tx_submit = vchan_tx_submit;
 
+	spin_lock_irqsave(&vc->lock, flags);
+	list_add_tail(&vd->node, &vc->desc_allocated);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
 	return &vd->tx;
 }
 
@@ -122,7 +128,8 @@
 }
 
 /**
- * vchan_get_all_descriptors - obtain all submitted and issued descriptors
+ * vchan_get_all_descriptors - obtain all allocated, submitted and issued
+ *                             descriptors
  * vc: virtual channel to get descriptors from
  * head: list of descriptors found
  *
@@ -134,6 +141,7 @@
 static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
 	struct list_head *head)
 {
+	list_splice_tail_init(&vc->desc_allocated, head);
 	list_splice_tail_init(&vc->desc_submitted, head);
 	list_splice_tail_init(&vc->desc_issued, head);
 	list_splice_tail_init(&vc->desc_completed, head);
@@ -141,11 +149,14 @@
 
 static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
 {
+	struct virt_dma_desc *vd;
 	unsigned long flags;
 	LIST_HEAD(head);
 
 	spin_lock_irqsave(&vc->lock, flags);
 	vchan_get_all_descriptors(vc, &head);
+	list_for_each_entry(vd, &head, node)
+		async_tx_clear_ack(&vd->tx);
 	spin_unlock_irqrestore(&vc->lock, flags);
 
 	vchan_dma_desc_free_list(vc, &head);
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
index f52e3750..620fd55ec 100755
--- a/drivers/dma/xgene-dma.c
+++ b/drivers/dma/xgene-dma.c
@@ -124,32 +124,8 @@
 #define XGENE_DMA_DESC_ELERR_POS		46
 #define XGENE_DMA_DESC_RTYPE_POS		56
 #define XGENE_DMA_DESC_LERR_POS			60
-#define XGENE_DMA_DESC_FLYBY_POS		4
 #define XGENE_DMA_DESC_BUFLEN_POS		48
 #define XGENE_DMA_DESC_HOENQ_NUM_POS		48
-
-#define XGENE_DMA_DESC_NV_SET(m)		\
-	(((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT)
-#define XGENE_DMA_DESC_IN_SET(m)		\
-	(((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT)
-#define XGENE_DMA_DESC_RTYPE_SET(m, v)		\
-	(((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS))
-#define XGENE_DMA_DESC_BUFADDR_SET(m, v)	\
-	(((u64 *)(m))[0] |= (v))
-#define XGENE_DMA_DESC_BUFLEN_SET(m, v)		\
-	(((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS))
-#define XGENE_DMA_DESC_C_SET(m)			\
-	(((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT)
-#define XGENE_DMA_DESC_FLYBY_SET(m, v)		\
-	(((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS))
-#define XGENE_DMA_DESC_MULTI_SET(m, v, i)	\
-	(((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8)))
-#define XGENE_DMA_DESC_DR_SET(m)		\
-	(((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT)
-#define XGENE_DMA_DESC_DST_ADDR_SET(m, v)	\
-	(((u64 *)(m))[3] |= (v))
-#define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v)	\
-	(((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS))
 #define XGENE_DMA_DESC_ELERR_RD(m)		\
 	(((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3)
 #define XGENE_DMA_DESC_LERR_RD(m)		\
@@ -158,14 +134,7 @@
 	(((elerr) << 4) | (lerr))
 
 /* X-Gene DMA descriptor empty s/w signature */
-#define XGENE_DMA_DESC_EMPTY_INDEX		0
 #define XGENE_DMA_DESC_EMPTY_SIGNATURE		~0ULL
-#define XGENE_DMA_DESC_SET_EMPTY(m)		\
-	(((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] =	\
-	 XGENE_DMA_DESC_EMPTY_SIGNATURE)
-#define XGENE_DMA_DESC_IS_EMPTY(m)		\
-	(((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] ==	\
-	 XGENE_DMA_DESC_EMPTY_SIGNATURE)
 
 /* X-Gene DMA configurable parameters defines */
 #define XGENE_DMA_RING_NUM		512
@@ -184,7 +153,7 @@
 #define XGENE_DMA_XOR_ALIGNMENT		6	/* 64 Bytes */
 #define XGENE_DMA_MAX_XOR_SRC		5
 #define XGENE_DMA_16K_BUFFER_LEN_CODE	0x0
-#define XGENE_DMA_INVALID_LEN_CODE	0x7800
+#define XGENE_DMA_INVALID_LEN_CODE	0x7800000000000000ULL
 
 /* X-Gene DMA descriptor error codes */
 #define ERR_DESC_AXI			0x01
@@ -214,10 +183,10 @@
 #define ERR_DESC_SRC_INT		0xB
 
 /* X-Gene DMA flyby operation code */
-#define FLYBY_2SRC_XOR			0x8
-#define FLYBY_3SRC_XOR			0x9
-#define FLYBY_4SRC_XOR			0xA
-#define FLYBY_5SRC_XOR			0xB
+#define FLYBY_2SRC_XOR			0x80
+#define FLYBY_3SRC_XOR			0x90
+#define FLYBY_4SRC_XOR			0xA0
+#define FLYBY_5SRC_XOR			0xB0
 
 /* X-Gene DMA SW descriptor flags */
 #define XGENE_DMA_FLAG_64B_DESC		BIT(0)
@@ -238,10 +207,10 @@
 	dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
 
 struct xgene_dma_desc_hw {
-	u64 m0;
-	u64 m1;
-	u64 m2;
-	u64 m3;
+	__le64 m0;
+	__le64 m1;
+	__le64 m2;
+	__le64 m3;
 };
 
 enum xgene_dma_ring_cfgsize {
@@ -388,18 +357,11 @@
 	return !(val & XGENE_DMA_PQ_DISABLE_MASK);
 }
 
-static void xgene_dma_cpu_to_le64(u64 *desc, int count)
-{
-	int i;
-
-	for (i = 0; i < count; i++)
-		desc[i] = cpu_to_le64(desc[i]);
-}
-
-static u16 xgene_dma_encode_len(u32 len)
+static u64 xgene_dma_encode_len(size_t len)
 {
 	return (len < XGENE_DMA_MAX_BYTE_CNT) ?
-		len : XGENE_DMA_16K_BUFFER_LEN_CODE;
+		((u64)len << XGENE_DMA_DESC_BUFLEN_POS) :
+		XGENE_DMA_16K_BUFFER_LEN_CODE;
 }
 
 static u8 xgene_dma_encode_xor_flyby(u32 src_cnt)
@@ -424,34 +386,50 @@
 	return XGENE_DMA_RING_DESC_CNT(ring_state);
 }
 
-static void xgene_dma_set_src_buffer(void *ext8, size_t *len,
+static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len,
 				     dma_addr_t *paddr)
 {
 	size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ?
 			*len : XGENE_DMA_MAX_BYTE_CNT;
 
-	XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr);
-	XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes));
+	*ext8 |= cpu_to_le64(*paddr);
+	*ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes));
 	*len -= nbytes;
 	*paddr += nbytes;
 }
 
-static void xgene_dma_invalidate_buffer(void *ext8)
+static void xgene_dma_invalidate_buffer(__le64 *ext8)
 {
-	XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE);
+	*ext8 |= cpu_to_le64(XGENE_DMA_INVALID_LEN_CODE);
 }
 
-static void *xgene_dma_lookup_ext8(u64 *desc, int idx)
+static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx)
 {
-	return (idx % 2) ? (desc + idx - 1) : (desc + idx + 1);
+	switch (idx) {
+	case 0:
+		return &desc->m1;
+	case 1:
+		return &desc->m0;
+	case 2:
+		return &desc->m3;
+	case 3:
+		return &desc->m2;
+	default:
+		pr_err("Invalid dma descriptor index\n");
+	}
+
+	return NULL;
 }
 
-static void xgene_dma_init_desc(void *desc, u16 dst_ring_num)
+static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc,
+				u16 dst_ring_num)
 {
-	XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */
-	XGENE_DMA_DESC_IN_SET(desc);
-	XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num);
-	XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA);
+	desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT);
+	desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA <<
+				XGENE_DMA_DESC_RTYPE_POS);
+	desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT);
+	desc->m3 |= cpu_to_le64((u64)dst_ring_num <<
+				XGENE_DMA_DESC_HOENQ_NUM_POS);
 }
 
 static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan,
@@ -459,7 +437,7 @@
 				    dma_addr_t dst, dma_addr_t src,
 				    size_t len)
 {
-	void *desc1, *desc2;
+	struct xgene_dma_desc_hw *desc1, *desc2;
 	int i;
 
 	/* Get 1st descriptor */
@@ -467,23 +445,21 @@
 	xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
 
 	/* Set destination address */
-	XGENE_DMA_DESC_DR_SET(desc1);
-	XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst);
+	desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT);
+	desc1->m3 |= cpu_to_le64(dst);
 
 	/* Set 1st source address */
-	xgene_dma_set_src_buffer(desc1 + 8, &len, &src);
+	xgene_dma_set_src_buffer(&desc1->m1, &len, &src);
 
-	if (len <= 0) {
-		desc2 = NULL;
-		goto skip_additional_src;
-	}
+	if (!len)
+		return;
 
 	/*
 	 * We need to split this source buffer,
 	 * and need to use 2nd descriptor
 	 */
 	desc2 = &desc_sw->desc2;
-	XGENE_DMA_DESC_NV_SET(desc1);
+	desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT);
 
 	/* Set 2nd to 5th source address */
 	for (i = 0; i < 4 && len; i++)
@@ -496,12 +472,6 @@
 
 	/* Updated flag that we have prepared 64B descriptor */
 	desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
-
-skip_additional_src:
-	/* Hardware stores descriptor in little endian format */
-	xgene_dma_cpu_to_le64(desc1, 4);
-	if (desc2)
-		xgene_dma_cpu_to_le64(desc2, 4);
 }
 
 static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan,
@@ -510,7 +480,7 @@
 				    u32 src_cnt, size_t *nbytes,
 				    const u8 *scf)
 {
-	void *desc1, *desc2;
+	struct xgene_dma_desc_hw *desc1, *desc2;
 	size_t len = *nbytes;
 	int i;
 
@@ -521,28 +491,24 @@
 	xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
 
 	/* Set destination address */
-	XGENE_DMA_DESC_DR_SET(desc1);
-	XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst);
+	desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT);
+	desc1->m3 |= cpu_to_le64(*dst);
 
 	/* We have multiple source addresses, so need to set NV bit*/
-	XGENE_DMA_DESC_NV_SET(desc1);
+	desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT);
 
 	/* Set flyby opcode */
-	XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt));
+	desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt));
 
 	/* Set 1st to 5th source addresses */
 	for (i = 0; i < src_cnt; i++) {
 		len = *nbytes;
-		xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) :
+		xgene_dma_set_src_buffer((i == 0) ? &desc1->m1 :
 					 xgene_dma_lookup_ext8(desc2, i - 1),
 					 &len, &src[i]);
-		XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i);
+		desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8)));
 	}
 
-	/* Hardware stores descriptor in little endian format */
-	xgene_dma_cpu_to_le64(desc1, 4);
-	xgene_dma_cpu_to_le64(desc2, 4);
-
 	/* Update meta data */
 	*nbytes = len;
 	*dst += XGENE_DMA_MAX_BYTE_CNT;
@@ -738,7 +704,7 @@
  * xgene_chan_xfer_ld_pending - push any pending transactions to hw
  * @chan : X-Gene DMA channel
  *
- * LOCKING: must hold chan->desc_lock
+ * LOCKING: must hold chan->lock
  */
 static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan)
 {
@@ -808,7 +774,8 @@
 		desc_hw = &ring->desc_hw[ring->head];
 
 		/* Check if this descriptor has been completed */
-		if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw)))
+		if (unlikely(le64_to_cpu(desc_hw->m0) ==
+			     XGENE_DMA_DESC_EMPTY_SIGNATURE))
 			break;
 
 		if (++ring->head == ring->slots)
@@ -842,7 +809,7 @@
 		iowrite32(-1, ring->cmd);
 
 		/* Mark this hw descriptor as processed */
-		XGENE_DMA_DESC_SET_EMPTY(desc_hw);
+		desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE);
 
 		xgene_dma_run_tx_complete_actions(chan, desc_sw);
 
@@ -889,7 +856,7 @@
  * @chan: X-Gene DMA channel
  * @list: the list to free
  *
- * LOCKING: must hold chan->desc_lock
+ * LOCKING: must hold chan->lock
  */
 static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan,
 				     struct list_head *list)
@@ -900,15 +867,6 @@
 		xgene_dma_clean_descriptor(chan, desc);
 }
 
-static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan,
-					struct list_head *list)
-{
-	struct xgene_dma_desc_sw *desc, *_desc;
-
-	list_for_each_entry_safe(desc, _desc, list, node)
-		xgene_dma_clean_descriptor(chan, desc);
-}
-
 static void xgene_dma_free_chan_resources(struct dma_chan *dchan)
 {
 	struct xgene_dma_chan *chan = to_dma_chan(dchan);
@@ -985,7 +943,7 @@
 	if (!first)
 		return NULL;
 
-	xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+	xgene_dma_free_desc_list(chan, &first->tx_list);
 	return NULL;
 }
 
@@ -1093,7 +1051,7 @@
 	if (!first)
 		return NULL;
 
-	xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+	xgene_dma_free_desc_list(chan, &first->tx_list);
 	return NULL;
 }
 
@@ -1141,7 +1099,7 @@
 	if (!first)
 		return NULL;
 
-	xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+	xgene_dma_free_desc_list(chan, &first->tx_list);
 	return NULL;
 }
 
@@ -1218,7 +1176,7 @@
 	if (!first)
 		return NULL;
 
-	xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+	xgene_dma_free_desc_list(chan, &first->tx_list);
 	return NULL;
 }
 
@@ -1316,7 +1274,6 @@
 {
 	void *ring_cfg = ring->state;
 	u64 addr = ring->desc_paddr;
-	void *desc;
 	u32 i, val;
 
 	ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE;
@@ -1358,8 +1315,10 @@
 
 	/* Set empty signature to DMA Rx ring descriptors */
 	for (i = 0; i < ring->slots; i++) {
+		struct xgene_dma_desc_hw *desc;
+
 		desc = &ring->desc_hw[i];
-		XGENE_DMA_DESC_SET_EMPTY(desc);
+		desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE);
 	}
 
 	/* Enable DMA Rx ring interrupt */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ad41975..e2f5eb4 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -65,6 +65,7 @@
 	DMA_PQ,
 	DMA_XOR_VAL,
 	DMA_PQ_VAL,
+	DMA_MEMSET,
 	DMA_INTERRUPT,
 	DMA_SG,
 	DMA_PRIVATE,
@@ -122,10 +123,18 @@
  *	 chunk and before first src/dst address for next chunk.
  *	 Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false.
  *	 Ignored for src(assumed 0), if src_inc is true and src_sgl is false.
+ * @dst_icg: Number of bytes to jump after last dst address of this
+ *	 chunk and before the first dst address for next chunk.
+ *	 Ignored if dst_inc is true and dst_sgl is false.
+ * @src_icg: Number of bytes to jump after last src address of this
+ *	 chunk and before the first src address for next chunk.
+ *	 Ignored if src_inc is true and src_sgl is false.
  */
 struct data_chunk {
 	size_t size;
 	size_t icg;
+	size_t dst_icg;
+	size_t src_icg;
 };
 
 /**
@@ -222,6 +231,16 @@
 };
 
 /**
+ * struct dma_router - DMA router structure
+ * @dev: pointer to the DMA router device
+ * @route_free: function to be called when the route can be disconnected
+ */
+struct dma_router {
+	struct device *dev;
+	void (*route_free)(struct device *dev, void *route_data);
+};
+
+/**
  * struct dma_chan - devices supply DMA channels, clients use them
  * @device: ptr to the dma device who supplies this channel, always !%NULL
  * @cookie: last cookie value returned to client
@@ -232,6 +251,8 @@
  * @local: per-cpu pointer to a struct dma_chan_percpu
  * @client_count: how many clients are using this channel
  * @table_count: number of appearances in the mem-to-mem allocation table
+ * @router: pointer to the DMA router structure
+ * @route_data: channel specific data for the router
  * @private: private data for certain client-channel associations
  */
 struct dma_chan {
@@ -247,6 +268,11 @@
 	struct dma_chan_percpu __percpu *local;
 	int client_count;
 	int table_count;
+
+	/* DMA router */
+	struct dma_router *router;
+	void *route_data;
+
 	void *private;
 };
 
@@ -570,6 +596,7 @@
  * @copy_align: alignment shift for memcpy operations
  * @xor_align: alignment shift for xor operations
  * @pq_align: alignment shift for pq operations
+ * @fill_align: alignment shift for memset operations
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @src_addr_widths: bit mask of src addr widths the device supports
@@ -588,6 +615,7 @@
  * @device_prep_dma_xor_val: prepares a xor validation operation
  * @device_prep_dma_pq: prepares a pq operation
  * @device_prep_dma_pq_val: prepares a pqzero_sum operation
+ * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
  * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
@@ -620,6 +648,7 @@
 	u8 copy_align;
 	u8 xor_align;
 	u8 pq_align;
+	u8 fill_align;
 	#define DMA_HAS_PQ_CONTINUE (1 << 15)
 
 	int dev_id;
@@ -650,6 +679,9 @@
 		struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 		unsigned int src_cnt, const unsigned char *scf, size_t len,
 		enum sum_check_flags *pqres, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
+		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_sg)(
@@ -745,6 +777,17 @@
 	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset(
+		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
+		unsigned long flags)
+{
+	if (!chan || !chan->device)
+		return NULL;
+
+	return chan->device->device_prep_dma_memset(chan, dest, value,
+						    len, flags);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg(
 		struct dma_chan *chan,
 		struct scatterlist *dst_sg, unsigned int dst_nents,
@@ -820,6 +863,12 @@
 	return dmaengine_check_align(dev->pq_align, off1, off2, len);
 }
 
+static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
+				       size_t off2, size_t len)
+{
+	return dmaengine_check_align(dev->fill_align, off1, off2, len);
+}
+
 static inline void
 dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
 {
@@ -874,6 +923,33 @@
 	BUG();
 }
 
+static inline size_t dmaengine_get_icg(bool inc, bool sgl, size_t icg,
+				      size_t dir_icg)
+{
+	if (inc) {
+		if (dir_icg)
+			return dir_icg;
+		else if (sgl)
+			return icg;
+	}
+
+	return 0;
+}
+
+static inline size_t dmaengine_get_dst_icg(struct dma_interleaved_template *xt,
+					   struct data_chunk *chunk)
+{
+	return dmaengine_get_icg(xt->dst_inc, xt->dst_sgl,
+				 chunk->icg, chunk->dst_icg);
+}
+
+static inline size_t dmaengine_get_src_icg(struct dma_interleaved_template *xt,
+					   struct data_chunk *chunk)
+{
+	return dmaengine_get_icg(xt->src_inc, xt->src_sgl,
+				 chunk->icg, chunk->src_icg);
+}
+
 /* --- public DMA engine API --- */
 
 #ifdef CONFIG_DMA_ENGINE
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 56bc026..98ba752 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -23,6 +23,9 @@
 	struct device_node	*of_node;
 	struct dma_chan		*(*of_dma_xlate)
 				(struct of_phandle_args *, struct of_dma *);
+	void			*(*of_dma_route_allocate)
+				(struct of_phandle_args *, struct of_dma *);
+	struct dma_router	*dma_router;
 	void			*of_dma_data;
 };
 
@@ -37,12 +40,20 @@
 		(struct of_phandle_args *, struct of_dma *),
 		void *data);
 extern void of_dma_controller_free(struct device_node *np);
+
+extern int of_dma_router_register(struct device_node *np,
+		void *(*of_dma_route_allocate)
+		(struct of_phandle_args *, struct of_dma *),
+		struct dma_router *dma_router);
+#define of_dma_router_free of_dma_controller_free
+
 extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name);
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
 extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
+
 #else
 static inline int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
@@ -56,6 +67,16 @@
 {
 }
 
+static inline int of_dma_router_register(struct device_node *np,
+		void *(*of_dma_route_allocate)
+		(struct of_phandle_args *, struct of_dma *),
+		struct dma_router *dma_router)
+{
+	return -ENODEV;
+}
+
+#define of_dma_router_free of_dma_controller_free
+
 static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name)
 {
diff --git a/include/linux/platform_data/dma-rcar-audmapp.h b/include/linux/platform_data/dma-rcar-audmapp.h
deleted file mode 100644
index 471fffe..0000000
--- a/include/linux/platform_data/dma-rcar-audmapp.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * This is for Renesas R-Car Audio-DMAC-peri-peri.
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- * Copyright (C) 2014 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This file is based on the include/linux/sh_dma.h
- *
- * Header for the new SH dmaengine driver
- *
- * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef SH_AUDMAPP_H
-#define SH_AUDMAPP_H
-
-#include <linux/dmaengine.h>
-
-struct audmapp_slave_config {
-	int		slave_id;
-	dma_addr_t	src;
-	dma_addr_t	dst;
-	u32		chcr;
-};
-
-struct audmapp_pdata {
-	struct audmapp_slave_config *slave;
-	int slave_num;
-};
-
-#endif /* SH_AUDMAPP_H */