diff --git a/arch/sh/include/asm/siu.h b/arch/sh/include/asm/siu.h
index 1d95c78808d13cfaab77145d91921f76517aa205..580b7ac228b72029073fc7736e7982ad064d6830 100644
--- a/arch/sh/include/asm/siu.h
+++ b/arch/sh/include/asm/siu.h
@@ -14,7 +14,6 @@
 struct device;
 
 struct siu_platform {
-	struct device *dma_dev;
 	unsigned int dma_slave_tx_a;
 	unsigned int dma_slave_rx_a;
 	unsigned int dma_slave_tx_b;
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 0f5a21907da627ad31566dc43d906699c8f8e7d2..65786c7f5dedf1e449369dbeef90724bfd42ff5c 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -512,7 +512,6 @@ static struct platform_device tmu2_device = {
 };
 
 static struct siu_platform siu_platform_data = {
-	.dma_dev	= &dma_device.dev,
 	.dma_slave_tx_a	= SHDMA_SLAVE_SIUA_TX,
 	.dma_slave_rx_a	= SHDMA_SLAVE_SIUA_RX,
 	.dma_slave_tx_b	= SHDMA_SLAVE_SIUB_TX,
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index aadeb5be9dba25249d0215e3881b484fcfb813ac..d45cf1bcbde5f6b096d5cf38bf13b9c9ddf23a86 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -148,6 +148,20 @@ config TXX9_DMAC
 	  Support the TXx9 SoC internal DMA controller.  This can be
 	  integrated in chips such as the Toshiba TX4927/38/39.
 
+config TEGRA20_APB_DMA
+	bool "NVIDIA Tegra20 APB DMA support"
+	depends on ARCH_TEGRA
+	select DMA_ENGINE
+	help
+	  Support for the NVIDIA Tegra20 APB DMA controller driver. The
+	  DMA controller is having multiple DMA channel which can be
+	  configured for different peripherals like audio, UART, SPI,
+	  I2C etc which is in APB bus.
+	  This DMA controller transfers data from memory to peripheral fifo
+	  or vice versa. It does not support memory to memory data transfer.
+
+
+
 config SH_DMAE
 	tristate "Renesas SuperH DMAC support"
 	depends on (SUPERH && SH_DMA) || (ARM && ARCH_SHMOBILE)
@@ -237,7 +251,7 @@ config IMX_DMA
 
 config MXS_DMA
 	bool "MXS DMA support"
-	depends on SOC_IMX23 || SOC_IMX28
+	depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q
 	select STMP_DEVICE
 	select DMA_ENGINE
 	help
@@ -260,6 +274,16 @@ config DMA_SA11X0
 	  SA-1110 SoCs.  This DMA engine can only be used with on-chip
 	  devices.
 
+config MMP_TDMA
+	bool "MMP Two-Channel DMA support"
+	depends on ARCH_MMP
+	select DMA_ENGINE
+	help
+	  Support the MMP Two-Channel DMA engine.
+	  This engine used for MMP Audio DMA and pxa910 SQU.
+
+	  Say Y here if you enabled MMP ADMA, otherwise say N.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 86b795baba981907ed42f71ede7cd7263f0c5df8..640356add0a31f0bea96cc0568a2f290e41b0494 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
-obj-$(CONFIG_SH_DMAE) += shdma.o
+obj-$(CONFIG_SH_DMAE) += sh/
 obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
@@ -23,8 +23,10 @@ obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
+obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
 obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
 obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
+obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 7292aa87b2dd3fb54a4797f7c676d74a6a047713..3934fcc4e00b1b02778bb02866fb86073f9b1f0a 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -9,10 +9,9 @@
  * (at your option) any later version.
  *
  *
- * This supports the Atmel AHB DMA Controller,
- *
- * The driver has currently been tested with the Atmel AT91SAM9RL
- * and AT91SAM9G45 series.
+ * This supports the Atmel AHB DMA Controller found in several Atmel SoCs.
+ * The only Atmel DMA Controller that is not covered by this driver is the one
+ * found on AT91SAM9263.
  */
 
 #include <linux/clk.h>
@@ -1217,7 +1216,7 @@ static const struct platform_device_id atdma_devtypes[] = {
 	}
 };
 
-static inline struct at_dma_platform_data * __init at_dma_get_driver_data(
+static inline const struct at_dma_platform_data * __init at_dma_get_driver_data(
 						struct platform_device *pdev)
 {
 	if (pdev->dev.of_node) {
@@ -1255,7 +1254,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	int			irq;
 	int			err;
 	int			i;
-	struct at_dma_platform_data *plat_dat;
+	const struct at_dma_platform_data *plat_dat;
 
 	/* setup platform data for each SoC */
 	dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask);
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index e67b4e06a918350137e8119e940618318f2c7867..aa384e53b7aca152c71c82a79a817313b276a79b 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -1438,34 +1438,32 @@ static int __init coh901318_probe(struct platform_device *pdev)
 
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!io)
-		goto err_get_resource;
+		return -ENODEV;
 
 	/* Map DMA controller registers to virtual memory */
-	if (request_mem_region(io->start,
-			       resource_size(io),
-			       pdev->dev.driver->name) == NULL) {
-		err = -EBUSY;
-		goto err_request_mem;
-	}
+	if (devm_request_mem_region(&pdev->dev,
+				    io->start,
+				    resource_size(io),
+				    pdev->dev.driver->name) == NULL)
+		return -ENOMEM;
 
 	pdata = pdev->dev.platform_data;
 	if (!pdata)
-		goto err_no_platformdata;
+		return -ENODEV;
 
-	base = kmalloc(ALIGN(sizeof(struct coh901318_base), 4) +
-		       pdata->max_channels *
-		       sizeof(struct coh901318_chan),
-		       GFP_KERNEL);
+	base = devm_kzalloc(&pdev->dev,
+			    ALIGN(sizeof(struct coh901318_base), 4) +
+			    pdata->max_channels *
+			    sizeof(struct coh901318_chan),
+			    GFP_KERNEL);
 	if (!base)
-		goto err_alloc_coh_dma_channels;
+		return -ENOMEM;
 
 	base->chans = ((void *)base) + ALIGN(sizeof(struct coh901318_base), 4);
 
-	base->virtbase = ioremap(io->start, resource_size(io));
-	if (!base->virtbase) {
-		err = -ENOMEM;
-		goto err_no_ioremap;
-	}
+	base->virtbase = devm_ioremap(&pdev->dev, io->start, resource_size(io));
+	if (!base->virtbase)
+		return -ENOMEM;
 
 	base->dev = &pdev->dev;
 	base->platform = pdata;
@@ -1474,25 +1472,20 @@ static int __init coh901318_probe(struct platform_device *pdev)
 
 	COH901318_DEBUGFS_ASSIGN(debugfs_dma_base, base);
 
-	platform_set_drvdata(pdev, base);
-
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
-		goto err_no_irq;
-
-	err = request_irq(irq, dma_irq_handler, IRQF_DISABLED,
-			  "coh901318", base);
-	if (err) {
-		dev_crit(&pdev->dev,
-			 "Cannot allocate IRQ for DMA controller!\n");
-		goto err_request_irq;
-	}
+		return irq;
+
+	err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, IRQF_DISABLED,
+			       "coh901318", base);
+	if (err)
+		return err;
 
 	err = coh901318_pool_create(&base->pool, &pdev->dev,
 				    sizeof(struct coh901318_lli),
 				    32);
 	if (err)
-		goto err_pool_create;
+		return err;
 
 	/* init channels for device transfers */
 	coh901318_base_init(&base->dma_slave,  base->platform->chans_slave,
@@ -1538,6 +1531,7 @@ static int __init coh901318_probe(struct platform_device *pdev)
 	if (err)
 		goto err_register_memcpy;
 
+	platform_set_drvdata(pdev, base);
 	dev_info(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%08x\n",
 		(u32) base->virtbase);
 
@@ -1547,19 +1541,6 @@ static int __init coh901318_probe(struct platform_device *pdev)
 	dma_async_device_unregister(&base->dma_slave);
  err_register_slave:
 	coh901318_pool_destroy(&base->pool);
- err_pool_create:
-	free_irq(platform_get_irq(pdev, 0), base);
- err_request_irq:
- err_no_irq:
-	iounmap(base->virtbase);
- err_no_ioremap:
-	kfree(base);
- err_alloc_coh_dma_channels:
- err_no_platformdata:
-	release_mem_region(pdev->resource->start,
-			   resource_size(pdev->resource));
- err_request_mem:
- err_get_resource:
 	return err;
 }
 
@@ -1570,11 +1551,6 @@ static int __exit coh901318_remove(struct platform_device *pdev)
 	dma_async_device_unregister(&base->dma_memcpy);
 	dma_async_device_unregister(&base->dma_slave);
 	coh901318_pool_destroy(&base->pool);
-	free_irq(platform_get_irq(pdev, 0), base);
-	iounmap(base->virtbase);
-	kfree(base);
-	release_mem_region(pdev->resource->start,
-			   resource_size(pdev->resource));
 	return 0;
 }
 
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 2397f6f451b15ccb274d367c9955cefa27c56013..3491654cdf7b84fdcd2253f71542b3a53dcefdb9 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -45,6 +45,8 @@
  * See Documentation/dmaengine.txt for more details
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/dma-mapping.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -261,7 +263,7 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
 	do {
 		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
 		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
-			printk(KERN_ERR "dma_sync_wait_timeout!\n");
+			pr_err("%s: timeout!\n", __func__);
 			return DMA_ERROR;
 		}
 	} while (status == DMA_IN_PROGRESS);
@@ -312,7 +314,7 @@ static int __init dma_channel_table_init(void)
 	}
 
 	if (err) {
-		pr_err("dmaengine: initialization failure\n");
+		pr_err("initialization failure\n");
 		for_each_dma_cap_mask(cap, dma_cap_mask_all)
 			if (channel_table[cap])
 				free_percpu(channel_table[cap]);
@@ -520,12 +522,12 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
 			err = dma_chan_get(chan);
 
 			if (err == -ENODEV) {
-				pr_debug("%s: %s module removed\n", __func__,
-					 dma_chan_name(chan));
+				pr_debug("%s: %s module removed\n",
+					 __func__, dma_chan_name(chan));
 				list_del_rcu(&device->global_node);
 			} else if (err)
 				pr_debug("%s: failed to get %s: (%d)\n",
-					__func__, dma_chan_name(chan), err);
+					 __func__, dma_chan_name(chan), err);
 			else
 				break;
 			if (--device->privatecnt == 0)
@@ -535,7 +537,9 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
 	}
 	mutex_unlock(&dma_list_mutex);
 
-	pr_debug("%s: %s (%s)\n", __func__, chan ? "success" : "fail",
+	pr_debug("%s: %s (%s)\n",
+		 __func__,
+		 chan ? "success" : "fail",
 		 chan ? dma_chan_name(chan) : NULL);
 
 	return chan;
@@ -579,7 +583,7 @@ void dmaengine_get(void)
 				break;
 			} else if (err)
 				pr_err("%s: failed to get %s: (%d)\n",
-					__func__, dma_chan_name(chan), err);
+				       __func__, dma_chan_name(chan), err);
 		}
 	}
 
@@ -1015,7 +1019,7 @@ dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 	while (tx->cookie == -EBUSY) {
 		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
 			pr_err("%s timeout waiting for descriptor submission\n",
-				__func__);
+			       __func__);
 			return DMA_ERROR;
 		}
 		cpu_relax();
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 7212961575770df8c17d00d666bd11a79553a0b2..d3c5a5a88f1e9af91b97848f6d9c107ac2010da5 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -105,13 +105,13 @@ static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
 
 	spin_lock_irqsave(&dwc->lock, flags);
 	list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+		i++;
 		if (async_tx_test_ack(&desc->txd)) {
 			list_del(&desc->desc_node);
 			ret = desc;
 			break;
 		}
 		dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
-		i++;
 	}
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
@@ -191,6 +191,42 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 
 /*----------------------------------------------------------------------*/
 
+static inline unsigned int dwc_fast_fls(unsigned long long v)
+{
+	/*
+	 * We can be a lot more clever here, but this should take care
+	 * of the most common optimization.
+	 */
+	if (!(v & 7))
+		return 3;
+	else if (!(v & 3))
+		return 2;
+	else if (!(v & 1))
+		return 1;
+	return 0;
+}
+
+static void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
+{
+	dev_err(chan2dev(&dwc->chan),
+		"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+		channel_readl(dwc, SAR),
+		channel_readl(dwc, DAR),
+		channel_readl(dwc, LLP),
+		channel_readl(dwc, CTL_HI),
+		channel_readl(dwc, CTL_LO));
+}
+
+
+static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	channel_clear_bit(dw, CH_EN, dwc->mask);
+	while (dma_readl(dw, CH_EN) & dwc->mask)
+		cpu_relax();
+}
+
+/*----------------------------------------------------------------------*/
+
 /* Called with dwc->lock held and bh disabled */
 static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 {
@@ -200,13 +236,7 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 	if (dma_readl(dw, CH_EN) & dwc->mask) {
 		dev_err(chan2dev(&dwc->chan),
 			"BUG: Attempted to start non-idle channel\n");
-		dev_err(chan2dev(&dwc->chan),
-			"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
-			channel_readl(dwc, SAR),
-			channel_readl(dwc, DAR),
-			channel_readl(dwc, LLP),
-			channel_readl(dwc, CTL_HI),
-			channel_readl(dwc, CTL_LO));
+		dwc_dump_chan_regs(dwc);
 
 		/* The tasklet will hopefully advance the queue... */
 		return;
@@ -290,9 +320,7 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
 			"BUG: XFER bit set, but channel not idle!\n");
 
 		/* Try to continue after resetting the channel... */
-		channel_clear_bit(dw, CH_EN, dwc->mask);
-		while (dma_readl(dw, CH_EN) & dwc->mask)
-			cpu_relax();
+		dwc_chan_disable(dw, dwc);
 	}
 
 	/*
@@ -337,7 +365,8 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 		return;
 	}
 
-	dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp);
+	dev_vdbg(chan2dev(&dwc->chan), "%s: llp=0x%llx\n", __func__,
+			(unsigned long long)llp);
 
 	list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
 		/* check first descriptors addr */
@@ -373,9 +402,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 		"BUG: All descriptors done, but channel not idle!\n");
 
 	/* Try to continue after resetting the channel... */
-	channel_clear_bit(dw, CH_EN, dwc->mask);
-	while (dma_readl(dw, CH_EN) & dwc->mask)
-		cpu_relax();
+	dwc_chan_disable(dw, dwc);
 
 	if (!list_empty(&dwc->queue)) {
 		list_move(dwc->queue.next, &dwc->active_list);
@@ -384,12 +411,11 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	spin_unlock_irqrestore(&dwc->lock, flags);
 }
 
-static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
+static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
 {
 	dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
 			"  desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
-			lli->sar, lli->dar, lli->llp,
-			lli->ctlhi, lli->ctllo);
+			lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo);
 }
 
 static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
@@ -487,17 +513,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
 
 		spin_lock_irqsave(&dwc->lock, flags);
 
-		dev_err(chan2dev(&dwc->chan),
-			"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
-			channel_readl(dwc, SAR),
-			channel_readl(dwc, DAR),
-			channel_readl(dwc, LLP),
-			channel_readl(dwc, CTL_HI),
-			channel_readl(dwc, CTL_LO));
+		dwc_dump_chan_regs(dwc);
 
-		channel_clear_bit(dw, CH_EN, dwc->mask);
-		while (dma_readl(dw, CH_EN) & dwc->mask)
-			cpu_relax();
+		dwc_chan_disable(dw, dwc);
 
 		/* make sure DMA does not restart by loading a new list */
 		channel_writel(dwc, LLP, 0);
@@ -527,7 +545,7 @@ static void dw_dma_tasklet(unsigned long data)
 	status_xfer = dma_readl(dw, RAW.XFER);
 	status_err = dma_readl(dw, RAW.ERROR);
 
-	dev_vdbg(dw->dma.dev, "tasklet: status_err=%x\n", status_err);
+	dev_vdbg(dw->dma.dev, "%s: status_err=%x\n", __func__, status_err);
 
 	for (i = 0; i < dw->dma.chancnt; i++) {
 		dwc = &dw->chan[i];
@@ -551,7 +569,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
 	struct dw_dma *dw = dev_id;
 	u32 status;
 
-	dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n",
+	dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__,
 			dma_readl(dw, STATUS_INT));
 
 	/*
@@ -597,12 +615,12 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
 	 * for DMA. But this is hard to do in a race-free manner.
 	 */
 	if (list_empty(&dwc->active_list)) {
-		dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
+		dev_vdbg(chan2dev(tx->chan), "%s: started %u\n", __func__,
 				desc->txd.cookie);
 		list_add_tail(&desc->desc_node, &dwc->active_list);
 		dwc_dostart(dwc, dwc_first_active(dwc));
 	} else {
-		dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
+		dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__,
 				desc->txd.cookie);
 
 		list_add_tail(&desc->desc_node, &dwc->queue);
@@ -627,26 +645,17 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	unsigned int		dst_width;
 	u32			ctllo;
 
-	dev_vdbg(chan2dev(chan), "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n",
-			dest, src, len, flags);
+	dev_vdbg(chan2dev(chan),
+			"%s: d0x%llx s0x%llx l0x%zx f0x%lx\n", __func__,
+			(unsigned long long)dest, (unsigned long long)src,
+			len, flags);
 
 	if (unlikely(!len)) {
-		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+		dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__);
 		return NULL;
 	}
 
-	/*
-	 * We can be a lot more clever here, but this should take care
-	 * of the most common optimization.
-	 */
-	if (!((src | dest  | len) & 7))
-		src_width = dst_width = 3;
-	else if (!((src | dest  | len) & 3))
-		src_width = dst_width = 2;
-	else if (!((src | dest | len) & 1))
-		src_width = dst_width = 1;
-	else
-		src_width = dst_width = 0;
+	src_width = dst_width = dwc_fast_fls(src | dest | len);
 
 	ctllo = DWC_DEFAULT_CTLLO(chan)
 			| DWC_CTLL_DST_WIDTH(dst_width)
@@ -720,7 +729,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	struct scatterlist	*sg;
 	size_t			total_len = 0;
 
-	dev_vdbg(chan2dev(chan), "prep_dma_slave\n");
+	dev_vdbg(chan2dev(chan), "%s\n", __func__);
 
 	if (unlikely(!dws || !sg_len))
 		return NULL;
@@ -746,14 +755,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			if (!((mem | len) & 7))
-				mem_width = 3;
-			else if (!((mem | len) & 3))
-				mem_width = 2;
-			else if (!((mem | len) & 1))
-				mem_width = 1;
-			else
-				mem_width = 0;
+			mem_width = dwc_fast_fls(mem | len);
 
 slave_sg_todev_fill_desc:
 			desc = dwc_desc_get(dwc);
@@ -813,14 +815,7 @@ slave_sg_todev_fill_desc:
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			if (!((mem | len) & 7))
-				mem_width = 3;
-			else if (!((mem | len) & 3))
-				mem_width = 2;
-			else if (!((mem | len) & 1))
-				mem_width = 1;
-			else
-				mem_width = 0;
+			mem_width = dwc_fast_fls(mem | len);
 
 slave_sg_fromdev_fill_desc:
 			desc = dwc_desc_get(dwc);
@@ -950,9 +945,7 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	} else if (cmd == DMA_TERMINATE_ALL) {
 		spin_lock_irqsave(&dwc->lock, flags);
 
-		channel_clear_bit(dw, CH_EN, dwc->mask);
-		while (dma_readl(dw, CH_EN) & dwc->mask)
-			cpu_relax();
+		dwc_chan_disable(dw, dwc);
 
 		dwc->paused = false;
 
@@ -1014,7 +1007,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 	int			i;
 	unsigned long		flags;
 
-	dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+	dev_vdbg(chan2dev(chan), "%s\n", __func__);
 
 	/* ASSERT:  channel is idle */
 	if (dma_readl(dw, CH_EN) & dwc->mask) {
@@ -1057,8 +1050,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
-	dev_dbg(chan2dev(chan),
-		"alloc_chan_resources allocated %d descriptors\n", i);
+	dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
 
 	return i;
 }
@@ -1071,7 +1063,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	unsigned long		flags;
 	LIST_HEAD(list);
 
-	dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
+	dev_dbg(chan2dev(chan), "%s: descs allocated=%u\n", __func__,
 			dwc->descs_allocated);
 
 	/* ASSERT:  channel is idle */
@@ -1097,7 +1089,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 		kfree(desc);
 	}
 
-	dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
+	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
 /* --------------------- Cyclic DMA API extensions -------------------- */
@@ -1126,13 +1118,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
 	if (dma_readl(dw, CH_EN) & dwc->mask) {
 		dev_err(chan2dev(&dwc->chan),
 			"BUG: Attempted to start non-idle channel\n");
-		dev_err(chan2dev(&dwc->chan),
-			"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
-			channel_readl(dwc, SAR),
-			channel_readl(dwc, DAR),
-			channel_readl(dwc, LLP),
-			channel_readl(dwc, CTL_HI),
-			channel_readl(dwc, CTL_LO));
+		dwc_dump_chan_regs(dwc);
 		spin_unlock_irqrestore(&dwc->lock, flags);
 		return -EBUSY;
 	}
@@ -1167,9 +1153,7 @@ void dw_dma_cyclic_stop(struct dma_chan *chan)
 
 	spin_lock_irqsave(&dwc->lock, flags);
 
-	channel_clear_bit(dw, CH_EN, dwc->mask);
-	while (dma_readl(dw, CH_EN) & dwc->mask)
-		cpu_relax();
+	dwc_chan_disable(dw, dwc);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 }
@@ -1308,9 +1292,9 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 	dma_sync_single_for_device(chan2parent(chan), last->txd.phys,
 			sizeof(last->lli), DMA_TO_DEVICE);
 
-	dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf 0x%08x len %zu "
-			"period %zu periods %d\n", buf_addr, buf_len,
-			period_len, periods);
+	dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf 0x%llx len %zu "
+			"period %zu periods %d\n", (unsigned long long)buf_addr,
+			buf_len, period_len, periods);
 
 	cdesc->periods = periods;
 	dwc->cdesc = cdesc;
@@ -1340,16 +1324,14 @@ void dw_dma_cyclic_free(struct dma_chan *chan)
 	int			i;
 	unsigned long		flags;
 
-	dev_dbg(chan2dev(&dwc->chan), "cyclic free\n");
+	dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__);
 
 	if (!cdesc)
 		return;
 
 	spin_lock_irqsave(&dwc->lock, flags);
 
-	channel_clear_bit(dw, CH_EN, dwc->mask);
-	while (dma_readl(dw, CH_EN) & dwc->mask)
-		cpu_relax();
+	dwc_chan_disable(dw, dwc);
 
 	dma_writel(dw, CLEAR.ERROR, dwc->mask);
 	dma_writel(dw, CLEAR.XFER, dwc->mask);
@@ -1386,7 +1368,7 @@ static void dw_dma_off(struct dw_dma *dw)
 		dw->chan[i].initialized = false;
 }
 
-static int __init dw_probe(struct platform_device *pdev)
+static int __devinit dw_probe(struct platform_device *pdev)
 {
 	struct dw_dma_platform_data *pdata;
 	struct resource		*io;
@@ -1432,9 +1414,15 @@ static int __init dw_probe(struct platform_device *pdev)
 	}
 	clk_prepare_enable(dw->clk);
 
+	/* Calculate all channel mask before DMA setup */
+	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
 	/* force dma off, just in case */
 	dw_dma_off(dw);
 
+	/* disable BLOCK interrupts as well */
+	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+
 	err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
 	if (err)
 		goto err_irq;
@@ -1443,8 +1431,6 @@ static int __init dw_probe(struct platform_device *pdev)
 
 	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
 
-	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
-
 	INIT_LIST_HEAD(&dw->dma.channels);
 	for (i = 0; i < pdata->nr_channels; i++) {
 		struct dw_dma_chan	*dwc = &dw->chan[i];
@@ -1474,17 +1460,13 @@ static int __init dw_probe(struct platform_device *pdev)
 		channel_clear_bit(dw, CH_EN, dwc->mask);
 	}
 
-	/* Clear/disable all interrupts on all channels. */
+	/* Clear all interrupts on all channels. */
 	dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
 
-	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
-
 	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
 	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
 	if (pdata->is_private)
@@ -1523,7 +1505,7 @@ err_kfree:
 	return err;
 }
 
-static int __exit dw_remove(struct platform_device *pdev)
+static int __devexit dw_remove(struct platform_device *pdev)
 {
 	struct dw_dma		*dw = platform_get_drvdata(pdev);
 	struct dw_dma_chan	*dwc, *_dwc;
@@ -1602,7 +1584,7 @@ MODULE_DEVICE_TABLE(of, dw_dma_id_table);
 #endif
 
 static struct platform_driver dw_driver = {
-	.remove		= __exit_p(dw_remove),
+	.remove		= __devexit_p(dw_remove),
 	.shutdown	= dw_shutdown,
 	.driver = {
 		.name	= "dw_dmac",
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index f298f69ecbf997959235f5f620087917b23a023c..50830bee087a65eac6178e9111b72db9cbe0c49a 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -82,7 +82,7 @@ struct dw_dma_regs {
 	DW_REG(ID);
 	DW_REG(TEST);
 
-	/* optional encoded params, 0x3c8..0x3 */
+	/* optional encoded params, 0x3c8..0x3f7 */
 };
 
 /* Bitfields in CTL_LO */
@@ -219,9 +219,9 @@ static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
 /* LLI == Linked List Item; a.k.a. DMA block descriptor */
 struct dw_lli {
 	/* values that are not changed by hardware */
-	dma_addr_t	sar;
-	dma_addr_t	dar;
-	dma_addr_t	llp;		/* chain to next lli */
+	u32		sar;
+	u32		dar;
+	u32		llp;		/* chain to next lli */
 	u32		ctllo;
 	/* values that may get written back: */
 	u32		ctlhi;
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
new file mode 100644
index 0000000000000000000000000000000000000000..8a15cf2163dc8512dcb8805694a327e03ba86f01
--- /dev/null
+++ b/drivers/dma/mmp_tdma.c
@@ -0,0 +1,610 @@
+/*
+ * Driver For Marvell Two-channel DMA Engine
+ *
+ * Copyright: Marvell International Ltd.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <mach/regs-icu.h>
+#include <mach/sram.h>
+
+#include "dmaengine.h"
+
+/*
+ * Two-Channel DMA registers
+ */
+#define TDBCR		0x00	/* Byte Count */
+#define TDSAR		0x10	/* Src Addr */
+#define TDDAR		0x20	/* Dst Addr */
+#define TDNDPR		0x30	/* Next Desc */
+#define TDCR		0x40	/* Control */
+#define TDCP		0x60	/* Priority*/
+#define TDCDPR		0x70	/* Current Desc */
+#define TDIMR		0x80	/* Int Mask */
+#define TDISR		0xa0	/* Int Status */
+
+/* Two-Channel DMA Control Register */
+#define TDCR_SSZ_8_BITS		(0x0 << 22)	/* Sample Size */
+#define TDCR_SSZ_12_BITS	(0x1 << 22)
+#define TDCR_SSZ_16_BITS	(0x2 << 22)
+#define TDCR_SSZ_20_BITS	(0x3 << 22)
+#define TDCR_SSZ_24_BITS	(0x4 << 22)
+#define TDCR_SSZ_32_BITS	(0x5 << 22)
+#define TDCR_SSZ_SHIFT		(0x1 << 22)
+#define TDCR_SSZ_MASK		(0x7 << 22)
+#define TDCR_SSPMOD		(0x1 << 21)	/* SSP MOD */
+#define TDCR_ABR		(0x1 << 20)	/* Channel Abort */
+#define TDCR_CDE		(0x1 << 17)	/* Close Desc Enable */
+#define TDCR_PACKMOD		(0x1 << 16)	/* Pack Mode (ADMA Only) */
+#define TDCR_CHANACT		(0x1 << 14)	/* Channel Active */
+#define TDCR_FETCHND		(0x1 << 13)	/* Fetch Next Desc */
+#define TDCR_CHANEN		(0x1 << 12)	/* Channel Enable */
+#define TDCR_INTMODE		(0x1 << 10)	/* Interrupt Mode */
+#define TDCR_CHAINMOD		(0x1 << 9)	/* Chain Mode */
+#define TDCR_BURSTSZ_MSK	(0x7 << 6)	/* Burst Size */
+#define TDCR_BURSTSZ_4B		(0x0 << 6)
+#define TDCR_BURSTSZ_8B		(0x1 << 6)
+#define TDCR_BURSTSZ_16B	(0x3 << 6)
+#define TDCR_BURSTSZ_32B	(0x6 << 6)
+#define TDCR_BURSTSZ_64B	(0x7 << 6)
+#define TDCR_BURSTSZ_SQU_32B	(0x7 << 6)
+#define TDCR_BURSTSZ_128B	(0x5 << 6)
+#define TDCR_DSTDIR_MSK		(0x3 << 4)	/* Dst Direction */
+#define TDCR_DSTDIR_ADDR_HOLD	(0x2 << 4)	/* Dst Addr Hold */
+#define TDCR_DSTDIR_ADDR_INC	(0x0 << 4)	/* Dst Addr Increment */
+#define TDCR_SRCDIR_MSK		(0x3 << 2)	/* Src Direction */
+#define TDCR_SRCDIR_ADDR_HOLD	(0x2 << 2)	/* Src Addr Hold */
+#define TDCR_SRCDIR_ADDR_INC	(0x0 << 2)	/* Src Addr Increment */
+#define TDCR_DSTDESCCONT	(0x1 << 1)
+#define TDCR_SRCDESTCONT	(0x1 << 0)
+
+/* Two-Channel DMA Int Mask Register */
+#define TDIMR_COMP		(0x1 << 0)
+
+/* Two-Channel DMA Int Status Register */
+#define TDISR_COMP		(0x1 << 0)
+
+/*
+ * Two-Channel DMA Descriptor Struct
+ * NOTE: desc's buf must be aligned to 16 bytes.
+ */
+struct mmp_tdma_desc {
+	u32 byte_cnt;
+	u32 src_addr;
+	u32 dst_addr;
+	u32 nxt_desc;
+};
+
+enum mmp_tdma_type {
+	MMP_AUD_TDMA = 0,
+	PXA910_SQU,
+};
+
+#define TDMA_ALIGNMENT		3
+#define TDMA_MAX_XFER_BYTES    SZ_64K
+
+struct mmp_tdma_chan {
+	struct device			*dev;
+	struct dma_chan			chan;
+	struct dma_async_tx_descriptor	desc;
+	struct tasklet_struct		tasklet;
+
+	struct mmp_tdma_desc		*desc_arr;
+	phys_addr_t			desc_arr_phys;
+	int				desc_num;
+	enum dma_transfer_direction	dir;
+	dma_addr_t			dev_addr;
+	u32				burst_sz;
+	enum dma_slave_buswidth		buswidth;
+	enum dma_status			status;
+
+	int				idx;
+	enum mmp_tdma_type		type;
+	int				irq;
+	unsigned long			reg_base;
+
+	size_t				buf_len;
+	size_t				period_len;
+	size_t				pos;
+};
+
+#define TDMA_CHANNEL_NUM 2
+struct mmp_tdma_device {
+	struct device			*dev;
+	void __iomem			*base;
+	struct dma_device		device;
+	struct mmp_tdma_chan		*tdmac[TDMA_CHANNEL_NUM];
+	int				irq;
+};
+
+#define to_mmp_tdma_chan(dchan) container_of(dchan, struct mmp_tdma_chan, chan)
+
+static void mmp_tdma_chan_set_desc(struct mmp_tdma_chan *tdmac, dma_addr_t phys)
+{
+	writel(phys, tdmac->reg_base + TDNDPR);
+	writel(readl(tdmac->reg_base + TDCR) | TDCR_FETCHND,
+					tdmac->reg_base + TDCR);
+}
+
+static void mmp_tdma_enable_chan(struct mmp_tdma_chan *tdmac)
+{
+	/* enable irq */
+	writel(TDIMR_COMP, tdmac->reg_base + TDIMR);
+	/* enable dma chan */
+	writel(readl(tdmac->reg_base + TDCR) | TDCR_CHANEN,
+					tdmac->reg_base + TDCR);
+	tdmac->status = DMA_IN_PROGRESS;
+}
+
+static void mmp_tdma_disable_chan(struct mmp_tdma_chan *tdmac)
+{
+	writel(readl(tdmac->reg_base + TDCR) & ~TDCR_CHANEN,
+					tdmac->reg_base + TDCR);
+	tdmac->status = DMA_SUCCESS;
+}
+
+static void mmp_tdma_resume_chan(struct mmp_tdma_chan *tdmac)
+{
+	writel(readl(tdmac->reg_base + TDCR) | TDCR_CHANEN,
+					tdmac->reg_base + TDCR);
+	tdmac->status = DMA_IN_PROGRESS;
+}
+
+static void mmp_tdma_pause_chan(struct mmp_tdma_chan *tdmac)
+{
+	writel(readl(tdmac->reg_base + TDCR) & ~TDCR_CHANEN,
+					tdmac->reg_base + TDCR);
+	tdmac->status = DMA_PAUSED;
+}
+
+static int mmp_tdma_config_chan(struct mmp_tdma_chan *tdmac)
+{
+	unsigned int tdcr;
+
+	mmp_tdma_disable_chan(tdmac);
+
+	if (tdmac->dir == DMA_MEM_TO_DEV)
+		tdcr = TDCR_DSTDIR_ADDR_HOLD | TDCR_SRCDIR_ADDR_INC;
+	else if (tdmac->dir == DMA_DEV_TO_MEM)
+		tdcr = TDCR_SRCDIR_ADDR_HOLD | TDCR_DSTDIR_ADDR_INC;
+
+	if (tdmac->type == MMP_AUD_TDMA) {
+		tdcr |= TDCR_PACKMOD;
+
+		switch (tdmac->burst_sz) {
+		case 4:
+			tdcr |= TDCR_BURSTSZ_4B;
+			break;
+		case 8:
+			tdcr |= TDCR_BURSTSZ_8B;
+			break;
+		case 16:
+			tdcr |= TDCR_BURSTSZ_16B;
+			break;
+		case 32:
+			tdcr |= TDCR_BURSTSZ_32B;
+			break;
+		case 64:
+			tdcr |= TDCR_BURSTSZ_64B;
+			break;
+		case 128:
+			tdcr |= TDCR_BURSTSZ_128B;
+			break;
+		default:
+			dev_err(tdmac->dev, "mmp_tdma: unknown burst size.\n");
+			return -EINVAL;
+		}
+
+		switch (tdmac->buswidth) {
+		case DMA_SLAVE_BUSWIDTH_1_BYTE:
+			tdcr |= TDCR_SSZ_8_BITS;
+			break;
+		case DMA_SLAVE_BUSWIDTH_2_BYTES:
+			tdcr |= TDCR_SSZ_16_BITS;
+			break;
+		case DMA_SLAVE_BUSWIDTH_4_BYTES:
+			tdcr |= TDCR_SSZ_32_BITS;
+			break;
+		default:
+			dev_err(tdmac->dev, "mmp_tdma: unknown bus size.\n");
+			return -EINVAL;
+		}
+	} else if (tdmac->type == PXA910_SQU) {
+		tdcr |= TDCR_BURSTSZ_SQU_32B;
+		tdcr |= TDCR_SSPMOD;
+	}
+
+	writel(tdcr, tdmac->reg_base + TDCR);
+	return 0;
+}
+
+static int mmp_tdma_clear_chan_irq(struct mmp_tdma_chan *tdmac)
+{
+	u32 reg = readl(tdmac->reg_base + TDISR);
+
+	if (reg & TDISR_COMP) {
+		/* clear irq */
+		reg &= ~TDISR_COMP;
+		writel(reg, tdmac->reg_base + TDISR);
+
+		return 0;
+	}
+	return -EAGAIN;
+}
+
+static irqreturn_t mmp_tdma_chan_handler(int irq, void *dev_id)
+{
+	struct mmp_tdma_chan *tdmac = dev_id;
+
+	if (mmp_tdma_clear_chan_irq(tdmac) == 0) {
+		tdmac->pos = (tdmac->pos + tdmac->period_len) % tdmac->buf_len;
+		tasklet_schedule(&tdmac->tasklet);
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+static irqreturn_t mmp_tdma_int_handler(int irq, void *dev_id)
+{
+	struct mmp_tdma_device *tdev = dev_id;
+	int i, ret;
+	int irq_num = 0;
+
+	for (i = 0; i < TDMA_CHANNEL_NUM; i++) {
+		struct mmp_tdma_chan *tdmac = tdev->tdmac[i];
+
+		ret = mmp_tdma_chan_handler(irq, tdmac);
+		if (ret == IRQ_HANDLED)
+			irq_num++;
+	}
+
+	if (irq_num)
+		return IRQ_HANDLED;
+	else
+		return IRQ_NONE;
+}
+
+static void dma_do_tasklet(unsigned long data)
+{
+	struct mmp_tdma_chan *tdmac = (struct mmp_tdma_chan *)data;
+
+	if (tdmac->desc.callback)
+		tdmac->desc.callback(tdmac->desc.callback_param);
+
+}
+
+static void mmp_tdma_free_descriptor(struct mmp_tdma_chan *tdmac)
+{
+	struct gen_pool *gpool;
+	int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc);
+
+	gpool = sram_get_gpool("asram");
+	if (tdmac->desc_arr)
+		gen_pool_free(gpool, (unsigned long)tdmac->desc_arr,
+				size);
+	tdmac->desc_arr = NULL;
+
+	return;
+}
+
+static dma_cookie_t mmp_tdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(tx->chan);
+
+	mmp_tdma_chan_set_desc(tdmac, tdmac->desc_arr_phys);
+
+	return 0;
+}
+
+static int mmp_tdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+	int ret;
+
+	dma_async_tx_descriptor_init(&tdmac->desc, chan);
+	tdmac->desc.tx_submit = mmp_tdma_tx_submit;
+
+	if (tdmac->irq) {
+		ret = devm_request_irq(tdmac->dev, tdmac->irq,
+			mmp_tdma_chan_handler, IRQF_DISABLED, "tdma", tdmac);
+		if (ret)
+			return ret;
+	}
+	return 1;
+}
+
+static void mmp_tdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	if (tdmac->irq)
+		devm_free_irq(tdmac->dev, tdmac->irq, tdmac);
+	mmp_tdma_free_descriptor(tdmac);
+	return;
+}
+
+struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac)
+{
+	struct gen_pool *gpool;
+	int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc);
+
+	gpool = sram_get_gpool("asram");
+	if (!gpool)
+		return NULL;
+
+	tdmac->desc_arr = (void *)gen_pool_alloc(gpool, size);
+	if (!tdmac->desc_arr)
+		return NULL;
+
+	tdmac->desc_arr_phys = gen_pool_virt_to_phys(gpool,
+			(unsigned long)tdmac->desc_arr);
+
+	return tdmac->desc_arr;
+}
+
+static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+	struct mmp_tdma_desc *desc;
+	int num_periods = buf_len / period_len;
+	int i = 0, buf = 0;
+
+	if (tdmac->status != DMA_SUCCESS)
+		return NULL;
+
+	if (period_len > TDMA_MAX_XFER_BYTES) {
+		dev_err(tdmac->dev,
+				"maximum period size exceeded: %d > %d\n",
+				period_len, TDMA_MAX_XFER_BYTES);
+		goto err_out;
+	}
+
+	tdmac->status = DMA_IN_PROGRESS;
+	tdmac->desc_num = num_periods;
+	desc = mmp_tdma_alloc_descriptor(tdmac);
+	if (!desc)
+		goto err_out;
+
+	while (buf < buf_len) {
+		desc = &tdmac->desc_arr[i];
+
+		if (i + 1 == num_periods)
+			desc->nxt_desc = tdmac->desc_arr_phys;
+		else
+			desc->nxt_desc = tdmac->desc_arr_phys +
+				sizeof(*desc) * (i + 1);
+
+		if (direction == DMA_MEM_TO_DEV) {
+			desc->src_addr = dma_addr;
+			desc->dst_addr = tdmac->dev_addr;
+		} else {
+			desc->src_addr = tdmac->dev_addr;
+			desc->dst_addr = dma_addr;
+		}
+		desc->byte_cnt = period_len;
+		dma_addr += period_len;
+		buf += period_len;
+		i++;
+	}
+
+	tdmac->buf_len = buf_len;
+	tdmac->period_len = period_len;
+	tdmac->pos = 0;
+
+	return &tdmac->desc;
+
+err_out:
+	tdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+static int mmp_tdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+	struct dma_slave_config *dmaengine_cfg = (void *)arg;
+	int ret = 0;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		mmp_tdma_disable_chan(tdmac);
+		break;
+	case DMA_PAUSE:
+		mmp_tdma_pause_chan(tdmac);
+		break;
+	case DMA_RESUME:
+		mmp_tdma_resume_chan(tdmac);
+		break;
+	case DMA_SLAVE_CONFIG:
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
+			tdmac->dev_addr = dmaengine_cfg->src_addr;
+			tdmac->burst_sz = dmaengine_cfg->src_maxburst;
+			tdmac->buswidth = dmaengine_cfg->src_addr_width;
+		} else {
+			tdmac->dev_addr = dmaengine_cfg->dst_addr;
+			tdmac->burst_sz = dmaengine_cfg->dst_maxburst;
+			tdmac->buswidth = dmaengine_cfg->dst_addr_width;
+		}
+		tdmac->dir = dmaengine_cfg->direction;
+		return mmp_tdma_config_chan(tdmac);
+	default:
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static enum dma_status mmp_tdma_tx_status(struct dma_chan *chan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	dma_set_residue(txstate, tdmac->buf_len - tdmac->pos);
+
+	return tdmac->status;
+}
+
+static void mmp_tdma_issue_pending(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	mmp_tdma_enable_chan(tdmac);
+}
+
+static int __devexit mmp_tdma_remove(struct platform_device *pdev)
+{
+	struct mmp_tdma_device *tdev = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&tdev->device);
+	return 0;
+}
+
+static int __devinit mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
+						int idx, int irq, int type)
+{
+	struct mmp_tdma_chan *tdmac;
+
+	if (idx >= TDMA_CHANNEL_NUM) {
+		dev_err(tdev->dev, "too many channels for device!\n");
+		return -EINVAL;
+	}
+
+	/* alloc channel */
+	tdmac = devm_kzalloc(tdev->dev, sizeof(*tdmac), GFP_KERNEL);
+	if (!tdmac) {
+		dev_err(tdev->dev, "no free memory for DMA channels!\n");
+		return -ENOMEM;
+	}
+	if (irq)
+		tdmac->irq = irq + idx;
+	tdmac->dev	   = tdev->dev;
+	tdmac->chan.device = &tdev->device;
+	tdmac->idx	   = idx;
+	tdmac->type	   = type;
+	tdmac->reg_base	   = (unsigned long)tdev->base + idx * 4;
+	tdmac->status = DMA_SUCCESS;
+	tdev->tdmac[tdmac->idx] = tdmac;
+	tasklet_init(&tdmac->tasklet, dma_do_tasklet, (unsigned long)tdmac);
+
+	/* add the channel to tdma_chan list */
+	list_add_tail(&tdmac->chan.device_node,
+			&tdev->device.channels);
+
+	return 0;
+}
+
+static int __devinit mmp_tdma_probe(struct platform_device *pdev)
+{
+	const struct platform_device_id *id = platform_get_device_id(pdev);
+	enum mmp_tdma_type type = id->driver_data;
+	struct mmp_tdma_device *tdev;
+	struct resource *iores;
+	int i, ret;
+	int irq = 0;
+	int chan_num = TDMA_CHANNEL_NUM;
+
+	/* always have couple channels */
+	tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL);
+	if (!tdev)
+		return -ENOMEM;
+
+	tdev->dev = &pdev->dev;
+	iores = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!iores)
+		return -EINVAL;
+
+	if (resource_size(iores) != chan_num)
+		tdev->irq = iores->start;
+	else
+		irq = iores->start;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
+	tdev->base = devm_request_and_ioremap(&pdev->dev, iores);
+	if (!tdev->base)
+		return -EADDRNOTAVAIL;
+
+	if (tdev->irq) {
+		ret = devm_request_irq(&pdev->dev, tdev->irq,
+			mmp_tdma_int_handler, IRQF_DISABLED, "tdma", tdev);
+		if (ret)
+			return ret;
+	}
+
+	dma_cap_set(DMA_SLAVE, tdev->device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, tdev->device.cap_mask);
+
+	INIT_LIST_HEAD(&tdev->device.channels);
+
+	/* initialize channel parameters */
+	for (i = 0; i < chan_num; i++) {
+		ret = mmp_tdma_chan_init(tdev, i, irq, type);
+		if (ret)
+			return ret;
+	}
+
+	tdev->device.dev = &pdev->dev;
+	tdev->device.device_alloc_chan_resources =
+					mmp_tdma_alloc_chan_resources;
+	tdev->device.device_free_chan_resources =
+					mmp_tdma_free_chan_resources;
+	tdev->device.device_prep_dma_cyclic = mmp_tdma_prep_dma_cyclic;
+	tdev->device.device_tx_status = mmp_tdma_tx_status;
+	tdev->device.device_issue_pending = mmp_tdma_issue_pending;
+	tdev->device.device_control = mmp_tdma_control;
+	tdev->device.copy_align = TDMA_ALIGNMENT;
+
+	dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+	platform_set_drvdata(pdev, tdev);
+
+	ret = dma_async_device_register(&tdev->device);
+	if (ret) {
+		dev_err(tdev->device.dev, "unable to register\n");
+		return ret;
+	}
+
+	dev_info(tdev->device.dev, "initialized\n");
+	return 0;
+}
+
+static const struct platform_device_id mmp_tdma_id_table[] = {
+	{ "mmp-adma",	MMP_AUD_TDMA },
+	{ "pxa910-squ",	PXA910_SQU },
+	{ },
+};
+
+static struct platform_driver mmp_tdma_driver = {
+	.driver		= {
+		.name	= "mmp-tdma",
+		.owner  = THIS_MODULE,
+	},
+	.id_table	= mmp_tdma_id_table,
+	.probe		= mmp_tdma_probe,
+	.remove		= __devexit_p(mmp_tdma_remove),
+};
+
+module_platform_driver(mmp_tdma_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MMP Two-Channel DMA Driver");
+MODULE_ALIAS("platform:mmp-tdma");
+MODULE_AUTHOR("Leo Yan <leoy@marvell.com>");
+MODULE_AUTHOR("Zhangfei Gao <zhangfei.gao@marvell.com>");
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index c96ab15319f245c363bf1ed693c5a61e0669fcef..7f41b25805fa071edfa9d74946c630c2a7528733 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -29,7 +29,6 @@
 #include <linux/of_device.h>
 
 #include <asm/irq.h>
-#include <mach/mxs.h>
 
 #include "dmaengine.h"
 
@@ -201,6 +200,7 @@ int mxs_dma_is_apbh(struct dma_chan *chan)
 
 	return dma_is_apbh(mxs_dma);
 }
+EXPORT_SYMBOL_GPL(mxs_dma_is_apbh);
 
 int mxs_dma_is_apbx(struct dma_chan *chan)
 {
@@ -209,6 +209,7 @@ int mxs_dma_is_apbx(struct dma_chan *chan)
 
 	return !dma_is_apbh(mxs_dma);
 }
+EXPORT_SYMBOL_GPL(mxs_dma_is_apbx);
 
 static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
 {
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..54ae9572b0ac1f2735d7a8fe3e47adb4c87a6373
--- /dev/null
+++ b/drivers/dma/sh/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_SH_DMAE) += shdma-base.o
+obj-$(CONFIG_SH_DMAE) += shdma.o
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
new file mode 100644
index 0000000000000000000000000000000000000000..27f5c781fd73fa2b1de7d96fa189eada935c0467
--- /dev/null
+++ b/drivers/dma/sh/shdma-base.c
@@ -0,0 +1,934 @@
+/*
+ * Dmaengine driver base library for DMA controllers, found on SH-based SoCs
+ *
+ * extracted from shdma.c
+ *
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/shdma-base.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+
+/* DMA descriptor control */
+enum shdma_desc_status {
+	DESC_IDLE,
+	DESC_PREPARED,
+	DESC_SUBMITTED,
+	DESC_COMPLETED,	/* completed, have to call callback */
+	DESC_WAITING,	/* callback called, waiting for ack / re-submit */
+};
+
+#define NR_DESCS_PER_CHANNEL 32
+
+#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan)
+#define to_shdma_dev(d) container_of(d, struct shdma_dev, dma_dev)
+
+/*
+ * For slave DMA we assume, that there is a finite number of DMA slaves in the
+ * system, and that each such slave can only use a finite number of channels.
+ * We use slave channel IDs to make sure, that no such slave channel ID is
+ * allocated more than once.
+ */
+static unsigned int slave_num = 256;
+module_param(slave_num, uint, 0444);
+
+/* A bitmask with slave_num bits */
+static unsigned long *shdma_slave_used;
+
+/* Called under spin_lock_irq(&schan->chan_lock") */
+static void shdma_chan_xfer_ld_queue(struct shdma_chan *schan)
+{
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	struct shdma_desc *sdesc;
+
+	/* DMA work check */
+	if (ops->channel_busy(schan))
+		return;
+
+	/* Find the first not transferred descriptor */
+	list_for_each_entry(sdesc, &schan->ld_queue, node)
+		if (sdesc->mark == DESC_SUBMITTED) {
+			ops->start_xfer(schan, sdesc);
+			break;
+		}
+}
+
+static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct shdma_desc *chunk, *c, *desc =
+		container_of(tx, struct shdma_desc, async_tx),
+		*last = desc;
+	struct shdma_chan *schan = to_shdma_chan(tx->chan);
+	dma_async_tx_callback callback = tx->callback;
+	dma_cookie_t cookie;
+	bool power_up;
+
+	spin_lock_irq(&schan->chan_lock);
+
+	power_up = list_empty(&schan->ld_queue);
+
+	cookie = dma_cookie_assign(tx);
+
+	/* Mark all chunks of this descriptor as submitted, move to the queue */
+	list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
+		/*
+		 * All chunks are on the global ld_free, so, we have to find
+		 * the end of the chain ourselves
+		 */
+		if (chunk != desc && (chunk->mark == DESC_IDLE ||
+				      chunk->async_tx.cookie > 0 ||
+				      chunk->async_tx.cookie == -EBUSY ||
+				      &chunk->node == &schan->ld_free))
+			break;
+		chunk->mark = DESC_SUBMITTED;
+		/* Callback goes to the last chunk */
+		chunk->async_tx.callback = NULL;
+		chunk->cookie = cookie;
+		list_move_tail(&chunk->node, &schan->ld_queue);
+		last = chunk;
+
+		dev_dbg(schan->dev, "submit #%d@%p on %d\n",
+			tx->cookie, &last->async_tx, schan->id);
+	}
+
+	last->async_tx.callback = callback;
+	last->async_tx.callback_param = tx->callback_param;
+
+	if (power_up) {
+		int ret;
+		schan->pm_state = SHDMA_PM_BUSY;
+
+		ret = pm_runtime_get(schan->dev);
+
+		spin_unlock_irq(&schan->chan_lock);
+		if (ret < 0)
+			dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret);
+
+		pm_runtime_barrier(schan->dev);
+
+		spin_lock_irq(&schan->chan_lock);
+
+		/* Have we been reset, while waiting? */
+		if (schan->pm_state != SHDMA_PM_ESTABLISHED) {
+			struct shdma_dev *sdev =
+				to_shdma_dev(schan->dma_chan.device);
+			const struct shdma_ops *ops = sdev->ops;
+			dev_dbg(schan->dev, "Bring up channel %d\n",
+				schan->id);
+			/*
+			 * TODO: .xfer_setup() might fail on some platforms.
+			 * Make it int then, on error remove chunks from the
+			 * queue again
+			 */
+			ops->setup_xfer(schan, schan->slave_id);
+
+			if (schan->pm_state == SHDMA_PM_PENDING)
+				shdma_chan_xfer_ld_queue(schan);
+			schan->pm_state = SHDMA_PM_ESTABLISHED;
+		}
+	} else {
+		/*
+		 * Tell .device_issue_pending() not to run the queue, interrupts
+		 * will do it anyway
+		 */
+		schan->pm_state = SHDMA_PM_PENDING;
+	}
+
+	spin_unlock_irq(&schan->chan_lock);
+
+	return cookie;
+}
+
+/* Called with desc_lock held */
+static struct shdma_desc *shdma_get_desc(struct shdma_chan *schan)
+{
+	struct shdma_desc *sdesc;
+
+	list_for_each_entry(sdesc, &schan->ld_free, node)
+		if (sdesc->mark != DESC_PREPARED) {
+			BUG_ON(sdesc->mark != DESC_IDLE);
+			list_del(&sdesc->node);
+			return sdesc;
+		}
+
+	return NULL;
+}
+
+static int shdma_setup_slave(struct shdma_chan *schan, int slave_id)
+{
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	int ret;
+
+	if (slave_id < 0 || slave_id >= slave_num)
+		return -EINVAL;
+
+	if (test_and_set_bit(slave_id, shdma_slave_used))
+		return -EBUSY;
+
+	ret = ops->set_slave(schan, slave_id, false);
+	if (ret < 0) {
+		clear_bit(slave_id, shdma_slave_used);
+		return ret;
+	}
+
+	schan->slave_id = slave_id;
+
+	return 0;
+}
+
+/*
+ * This is the standard shdma filter function to be used as a replacement to the
+ * "old" method, using the .private pointer. If for some reason you allocate a
+ * channel without slave data, use something like ERR_PTR(-EINVAL) as a filter
+ * parameter. If this filter is used, the slave driver, after calling
+ * dma_request_channel(), will also have to call dmaengine_slave_config() with
+ * .slave_id, .direction, and either .src_addr or .dst_addr set.
+ * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE
+ * capability! If this becomes a requirement, hardware glue drivers, using this
+ * services would have to provide their own filters, which first would check
+ * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do
+ * this, and only then, in case of a match, call this common filter.
+ */
+bool shdma_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	int slave_id = (int)arg;
+	int ret;
+
+	if (slave_id < 0)
+		/* No slave requested - arbitrary channel */
+		return true;
+
+	if (slave_id >= slave_num)
+		return false;
+
+	ret = ops->set_slave(schan, slave_id, true);
+	if (ret < 0)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL(shdma_chan_filter);
+
+static int shdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	struct shdma_desc *desc;
+	struct shdma_slave *slave = chan->private;
+	int ret, i;
+
+	/*
+	 * This relies on the guarantee from dmaengine that alloc_chan_resources
+	 * never runs concurrently with itself or free_chan_resources.
+	 */
+	if (slave) {
+		/* Legacy mode: .private is set in filter */
+		ret = shdma_setup_slave(schan, slave->slave_id);
+		if (ret < 0)
+			goto esetslave;
+	} else {
+		schan->slave_id = -EINVAL;
+	}
+
+	schan->desc = kcalloc(NR_DESCS_PER_CHANNEL,
+			      sdev->desc_size, GFP_KERNEL);
+	if (!schan->desc) {
+		ret = -ENOMEM;
+		goto edescalloc;
+	}
+	schan->desc_num = NR_DESCS_PER_CHANNEL;
+
+	for (i = 0; i < NR_DESCS_PER_CHANNEL; i++) {
+		desc = ops->embedded_desc(schan->desc, i);
+		dma_async_tx_descriptor_init(&desc->async_tx,
+					     &schan->dma_chan);
+		desc->async_tx.tx_submit = shdma_tx_submit;
+		desc->mark = DESC_IDLE;
+
+		list_add(&desc->node, &schan->ld_free);
+	}
+
+	return NR_DESCS_PER_CHANNEL;
+
+edescalloc:
+	if (slave)
+esetslave:
+		clear_bit(slave->slave_id, shdma_slave_used);
+	chan->private = NULL;
+	return ret;
+}
+
+static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
+{
+	struct shdma_desc *desc, *_desc;
+	/* Is the "exposed" head of a chain acked? */
+	bool head_acked = false;
+	dma_cookie_t cookie = 0;
+	dma_async_tx_callback callback = NULL;
+	void *param = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->chan_lock, flags);
+	list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) {
+		struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+		BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie);
+		BUG_ON(desc->mark != DESC_SUBMITTED &&
+		       desc->mark != DESC_COMPLETED &&
+		       desc->mark != DESC_WAITING);
+
+		/*
+		 * queue is ordered, and we use this loop to (1) clean up all
+		 * completed descriptors, and to (2) update descriptor flags of
+		 * any chunks in a (partially) completed chain
+		 */
+		if (!all && desc->mark == DESC_SUBMITTED &&
+		    desc->cookie != cookie)
+			break;
+
+		if (tx->cookie > 0)
+			cookie = tx->cookie;
+
+		if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
+			if (schan->dma_chan.completed_cookie != desc->cookie - 1)
+				dev_dbg(schan->dev,
+					"Completing cookie %d, expected %d\n",
+					desc->cookie,
+					schan->dma_chan.completed_cookie + 1);
+			schan->dma_chan.completed_cookie = desc->cookie;
+		}
+
+		/* Call callback on the last chunk */
+		if (desc->mark == DESC_COMPLETED && tx->callback) {
+			desc->mark = DESC_WAITING;
+			callback = tx->callback;
+			param = tx->callback_param;
+			dev_dbg(schan->dev, "descriptor #%d@%p on %d callback\n",
+				tx->cookie, tx, schan->id);
+			BUG_ON(desc->chunks != 1);
+			break;
+		}
+
+		if (tx->cookie > 0 || tx->cookie == -EBUSY) {
+			if (desc->mark == DESC_COMPLETED) {
+				BUG_ON(tx->cookie < 0);
+				desc->mark = DESC_WAITING;
+			}
+			head_acked = async_tx_test_ack(tx);
+		} else {
+			switch (desc->mark) {
+			case DESC_COMPLETED:
+				desc->mark = DESC_WAITING;
+				/* Fall through */
+			case DESC_WAITING:
+				if (head_acked)
+					async_tx_ack(&desc->async_tx);
+			}
+		}
+
+		dev_dbg(schan->dev, "descriptor %p #%d completed.\n",
+			tx, tx->cookie);
+
+		if (((desc->mark == DESC_COMPLETED ||
+		      desc->mark == DESC_WAITING) &&
+		     async_tx_test_ack(&desc->async_tx)) || all) {
+			/* Remove from ld_queue list */
+			desc->mark = DESC_IDLE;
+
+			list_move(&desc->node, &schan->ld_free);
+
+			if (list_empty(&schan->ld_queue)) {
+				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
+				pm_runtime_put(schan->dev);
+				schan->pm_state = SHDMA_PM_ESTABLISHED;
+			}
+		}
+	}
+
+	if (all && !callback)
+		/*
+		 * Terminating and the loop completed normally: forgive
+		 * uncompleted cookies
+		 */
+		schan->dma_chan.completed_cookie = schan->dma_chan.cookie;
+
+	spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+	if (callback)
+		callback(param);
+
+	return callback;
+}
+
+/*
+ * shdma_chan_ld_cleanup - Clean up link descriptors
+ *
+ * Clean up the ld_queue of DMA channel.
+ */
+static void shdma_chan_ld_cleanup(struct shdma_chan *schan, bool all)
+{
+	while (__ld_cleanup(schan, all))
+		;
+}
+
+/*
+ * shdma_free_chan_resources - Free all resources of the channel.
+ */
+static void shdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(chan->device);
+	const struct shdma_ops *ops = sdev->ops;
+	LIST_HEAD(list);
+
+	/* Protect against ISR */
+	spin_lock_irq(&schan->chan_lock);
+	ops->halt_channel(schan);
+	spin_unlock_irq(&schan->chan_lock);
+
+	/* Now no new interrupts will occur */
+
+	/* Prepared and not submitted descriptors can still be on the queue */
+	if (!list_empty(&schan->ld_queue))
+		shdma_chan_ld_cleanup(schan, true);
+
+	if (schan->slave_id >= 0) {
+		/* The caller is holding dma_list_mutex */
+		clear_bit(schan->slave_id, shdma_slave_used);
+		chan->private = NULL;
+	}
+
+	spin_lock_irq(&schan->chan_lock);
+
+	list_splice_init(&schan->ld_free, &list);
+	schan->desc_num = 0;
+
+	spin_unlock_irq(&schan->chan_lock);
+
+	kfree(schan->desc);
+}
+
+/**
+ * shdma_add_desc - get, set up and return one transfer descriptor
+ * @schan:	DMA channel
+ * @flags:	DMA transfer flags
+ * @dst:	destination DMA address, incremented when direction equals
+ *		DMA_DEV_TO_MEM or DMA_MEM_TO_MEM
+ * @src:	source DMA address, incremented when direction equals
+ *		DMA_MEM_TO_DEV or DMA_MEM_TO_MEM
+ * @len:	DMA transfer length
+ * @first:	if NULL, set to the current descriptor and cookie set to -EBUSY
+ * @direction:	needed for slave DMA to decide which address to keep constant,
+ *		equals DMA_MEM_TO_MEM for MEMCPY
+ * Returns 0 or an error
+ * Locks: called with desc_lock held
+ */
+static struct shdma_desc *shdma_add_desc(struct shdma_chan *schan,
+	unsigned long flags, dma_addr_t *dst, dma_addr_t *src, size_t *len,
+	struct shdma_desc **first, enum dma_transfer_direction direction)
+{
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	struct shdma_desc *new;
+	size_t copy_size = *len;
+
+	if (!copy_size)
+		return NULL;
+
+	/* Allocate the link descriptor from the free list */
+	new = shdma_get_desc(schan);
+	if (!new) {
+		dev_err(schan->dev, "No free link descriptor available\n");
+		return NULL;
+	}
+
+	ops->desc_setup(schan, new, *src, *dst, &copy_size);
+
+	if (!*first) {
+		/* First desc */
+		new->async_tx.cookie = -EBUSY;
+		*first = new;
+	} else {
+		/* Other desc - invisible to the user */
+		new->async_tx.cookie = -EINVAL;
+	}
+
+	dev_dbg(schan->dev,
+		"chaining (%u/%u)@%x -> %x with %p, cookie %d\n",
+		copy_size, *len, *src, *dst, &new->async_tx,
+		new->async_tx.cookie);
+
+	new->mark = DESC_PREPARED;
+	new->async_tx.flags = flags;
+	new->direction = direction;
+
+	*len -= copy_size;
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
+		*src += copy_size;
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
+		*dst += copy_size;
+
+	return new;
+}
+
+/*
+ * shdma_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and a correct
+ * list manipulation. For slave DMA direction carries the usual meaning, and,
+ * logically, the SG list is RAM and the addr variable contains slave address,
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
+ * and the SG list contains only one element and points at the source buffer.
+ */
+static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
+	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
+	enum dma_transfer_direction direction, unsigned long flags)
+{
+	struct scatterlist *sg;
+	struct shdma_desc *first = NULL, *new = NULL /* compiler... */;
+	LIST_HEAD(tx_list);
+	int chunks = 0;
+	unsigned long irq_flags;
+	int i;
+
+	for_each_sg(sgl, sg, sg_len, i)
+		chunks += DIV_ROUND_UP(sg_dma_len(sg), schan->max_xfer_len);
+
+	/* Have to lock the whole loop to protect against concurrent release */
+	spin_lock_irqsave(&schan->chan_lock, irq_flags);
+
+	/*
+	 * Chaining:
+	 * first descriptor is what user is dealing with in all API calls, its
+	 *	cookie is at first set to -EBUSY, at tx-submit to a positive
+	 *	number
+	 * if more than one chunk is needed further chunks have cookie = -EINVAL
+	 * the last chunk, if not equal to the first, has cookie = -ENOSPC
+	 * all chunks are linked onto the tx_list head with their .node heads
+	 *	only during this function, then they are immediately spliced
+	 *	back onto the free list in form of a chain
+	 */
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(sg);
+		size_t len = sg_dma_len(sg);
+
+		if (!len)
+			goto err_get_desc;
+
+		do {
+			dev_dbg(schan->dev, "Add SG #%d@%p[%d], dma %llx\n",
+				i, sg, len, (unsigned long long)sg_addr);
+
+			if (direction == DMA_DEV_TO_MEM)
+				new = shdma_add_desc(schan, flags,
+						&sg_addr, addr, &len, &first,
+						direction);
+			else
+				new = shdma_add_desc(schan, flags,
+						addr, &sg_addr, &len, &first,
+						direction);
+			if (!new)
+				goto err_get_desc;
+
+			new->chunks = chunks--;
+			list_add_tail(&new->node, &tx_list);
+		} while (len);
+	}
+
+	if (new != first)
+		new->async_tx.cookie = -ENOSPC;
+
+	/* Put them back on the free list, so, they don't get lost */
+	list_splice_tail(&tx_list, &schan->ld_free);
+
+	spin_unlock_irqrestore(&schan->chan_lock, irq_flags);
+
+	return &first->async_tx;
+
+err_get_desc:
+	list_for_each_entry(new, &tx_list, node)
+		new->mark = DESC_IDLE;
+	list_splice(&tx_list, &schan->ld_free);
+
+	spin_unlock_irqrestore(&schan->chan_lock, irq_flags);
+
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *shdma_prep_memcpy(
+	struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct scatterlist sg;
+
+	if (!chan || !len)
+		return NULL;
+
+	BUG_ON(!schan->desc_num);
+
+	sg_init_table(&sg, 1);
+	sg_set_page(&sg, pfn_to_page(PFN_DOWN(dma_src)), len,
+		    offset_in_page(dma_src));
+	sg_dma_address(&sg) = dma_src;
+	sg_dma_len(&sg) = len;
+
+	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, flags);
+}
+
+static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags, void *context)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	int slave_id = schan->slave_id;
+	dma_addr_t slave_addr;
+
+	if (!chan)
+		return NULL;
+
+	BUG_ON(!schan->desc_num);
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (slave_id < 0 || !sg_len) {
+		dev_warn(schan->dev, "%s: bad parameter: len=%d, id=%d\n",
+			 __func__, sg_len, slave_id);
+		return NULL;
+	}
+
+	slave_addr = ops->slave_addr(schan);
+
+	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+			      direction, flags);
+}
+
+static int shdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			  unsigned long arg)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(chan->device);
+	const struct shdma_ops *ops = sdev->ops;
+	struct dma_slave_config *config;
+	unsigned long flags;
+	int ret;
+
+	if (!chan)
+		return -EINVAL;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		spin_lock_irqsave(&schan->chan_lock, flags);
+		ops->halt_channel(schan);
+		spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+		shdma_chan_ld_cleanup(schan, true);
+		break;
+	case DMA_SLAVE_CONFIG:
+		/*
+		 * So far only .slave_id is used, but the slave drivers are
+		 * encouraged to also set a transfer direction and an address.
+		 */
+		if (!arg)
+			return -EINVAL;
+		/*
+		 * We could lock this, but you shouldn't be configuring the
+		 * channel, while using it...
+		 */
+		config = (struct dma_slave_config *)arg;
+		ret = shdma_setup_slave(schan, config->slave_id);
+		if (ret < 0)
+			return ret;
+		break;
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static void shdma_issue_pending(struct dma_chan *chan)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+
+	spin_lock_irq(&schan->chan_lock);
+	if (schan->pm_state == SHDMA_PM_ESTABLISHED)
+		shdma_chan_xfer_ld_queue(schan);
+	else
+		schan->pm_state = SHDMA_PM_PENDING;
+	spin_unlock_irq(&schan->chan_lock);
+}
+
+static enum dma_status shdma_tx_status(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	enum dma_status status;
+	unsigned long flags;
+
+	shdma_chan_ld_cleanup(schan, false);
+
+	spin_lock_irqsave(&schan->chan_lock, flags);
+
+	status = dma_cookie_status(chan, cookie, txstate);
+
+	/*
+	 * If we don't find cookie on the queue, it has been aborted and we have
+	 * to report error
+	 */
+	if (status != DMA_SUCCESS) {
+		struct shdma_desc *sdesc;
+		status = DMA_ERROR;
+		list_for_each_entry(sdesc, &schan->ld_queue, node)
+			if (sdesc->cookie == cookie) {
+				status = DMA_IN_PROGRESS;
+				break;
+			}
+	}
+
+	spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+	return status;
+}
+
+/* Called from error IRQ or NMI */
+bool shdma_reset(struct shdma_dev *sdev)
+{
+	const struct shdma_ops *ops = sdev->ops;
+	struct shdma_chan *schan;
+	unsigned int handled = 0;
+	int i;
+
+	/* Reset all channels */
+	shdma_for_each_chan(schan, sdev, i) {
+		struct shdma_desc *sdesc;
+		LIST_HEAD(dl);
+
+		if (!schan)
+			continue;
+
+		spin_lock(&schan->chan_lock);
+
+		/* Stop the channel */
+		ops->halt_channel(schan);
+
+		list_splice_init(&schan->ld_queue, &dl);
+
+		if (!list_empty(&dl)) {
+			dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
+			pm_runtime_put(schan->dev);
+		}
+		schan->pm_state = SHDMA_PM_ESTABLISHED;
+
+		spin_unlock(&schan->chan_lock);
+
+		/* Complete all  */
+		list_for_each_entry(sdesc, &dl, node) {
+			struct dma_async_tx_descriptor *tx = &sdesc->async_tx;
+			sdesc->mark = DESC_IDLE;
+			if (tx->callback)
+				tx->callback(tx->callback_param);
+		}
+
+		spin_lock(&schan->chan_lock);
+		list_splice(&dl, &schan->ld_free);
+		spin_unlock(&schan->chan_lock);
+
+		handled++;
+	}
+
+	return !!handled;
+}
+EXPORT_SYMBOL(shdma_reset);
+
+static irqreturn_t chan_irq(int irq, void *dev)
+{
+	struct shdma_chan *schan = dev;
+	const struct shdma_ops *ops =
+		to_shdma_dev(schan->dma_chan.device)->ops;
+	irqreturn_t ret;
+
+	spin_lock(&schan->chan_lock);
+
+	ret = ops->chan_irq(schan, irq) ? IRQ_WAKE_THREAD : IRQ_NONE;
+
+	spin_unlock(&schan->chan_lock);
+
+	return ret;
+}
+
+static irqreturn_t chan_irqt(int irq, void *dev)
+{
+	struct shdma_chan *schan = dev;
+	const struct shdma_ops *ops =
+		to_shdma_dev(schan->dma_chan.device)->ops;
+	struct shdma_desc *sdesc;
+
+	spin_lock_irq(&schan->chan_lock);
+	list_for_each_entry(sdesc, &schan->ld_queue, node) {
+		if (sdesc->mark == DESC_SUBMITTED &&
+		    ops->desc_completed(schan, sdesc)) {
+			dev_dbg(schan->dev, "done #%d@%p\n",
+				sdesc->async_tx.cookie, &sdesc->async_tx);
+			sdesc->mark = DESC_COMPLETED;
+			break;
+		}
+	}
+	/* Next desc */
+	shdma_chan_xfer_ld_queue(schan);
+	spin_unlock_irq(&schan->chan_lock);
+
+	shdma_chan_ld_cleanup(schan, false);
+
+	return IRQ_HANDLED;
+}
+
+int shdma_request_irq(struct shdma_chan *schan, int irq,
+			   unsigned long flags, const char *name)
+{
+	int ret = request_threaded_irq(irq, chan_irq, chan_irqt,
+				       flags, name, schan);
+
+	schan->irq = ret < 0 ? ret : irq;
+
+	return ret;
+}
+EXPORT_SYMBOL(shdma_request_irq);
+
+void shdma_free_irq(struct shdma_chan *schan)
+{
+	if (schan->irq >= 0)
+		free_irq(schan->irq, schan);
+}
+EXPORT_SYMBOL(shdma_free_irq);
+
+void shdma_chan_probe(struct shdma_dev *sdev,
+			   struct shdma_chan *schan, int id)
+{
+	schan->pm_state = SHDMA_PM_ESTABLISHED;
+
+	/* reference struct dma_device */
+	schan->dma_chan.device = &sdev->dma_dev;
+	dma_cookie_init(&schan->dma_chan);
+
+	schan->dev = sdev->dma_dev.dev;
+	schan->id = id;
+
+	if (!schan->max_xfer_len)
+		schan->max_xfer_len = PAGE_SIZE;
+
+	spin_lock_init(&schan->chan_lock);
+
+	/* Init descripter manage list */
+	INIT_LIST_HEAD(&schan->ld_queue);
+	INIT_LIST_HEAD(&schan->ld_free);
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&schan->dma_chan.device_node,
+			&sdev->dma_dev.channels);
+	sdev->schan[sdev->dma_dev.chancnt++] = schan;
+}
+EXPORT_SYMBOL(shdma_chan_probe);
+
+void shdma_chan_remove(struct shdma_chan *schan)
+{
+	list_del(&schan->dma_chan.device_node);
+}
+EXPORT_SYMBOL(shdma_chan_remove);
+
+int shdma_init(struct device *dev, struct shdma_dev *sdev,
+		    int chan_num)
+{
+	struct dma_device *dma_dev = &sdev->dma_dev;
+
+	/*
+	 * Require all call-backs for now, they can trivially be made optional
+	 * later as required
+	 */
+	if (!sdev->ops ||
+	    !sdev->desc_size ||
+	    !sdev->ops->embedded_desc ||
+	    !sdev->ops->start_xfer ||
+	    !sdev->ops->setup_xfer ||
+	    !sdev->ops->set_slave ||
+	    !sdev->ops->desc_setup ||
+	    !sdev->ops->slave_addr ||
+	    !sdev->ops->channel_busy ||
+	    !sdev->ops->halt_channel ||
+	    !sdev->ops->desc_completed)
+		return -EINVAL;
+
+	sdev->schan = kcalloc(chan_num, sizeof(*sdev->schan), GFP_KERNEL);
+	if (!sdev->schan)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* Common and MEMCPY operations */
+	dma_dev->device_alloc_chan_resources
+		= shdma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = shdma_free_chan_resources;
+	dma_dev->device_prep_dma_memcpy = shdma_prep_memcpy;
+	dma_dev->device_tx_status = shdma_tx_status;
+	dma_dev->device_issue_pending = shdma_issue_pending;
+
+	/* Compulsory for DMA_SLAVE fields */
+	dma_dev->device_prep_slave_sg = shdma_prep_slave_sg;
+	dma_dev->device_control = shdma_control;
+
+	dma_dev->dev = dev;
+
+	return 0;
+}
+EXPORT_SYMBOL(shdma_init);
+
+void shdma_cleanup(struct shdma_dev *sdev)
+{
+	kfree(sdev->schan);
+}
+EXPORT_SYMBOL(shdma_cleanup);
+
+static int __init shdma_enter(void)
+{
+	shdma_slave_used = kzalloc(DIV_ROUND_UP(slave_num, BITS_PER_LONG) *
+				    sizeof(long), GFP_KERNEL);
+	if (!shdma_slave_used)
+		return -ENOMEM;
+	return 0;
+}
+module_init(shdma_enter);
+
+static void __exit shdma_exit(void)
+{
+	kfree(shdma_slave_used);
+}
+module_exit(shdma_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("SH-DMA driver base library");
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
diff --git a/drivers/dma/sh/shdma.c b/drivers/dma/sh/shdma.c
new file mode 100644
index 0000000000000000000000000000000000000000..027c9be976544edc7029431e04e5f9b8697503ed
--- /dev/null
+++ b/drivers/dma/sh/shdma.c
@@ -0,0 +1,943 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * base is drivers/dma/flsdma.c
+ *
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - DMA of SuperH does not have Hardware DMA chain mode.
+ * - MAX DMA size is 16MB.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/sh_dma.h>
+#include <linux/notifier.h>
+#include <linux/kdebug.h>
+#include <linux/spinlock.h>
+#include <linux/rculist.h>
+
+#include "../dmaengine.h"
+#include "shdma.h"
+
+#define SH_DMAE_DRV_NAME "sh-dma-engine"
+
+/* Default MEMCPY transfer size = 2^2 = 4 bytes */
+#define LOG2_DEFAULT_XFER_SIZE	2
+#define SH_DMA_SLAVE_NUMBER 256
+#define SH_DMA_TCR_MAX (16 * 1024 * 1024 - 1)
+
+/*
+ * Used for write-side mutual exclusion for the global device list,
+ * read-side synchronization by way of RCU, and per-controller data.
+ */
+static DEFINE_SPINLOCK(sh_dmae_lock);
+static LIST_HEAD(sh_dmae_devices);
+
+static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	__raw_writel(data, shdev->chan_reg +
+		     shdev->pdata->channel[sh_dc->shdma_chan.id].chclr_offset);
+}
+
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+	__raw_writel(data, sh_dc->base + reg / sizeof(u32));
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+	return __raw_readl(sh_dc->base + reg / sizeof(u32));
+}
+
+static u16 dmaor_read(struct sh_dmae_device *shdev)
+{
+	u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
+
+	if (shdev->pdata->dmaor_is_32bit)
+		return __raw_readl(addr);
+	else
+		return __raw_readw(addr);
+}
+
+static void dmaor_write(struct sh_dmae_device *shdev, u16 data)
+{
+	u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
+
+	if (shdev->pdata->dmaor_is_32bit)
+		__raw_writel(data, addr);
+	else
+		__raw_writew(data, addr);
+}
+
+static void chcr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	__raw_writel(data, sh_dc->base + shdev->chcr_offset / sizeof(u32));
+}
+
+static u32 chcr_read(struct sh_dmae_chan *sh_dc)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	return __raw_readl(sh_dc->base + shdev->chcr_offset / sizeof(u32));
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR register
+ */
+static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev)
+{
+	unsigned short dmaor;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sh_dmae_lock, flags);
+
+	dmaor = dmaor_read(shdev);
+	dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME));
+
+	spin_unlock_irqrestore(&sh_dmae_lock, flags);
+}
+
+static int sh_dmae_rst(struct sh_dmae_device *shdev)
+{
+	unsigned short dmaor;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sh_dmae_lock, flags);
+
+	dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
+
+	if (shdev->pdata->chclr_present) {
+		int i;
+		for (i = 0; i < shdev->pdata->channel_num; i++) {
+			struct sh_dmae_chan *sh_chan = shdev->chan[i];
+			if (sh_chan)
+				chclr_write(sh_chan, 0);
+		}
+	}
+
+	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
+
+	dmaor = dmaor_read(shdev);
+
+	spin_unlock_irqrestore(&sh_dmae_lock, flags);
+
+	if (dmaor & (DMAOR_AE | DMAOR_NMIF)) {
+		dev_warn(shdev->shdma_dev.dma_dev.dev, "Can't initialize DMAOR.\n");
+		return -EIO;
+	}
+	if (shdev->pdata->dmaor_init & ~dmaor)
+		dev_warn(shdev->shdma_dev.dma_dev.dev,
+			 "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
+			 dmaor, shdev->pdata->dmaor_init);
+	return 0;
+}
+
+static bool dmae_is_busy(struct sh_dmae_chan *sh_chan)
+{
+	u32 chcr = chcr_read(sh_chan);
+
+	if ((chcr & (CHCR_DE | CHCR_TE)) == CHCR_DE)
+		return true; /* working */
+
+	return false; /* waiting */
+}
+
+static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	struct sh_dmae_pdata *pdata = shdev->pdata;
+	int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) |
+		((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift);
+
+	if (cnt >= pdata->ts_shift_num)
+		cnt = 0;
+
+	return pdata->ts_shift[cnt];
+}
+
+static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	struct sh_dmae_pdata *pdata = shdev->pdata;
+	int i;
+
+	for (i = 0; i < pdata->ts_shift_num; i++)
+		if (pdata->ts_shift[i] == l2size)
+			break;
+
+	if (i == pdata->ts_shift_num)
+		i = 0;
+
+	return ((i << pdata->ts_low_shift) & pdata->ts_low_mask) |
+		((i << pdata->ts_high_shift) & pdata->ts_high_mask);
+}
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw)
+{
+	sh_dmae_writel(sh_chan, hw->sar, SAR);
+	sh_dmae_writel(sh_chan, hw->dar, DAR);
+	sh_dmae_writel(sh_chan, hw->tcr >> sh_chan->xmit_shift, TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	u32 chcr = chcr_read(sh_chan);
+
+	if (shdev->pdata->needs_tend_set)
+		sh_dmae_writel(sh_chan, 0xFFFFFFFF, TEND);
+
+	chcr |= CHCR_DE | shdev->chcr_ie_bit;
+	chcr_write(sh_chan, chcr & ~CHCR_TE);
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+	/*
+	 * Default configuration for dual address memory-memory transfer.
+	 * 0x400 represents auto-request.
+	 */
+	u32 chcr = DM_INC | SM_INC | 0x400 | log2size_to_chcr(sh_chan,
+						   LOG2_DEFAULT_XFER_SIZE);
+	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr);
+	chcr_write(sh_chan, chcr);
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+	/* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */
+	if (dmae_is_busy(sh_chan))
+		return -EBUSY;
+
+	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, val);
+	chcr_write(sh_chan, val);
+
+	return 0;
+}
+
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->shdma_chan.id];
+	u16 __iomem *addr = shdev->dmars;
+	unsigned int shift = chan_pdata->dmars_bit;
+
+	if (dmae_is_busy(sh_chan))
+		return -EBUSY;
+
+	if (pdata->no_dmars)
+		return 0;
+
+	/* in the case of a missing DMARS resource use first memory window */
+	if (!addr)
+		addr = (u16 __iomem *)shdev->chan_reg;
+	addr += chan_pdata->dmars / sizeof(u16);
+
+	__raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift),
+		     addr);
+
+	return 0;
+}
+
+static void sh_dmae_start_xfer(struct shdma_chan *schan,
+			       struct shdma_desc *sdesc)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	struct sh_dmae_desc *sh_desc = container_of(sdesc,
+					struct sh_dmae_desc, shdma_desc);
+	dev_dbg(sh_chan->shdma_chan.dev, "Queue #%d to %d: %u@%x -> %x\n",
+		sdesc->async_tx.cookie, sh_chan->shdma_chan.id,
+		sh_desc->hw.tcr, sh_desc->hw.sar, sh_desc->hw.dar);
+	/* Get the ld start address from ld_queue */
+	dmae_set_reg(sh_chan, &sh_desc->hw);
+	dmae_start(sh_chan);
+}
+
+static bool sh_dmae_channel_busy(struct shdma_chan *schan)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	return dmae_is_busy(sh_chan);
+}
+
+static void sh_dmae_setup_xfer(struct shdma_chan *schan,
+			       int slave_id)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+
+	if (slave_id >= 0) {
+		const struct sh_dmae_slave_config *cfg =
+			sh_chan->config;
+
+		dmae_set_dmars(sh_chan, cfg->mid_rid);
+		dmae_set_chcr(sh_chan, cfg->chcr);
+	} else {
+		dmae_init(sh_chan);
+	}
+}
+
+static const struct sh_dmae_slave_config *dmae_find_slave(
+	struct sh_dmae_chan *sh_chan, int slave_id)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_slave_config *cfg;
+	int i;
+
+	if (slave_id >= SH_DMA_SLAVE_NUMBER)
+		return NULL;
+
+	for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
+		if (cfg->slave_id == slave_id)
+			return cfg;
+
+	return NULL;
+}
+
+static int sh_dmae_set_slave(struct shdma_chan *schan,
+			     int slave_id, bool try)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	const struct sh_dmae_slave_config *cfg = dmae_find_slave(sh_chan, slave_id);
+	if (!cfg)
+		return -ENODEV;
+
+	if (!try)
+		sh_chan->config = cfg;
+
+	return 0;
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	u32 chcr = chcr_read(sh_chan);
+
+	chcr &= ~(CHCR_DE | CHCR_TE | shdev->chcr_ie_bit);
+	chcr_write(sh_chan, chcr);
+}
+
+static int sh_dmae_desc_setup(struct shdma_chan *schan,
+			      struct shdma_desc *sdesc,
+			      dma_addr_t src, dma_addr_t dst, size_t *len)
+{
+	struct sh_dmae_desc *sh_desc = container_of(sdesc,
+					struct sh_dmae_desc, shdma_desc);
+
+	if (*len > schan->max_xfer_len)
+		*len = schan->max_xfer_len;
+
+	sh_desc->hw.sar = src;
+	sh_desc->hw.dar = dst;
+	sh_desc->hw.tcr = *len;
+
+	return 0;
+}
+
+static void sh_dmae_halt(struct shdma_chan *schan)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	dmae_halt(sh_chan);
+}
+
+static bool sh_dmae_chan_irq(struct shdma_chan *schan, int irq)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+
+	if (!(chcr_read(sh_chan) & CHCR_TE))
+		return false;
+
+	/* DMA stop */
+	dmae_halt(sh_chan);
+
+	return true;
+}
+
+/* Called from error IRQ or NMI */
+static bool sh_dmae_reset(struct sh_dmae_device *shdev)
+{
+	bool ret;
+
+	/* halt the dma controller */
+	sh_dmae_ctl_stop(shdev);
+
+	/* We cannot detect, which channel caused the error, have to reset all */
+	ret = shdma_reset(&shdev->shdma_dev);
+
+	sh_dmae_rst(shdev);
+
+	return ret;
+}
+
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+	struct sh_dmae_device *shdev = data;
+
+	if (!(dmaor_read(shdev) & DMAOR_AE))
+		return IRQ_NONE;
+
+	sh_dmae_reset(shdev);
+	return IRQ_HANDLED;
+}
+
+static bool sh_dmae_desc_completed(struct shdma_chan *schan,
+				   struct shdma_desc *sdesc)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan,
+					struct sh_dmae_chan, shdma_chan);
+	struct sh_dmae_desc *sh_desc = container_of(sdesc,
+					struct sh_dmae_desc, shdma_desc);
+	u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+	u32 dar_buf = sh_dmae_readl(sh_chan, DAR);
+
+	return	(sdesc->direction == DMA_DEV_TO_MEM &&
+		 (sh_desc->hw.dar + sh_desc->hw.tcr) == dar_buf) ||
+		(sdesc->direction != DMA_DEV_TO_MEM &&
+		 (sh_desc->hw.sar + sh_desc->hw.tcr) == sar_buf);
+}
+
+static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev)
+{
+	/* Fast path out if NMIF is not asserted for this controller */
+	if ((dmaor_read(shdev) & DMAOR_NMIF) == 0)
+		return false;
+
+	return sh_dmae_reset(shdev);
+}
+
+static int sh_dmae_nmi_handler(struct notifier_block *self,
+			       unsigned long cmd, void *data)
+{
+	struct sh_dmae_device *shdev;
+	int ret = NOTIFY_DONE;
+	bool triggered;
+
+	/*
+	 * Only concern ourselves with NMI events.
+	 *
+	 * Normally we would check the die chain value, but as this needs
+	 * to be architecture independent, check for NMI context instead.
+	 */
+	if (!in_nmi())
+		return NOTIFY_DONE;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(shdev, &sh_dmae_devices, node) {
+		/*
+		 * Only stop if one of the controllers has NMIF asserted,
+		 * we do not want to interfere with regular address error
+		 * handling or NMI events that don't concern the DMACs.
+		 */
+		triggered = sh_dmae_nmi_notify(shdev);
+		if (triggered == true)
+			ret = NOTIFY_OK;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static struct notifier_block sh_dmae_nmi_notifier __read_mostly = {
+	.notifier_call	= sh_dmae_nmi_handler,
+
+	/* Run before NMI debug handler and KGDB */
+	.priority	= 1,
+};
+
+static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
+					int irq, unsigned long flags)
+{
+	const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id];
+	struct shdma_dev *sdev = &shdev->shdma_dev;
+	struct platform_device *pdev = to_platform_device(sdev->dma_dev.dev);
+	struct sh_dmae_chan *sh_chan;
+	struct shdma_chan *schan;
+	int err;
+
+	sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
+	if (!sh_chan) {
+		dev_err(sdev->dma_dev.dev,
+			"No free memory for allocating dma channels!\n");
+		return -ENOMEM;
+	}
+
+	schan = &sh_chan->shdma_chan;
+	schan->max_xfer_len = SH_DMA_TCR_MAX + 1;
+
+	shdma_chan_probe(sdev, schan, id);
+
+	sh_chan->base = shdev->chan_reg + chan_pdata->offset / sizeof(u32);
+
+	/* set up channel irq */
+	if (pdev->id >= 0)
+		snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id),
+			 "sh-dmae%d.%d", pdev->id, id);
+	else
+		snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id),
+			 "sh-dma%d", id);
+
+	err = shdma_request_irq(schan, irq, flags, sh_chan->dev_id);
+	if (err) {
+		dev_err(sdev->dma_dev.dev,
+			"DMA channel %d request_irq error %d\n",
+			id, err);
+		goto err_no_irq;
+	}
+
+	shdev->chan[id] = sh_chan;
+	return 0;
+
+err_no_irq:
+	/* remove from dmaengine device node */
+	shdma_chan_remove(schan);
+	kfree(sh_chan);
+	return err;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+	struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev;
+	struct shdma_chan *schan;
+	int i;
+
+	shdma_for_each_chan(schan, &shdev->shdma_dev, i) {
+		struct sh_dmae_chan *sh_chan = container_of(schan,
+					struct sh_dmae_chan, shdma_chan);
+		BUG_ON(!schan);
+
+		shdma_free_irq(&sh_chan->shdma_chan);
+
+		shdma_chan_remove(schan);
+		kfree(sh_chan);
+	}
+	dma_dev->chancnt = 0;
+}
+
+static void sh_dmae_shutdown(struct platform_device *pdev)
+{
+	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+	sh_dmae_ctl_stop(shdev);
+}
+
+static int sh_dmae_runtime_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int sh_dmae_runtime_resume(struct device *dev)
+{
+	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+	return sh_dmae_rst(shdev);
+}
+
+#ifdef CONFIG_PM
+static int sh_dmae_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int sh_dmae_resume(struct device *dev)
+{
+	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+	int i, ret;
+
+	ret = sh_dmae_rst(shdev);
+	if (ret < 0)
+		dev_err(dev, "Failed to reset!\n");
+
+	for (i = 0; i < shdev->pdata->channel_num; i++) {
+		struct sh_dmae_chan *sh_chan = shdev->chan[i];
+
+		if (!sh_chan->shdma_chan.desc_num)
+			continue;
+
+		if (sh_chan->shdma_chan.slave_id >= 0) {
+			const struct sh_dmae_slave_config *cfg = sh_chan->config;
+			dmae_set_dmars(sh_chan, cfg->mid_rid);
+			dmae_set_chcr(sh_chan, cfg->chcr);
+		} else {
+			dmae_init(sh_chan);
+		}
+	}
+
+	return 0;
+}
+#else
+#define sh_dmae_suspend NULL
+#define sh_dmae_resume NULL
+#endif
+
+const struct dev_pm_ops sh_dmae_pm = {
+	.suspend		= sh_dmae_suspend,
+	.resume			= sh_dmae_resume,
+	.runtime_suspend	= sh_dmae_runtime_suspend,
+	.runtime_resume		= sh_dmae_runtime_resume,
+};
+
+static dma_addr_t sh_dmae_slave_addr(struct shdma_chan *schan)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan,
+					struct sh_dmae_chan, shdma_chan);
+
+	/*
+	 * Implicit BUG_ON(!sh_chan->config)
+	 * This is an exclusive slave DMA operation, may only be called after a
+	 * successful slave configuration.
+	 */
+	return sh_chan->config->addr;
+}
+
+static struct shdma_desc *sh_dmae_embedded_desc(void *buf, int i)
+{
+	return &((struct sh_dmae_desc *)buf)[i].shdma_desc;
+}
+
+static const struct shdma_ops sh_dmae_shdma_ops = {
+	.desc_completed = sh_dmae_desc_completed,
+	.halt_channel = sh_dmae_halt,
+	.channel_busy = sh_dmae_channel_busy,
+	.slave_addr = sh_dmae_slave_addr,
+	.desc_setup = sh_dmae_desc_setup,
+	.set_slave = sh_dmae_set_slave,
+	.setup_xfer = sh_dmae_setup_xfer,
+	.start_xfer = sh_dmae_start_xfer,
+	.embedded_desc = sh_dmae_embedded_desc,
+	.chan_irq = sh_dmae_chan_irq,
+};
+
+static int __devinit sh_dmae_probe(struct platform_device *pdev)
+{
+	struct sh_dmae_pdata *pdata = pdev->dev.platform_data;
+	unsigned long irqflags = IRQF_DISABLED,
+		chan_flag[SH_DMAE_MAX_CHANNELS] = {};
+	int errirq, chan_irq[SH_DMAE_MAX_CHANNELS];
+	int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
+	struct sh_dmae_device *shdev;
+	struct dma_device *dma_dev;
+	struct resource *chan, *dmars, *errirq_res, *chanirq_res;
+
+	/* get platform data */
+	if (!pdata || !pdata->channel_num)
+		return -ENODEV;
+
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* DMARS area is optional */
+	dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	/*
+	 * IRQ resources:
+	 * 1. there always must be at least one IRQ IO-resource. On SH4 it is
+	 *    the error IRQ, in which case it is the only IRQ in this resource:
+	 *    start == end. If it is the only IRQ resource, all channels also
+	 *    use the same IRQ.
+	 * 2. DMA channel IRQ resources can be specified one per resource or in
+	 *    ranges (start != end)
+	 * 3. iff all events (channels and, optionally, error) on this
+	 *    controller use the same IRQ, only one IRQ resource can be
+	 *    specified, otherwise there must be one IRQ per channel, even if
+	 *    some of them are equal
+	 * 4. if all IRQs on this controller are equal or if some specific IRQs
+	 *    specify IORESOURCE_IRQ_SHAREABLE in their resources, they will be
+	 *    requested with the IRQF_SHARED flag
+	 */
+	errirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!chan || !errirq_res)
+		return -ENODEV;
+
+	if (!request_mem_region(chan->start, resource_size(chan), pdev->name)) {
+		dev_err(&pdev->dev, "DMAC register region already claimed\n");
+		return -EBUSY;
+	}
+
+	if (dmars && !request_mem_region(dmars->start, resource_size(dmars), pdev->name)) {
+		dev_err(&pdev->dev, "DMAC DMARS region already claimed\n");
+		err = -EBUSY;
+		goto ermrdmars;
+	}
+
+	err = -ENOMEM;
+	shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
+	if (!shdev) {
+		dev_err(&pdev->dev, "Not enough memory\n");
+		goto ealloc;
+	}
+
+	dma_dev = &shdev->shdma_dev.dma_dev;
+
+	shdev->chan_reg = ioremap(chan->start, resource_size(chan));
+	if (!shdev->chan_reg)
+		goto emapchan;
+	if (dmars) {
+		shdev->dmars = ioremap(dmars->start, resource_size(dmars));
+		if (!shdev->dmars)
+			goto emapdmars;
+	}
+
+	if (!pdata->slave_only)
+		dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	if (pdata->slave && pdata->slave_num)
+		dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+	/* Default transfer size of 32 bytes requires 32-byte alignment */
+	dma_dev->copy_align = LOG2_DEFAULT_XFER_SIZE;
+
+	shdev->shdma_dev.ops = &sh_dmae_shdma_ops;
+	shdev->shdma_dev.desc_size = sizeof(struct sh_dmae_desc);
+	err = shdma_init(&pdev->dev, &shdev->shdma_dev,
+			      pdata->channel_num);
+	if (err < 0)
+		goto eshdma;
+
+	/* platform data */
+	shdev->pdata = pdev->dev.platform_data;
+
+	if (pdata->chcr_offset)
+		shdev->chcr_offset = pdata->chcr_offset;
+	else
+		shdev->chcr_offset = CHCR;
+
+	if (pdata->chcr_ie_bit)
+		shdev->chcr_ie_bit = pdata->chcr_ie_bit;
+	else
+		shdev->chcr_ie_bit = CHCR_IE;
+
+	platform_set_drvdata(pdev, shdev);
+
+	pm_runtime_enable(&pdev->dev);
+	err = pm_runtime_get_sync(&pdev->dev);
+	if (err < 0)
+		dev_err(&pdev->dev, "%s(): GET = %d\n", __func__, err);
+
+	spin_lock_irq(&sh_dmae_lock);
+	list_add_tail_rcu(&shdev->node, &sh_dmae_devices);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	/* reset dma controller - only needed as a test */
+	err = sh_dmae_rst(shdev);
+	if (err)
+		goto rst_err;
+
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+	chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
+
+	if (!chanirq_res)
+		chanirq_res = errirq_res;
+	else
+		irqres++;
+
+	if (chanirq_res == errirq_res ||
+	    (errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE)
+		irqflags = IRQF_SHARED;
+
+	errirq = errirq_res->start;
+
+	err = request_irq(errirq, sh_dmae_err, irqflags,
+			  "DMAC Address Error", shdev);
+	if (err) {
+		dev_err(&pdev->dev,
+			"DMA failed requesting irq #%d, error %d\n",
+			errirq, err);
+		goto eirq_err;
+	}
+
+#else
+	chanirq_res = errirq_res;
+#endif /* CONFIG_CPU_SH4 || CONFIG_ARCH_SHMOBILE */
+
+	if (chanirq_res->start == chanirq_res->end &&
+	    !platform_get_resource(pdev, IORESOURCE_IRQ, 1)) {
+		/* Special case - all multiplexed */
+		for (; irq_cnt < pdata->channel_num; irq_cnt++) {
+			if (irq_cnt < SH_DMAE_MAX_CHANNELS) {
+				chan_irq[irq_cnt] = chanirq_res->start;
+				chan_flag[irq_cnt] = IRQF_SHARED;
+			} else {
+				irq_cap = 1;
+				break;
+			}
+		}
+	} else {
+		do {
+			for (i = chanirq_res->start; i <= chanirq_res->end; i++) {
+				if (irq_cnt >= SH_DMAE_MAX_CHANNELS) {
+					irq_cap = 1;
+					break;
+				}
+
+				if ((errirq_res->flags & IORESOURCE_BITS) ==
+				    IORESOURCE_IRQ_SHAREABLE)
+					chan_flag[irq_cnt] = IRQF_SHARED;
+				else
+					chan_flag[irq_cnt] = IRQF_DISABLED;
+				dev_dbg(&pdev->dev,
+					"Found IRQ %d for channel %d\n",
+					i, irq_cnt);
+				chan_irq[irq_cnt++] = i;
+			}
+
+			if (irq_cnt >= SH_DMAE_MAX_CHANNELS)
+				break;
+
+			chanirq_res = platform_get_resource(pdev,
+						IORESOURCE_IRQ, ++irqres);
+		} while (irq_cnt < pdata->channel_num && chanirq_res);
+	}
+
+	/* Create DMA Channel */
+	for (i = 0; i < irq_cnt; i++) {
+		err = sh_dmae_chan_probe(shdev, i, chan_irq[i], chan_flag[i]);
+		if (err)
+			goto chan_probe_err;
+	}
+
+	if (irq_cap)
+		dev_notice(&pdev->dev, "Attempting to register %d DMA "
+			   "channels when a maximum of %d are supported.\n",
+			   pdata->channel_num, SH_DMAE_MAX_CHANNELS);
+
+	pm_runtime_put(&pdev->dev);
+
+	err = dma_async_device_register(&shdev->shdma_dev.dma_dev);
+	if (err < 0)
+		goto edmadevreg;
+
+	return err;
+
+edmadevreg:
+	pm_runtime_get(&pdev->dev);
+
+chan_probe_err:
+	sh_dmae_chan_remove(shdev);
+
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
+	free_irq(errirq, shdev);
+eirq_err:
+#endif
+rst_err:
+	spin_lock_irq(&sh_dmae_lock);
+	list_del_rcu(&shdev->node);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	platform_set_drvdata(pdev, NULL);
+	shdma_cleanup(&shdev->shdma_dev);
+eshdma:
+	if (dmars)
+		iounmap(shdev->dmars);
+emapdmars:
+	iounmap(shdev->chan_reg);
+	synchronize_rcu();
+emapchan:
+	kfree(shdev);
+ealloc:
+	if (dmars)
+		release_mem_region(dmars->start, resource_size(dmars));
+ermrdmars:
+	release_mem_region(chan->start, resource_size(chan));
+
+	return err;
+}
+
+static int __devexit sh_dmae_remove(struct platform_device *pdev)
+{
+	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+	struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev;
+	struct resource *res;
+	int errirq = platform_get_irq(pdev, 0);
+
+	dma_async_device_unregister(dma_dev);
+
+	if (errirq > 0)
+		free_irq(errirq, shdev);
+
+	spin_lock_irq(&sh_dmae_lock);
+	list_del_rcu(&shdev->node);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	pm_runtime_disable(&pdev->dev);
+
+	sh_dmae_chan_remove(shdev);
+	shdma_cleanup(&shdev->shdma_dev);
+
+	if (shdev->dmars)
+		iounmap(shdev->dmars);
+	iounmap(shdev->chan_reg);
+
+	platform_set_drvdata(pdev, NULL);
+
+	synchronize_rcu();
+	kfree(shdev);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res)
+		release_mem_region(res->start, resource_size(res));
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (res)
+		release_mem_region(res->start, resource_size(res));
+
+	return 0;
+}
+
+static struct platform_driver sh_dmae_driver = {
+	.driver 	= {
+		.owner	= THIS_MODULE,
+		.pm	= &sh_dmae_pm,
+		.name	= SH_DMAE_DRV_NAME,
+	},
+	.remove		= __devexit_p(sh_dmae_remove),
+	.shutdown	= sh_dmae_shutdown,
+};
+
+static int __init sh_dmae_init(void)
+{
+	/* Wire up NMI handling */
+	int err = register_die_notifier(&sh_dmae_nmi_notifier);
+	if (err)
+		return err;
+
+	return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void)
+{
+	platform_driver_unregister(&sh_dmae_driver);
+
+	unregister_die_notifier(&sh_dmae_nmi_notifier);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" SH_DMAE_DRV_NAME);
diff --git a/drivers/dma/shdma.h b/drivers/dma/sh/shdma.h
similarity index 58%
rename from drivers/dma/shdma.h
rename to drivers/dma/sh/shdma.h
index 0b1d2c105f027c5a45de636bef665f321bb46968..9314e93225db73ca7cf9dadc9c336c1d828ce105 100644
--- a/drivers/dma/shdma.h
+++ b/drivers/dma/sh/shdma.h
@@ -13,42 +13,29 @@
 #ifndef __DMA_SHDMA_H
 #define __DMA_SHDMA_H
 
+#include <linux/sh_dma.h>
+#include <linux/shdma-base.h>
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
 
-#define SH_DMAC_MAX_CHANNELS 20
-#define SH_DMA_SLAVE_NUMBER 256
-#define SH_DMA_TCR_MAX 0x00FFFFFF	/* 16MB */
+#define SH_DMAE_MAX_CHANNELS 20
+#define SH_DMAE_TCR_MAX 0x00FFFFFF	/* 16MB */
 
 struct device;
 
-enum dmae_pm_state {
-	DMAE_PM_ESTABLISHED,
-	DMAE_PM_BUSY,
-	DMAE_PM_PENDING,
-};
-
 struct sh_dmae_chan {
-	spinlock_t desc_lock;		/* Descriptor operation lock */
-	struct list_head ld_queue;	/* Link descriptors queue */
-	struct list_head ld_free;	/* Link descriptors free */
-	struct dma_chan common;		/* DMA common channel */
-	struct device *dev;		/* Channel device */
-	struct tasklet_struct tasklet;	/* Tasklet */
-	int descs_allocated;		/* desc count */
+	struct shdma_chan shdma_chan;
+	const struct sh_dmae_slave_config *config; /* Slave DMA configuration */
 	int xmit_shift;			/* log_2(bytes_per_xfer) */
-	int irq;
-	int id;				/* Raw id of this channel */
 	u32 __iomem *base;
 	char dev_id[16];		/* unique name per DMAC of channel */
 	int pm_error;
-	enum dmae_pm_state pm_state;
 };
 
 struct sh_dmae_device {
-	struct dma_device common;
-	struct sh_dmae_chan *chan[SH_DMAC_MAX_CHANNELS];
+	struct shdma_dev shdma_dev;
+	struct sh_dmae_chan *chan[SH_DMAE_MAX_CHANNELS];
 	struct sh_dmae_pdata *pdata;
 	struct list_head node;
 	u32 __iomem *chan_reg;
@@ -57,10 +44,21 @@ struct sh_dmae_device {
 	u32 chcr_ie_bit;
 };
 
-#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
+struct sh_dmae_regs {
+	u32 sar; /* SAR / source address */
+	u32 dar; /* DAR / destination address */
+	u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_dmae_desc {
+	struct sh_dmae_regs hw;
+	struct shdma_desc shdma_desc;
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, shdma_chan)
 #define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
 #define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
-#define to_sh_dev(chan) container_of(chan->common.device,\
-				     struct sh_dmae_device, common)
+#define to_sh_dev(chan) container_of(chan->shdma_chan.dma_chan.device,\
+				     struct sh_dmae_device, shdma_dev.dma_dev)
 
 #endif	/* __DMA_SHDMA_H */
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
deleted file mode 100644
index 19d7a8d3975dc0a1a6c864eba39257fd41cf2ccf..0000000000000000000000000000000000000000
--- a/drivers/dma/shdma.c
+++ /dev/null
@@ -1,1524 +0,0 @@
-/*
- * Renesas SuperH DMA Engine support
- *
- * base is drivers/dma/flsdma.c
- *
- * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
- * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
- * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * - DMA of SuperH does not have Hardware DMA chain mode.
- * - MAX DMA size is 16MB.
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
-#include <linux/sh_dma.h>
-#include <linux/notifier.h>
-#include <linux/kdebug.h>
-#include <linux/spinlock.h>
-#include <linux/rculist.h>
-
-#include "dmaengine.h"
-#include "shdma.h"
-
-/* DMA descriptor control */
-enum sh_dmae_desc_status {
-	DESC_IDLE,
-	DESC_PREPARED,
-	DESC_SUBMITTED,
-	DESC_COMPLETED,	/* completed, have to call callback */
-	DESC_WAITING,	/* callback called, waiting for ack / re-submit */
-};
-
-#define NR_DESCS_PER_CHANNEL 32
-/* Default MEMCPY transfer size = 2^2 = 4 bytes */
-#define LOG2_DEFAULT_XFER_SIZE	2
-
-/*
- * Used for write-side mutual exclusion for the global device list,
- * read-side synchronization by way of RCU, and per-controller data.
- */
-static DEFINE_SPINLOCK(sh_dmae_lock);
-static LIST_HEAD(sh_dmae_devices);
-
-/* A bitmask with bits enough for enum sh_dmae_slave_chan_id */
-static unsigned long sh_dmae_slave_used[BITS_TO_LONGS(SH_DMA_SLAVE_NUMBER)];
-
-static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all);
-static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
-
-static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
-
-	__raw_writel(data, shdev->chan_reg +
-		     shdev->pdata->channel[sh_dc->id].chclr_offset);
-}
-
-static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
-{
-	__raw_writel(data, sh_dc->base + reg / sizeof(u32));
-}
-
-static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
-{
-	return __raw_readl(sh_dc->base + reg / sizeof(u32));
-}
-
-static u16 dmaor_read(struct sh_dmae_device *shdev)
-{
-	u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
-
-	if (shdev->pdata->dmaor_is_32bit)
-		return __raw_readl(addr);
-	else
-		return __raw_readw(addr);
-}
-
-static void dmaor_write(struct sh_dmae_device *shdev, u16 data)
-{
-	u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
-
-	if (shdev->pdata->dmaor_is_32bit)
-		__raw_writel(data, addr);
-	else
-		__raw_writew(data, addr);
-}
-
-static void chcr_write(struct sh_dmae_chan *sh_dc, u32 data)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
-
-	__raw_writel(data, sh_dc->base + shdev->chcr_offset / sizeof(u32));
-}
-
-static u32 chcr_read(struct sh_dmae_chan *sh_dc)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
-
-	return __raw_readl(sh_dc->base + shdev->chcr_offset / sizeof(u32));
-}
-
-/*
- * Reset DMA controller
- *
- * SH7780 has two DMAOR register
- */
-static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev)
-{
-	unsigned short dmaor;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sh_dmae_lock, flags);
-
-	dmaor = dmaor_read(shdev);
-	dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME));
-
-	spin_unlock_irqrestore(&sh_dmae_lock, flags);
-}
-
-static int sh_dmae_rst(struct sh_dmae_device *shdev)
-{
-	unsigned short dmaor;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sh_dmae_lock, flags);
-
-	dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
-
-	if (shdev->pdata->chclr_present) {
-		int i;
-		for (i = 0; i < shdev->pdata->channel_num; i++) {
-			struct sh_dmae_chan *sh_chan = shdev->chan[i];
-			if (sh_chan)
-				chclr_write(sh_chan, 0);
-		}
-	}
-
-	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
-
-	dmaor = dmaor_read(shdev);
-
-	spin_unlock_irqrestore(&sh_dmae_lock, flags);
-
-	if (dmaor & (DMAOR_AE | DMAOR_NMIF)) {
-		dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n");
-		return -EIO;
-	}
-	if (shdev->pdata->dmaor_init & ~dmaor)
-		dev_warn(shdev->common.dev,
-			 "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
-			 dmaor, shdev->pdata->dmaor_init);
-	return 0;
-}
-
-static bool dmae_is_busy(struct sh_dmae_chan *sh_chan)
-{
-	u32 chcr = chcr_read(sh_chan);
-
-	if ((chcr & (CHCR_DE | CHCR_TE)) == CHCR_DE)
-		return true; /* working */
-
-	return false; /* waiting */
-}
-
-static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	struct sh_dmae_pdata *pdata = shdev->pdata;
-	int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) |
-		((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift);
-
-	if (cnt >= pdata->ts_shift_num)
-		cnt = 0;
-
-	return pdata->ts_shift[cnt];
-}
-
-static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	struct sh_dmae_pdata *pdata = shdev->pdata;
-	int i;
-
-	for (i = 0; i < pdata->ts_shift_num; i++)
-		if (pdata->ts_shift[i] == l2size)
-			break;
-
-	if (i == pdata->ts_shift_num)
-		i = 0;
-
-	return ((i << pdata->ts_low_shift) & pdata->ts_low_mask) |
-		((i << pdata->ts_high_shift) & pdata->ts_high_mask);
-}
-
-static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw)
-{
-	sh_dmae_writel(sh_chan, hw->sar, SAR);
-	sh_dmae_writel(sh_chan, hw->dar, DAR);
-	sh_dmae_writel(sh_chan, hw->tcr >> sh_chan->xmit_shift, TCR);
-}
-
-static void dmae_start(struct sh_dmae_chan *sh_chan)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	u32 chcr = chcr_read(sh_chan);
-
-	if (shdev->pdata->needs_tend_set)
-		sh_dmae_writel(sh_chan, 0xFFFFFFFF, TEND);
-
-	chcr |= CHCR_DE | shdev->chcr_ie_bit;
-	chcr_write(sh_chan, chcr & ~CHCR_TE);
-}
-
-static void dmae_halt(struct sh_dmae_chan *sh_chan)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	u32 chcr = chcr_read(sh_chan);
-
-	chcr &= ~(CHCR_DE | CHCR_TE | shdev->chcr_ie_bit);
-	chcr_write(sh_chan, chcr);
-}
-
-static void dmae_init(struct sh_dmae_chan *sh_chan)
-{
-	/*
-	 * Default configuration for dual address memory-memory transfer.
-	 * 0x400 represents auto-request.
-	 */
-	u32 chcr = DM_INC | SM_INC | 0x400 | log2size_to_chcr(sh_chan,
-						   LOG2_DEFAULT_XFER_SIZE);
-	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr);
-	chcr_write(sh_chan, chcr);
-}
-
-static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
-{
-	/* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */
-	if (dmae_is_busy(sh_chan))
-		return -EBUSY;
-
-	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, val);
-	chcr_write(sh_chan, val);
-
-	return 0;
-}
-
-static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	struct sh_dmae_pdata *pdata = shdev->pdata;
-	const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->id];
-	u16 __iomem *addr = shdev->dmars;
-	unsigned int shift = chan_pdata->dmars_bit;
-
-	if (dmae_is_busy(sh_chan))
-		return -EBUSY;
-
-	if (pdata->no_dmars)
-		return 0;
-
-	/* in the case of a missing DMARS resource use first memory window */
-	if (!addr)
-		addr = (u16 __iomem *)shdev->chan_reg;
-	addr += chan_pdata->dmars / sizeof(u16);
-
-	__raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift),
-		     addr);
-
-	return 0;
-}
-
-static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-	struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c;
-	struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
-	struct sh_dmae_slave *param = tx->chan->private;
-	dma_async_tx_callback callback = tx->callback;
-	dma_cookie_t cookie;
-	bool power_up;
-
-	spin_lock_irq(&sh_chan->desc_lock);
-
-	if (list_empty(&sh_chan->ld_queue))
-		power_up = true;
-	else
-		power_up = false;
-
-	cookie = dma_cookie_assign(tx);
-
-	/* Mark all chunks of this descriptor as submitted, move to the queue */
-	list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
-		/*
-		 * All chunks are on the global ld_free, so, we have to find
-		 * the end of the chain ourselves
-		 */
-		if (chunk != desc && (chunk->mark == DESC_IDLE ||
-				      chunk->async_tx.cookie > 0 ||
-				      chunk->async_tx.cookie == -EBUSY ||
-				      &chunk->node == &sh_chan->ld_free))
-			break;
-		chunk->mark = DESC_SUBMITTED;
-		/* Callback goes to the last chunk */
-		chunk->async_tx.callback = NULL;
-		chunk->cookie = cookie;
-		list_move_tail(&chunk->node, &sh_chan->ld_queue);
-		last = chunk;
-	}
-
-	last->async_tx.callback = callback;
-	last->async_tx.callback_param = tx->callback_param;
-
-	dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n",
-		tx->cookie, &last->async_tx, sh_chan->id,
-		desc->hw.sar, desc->hw.tcr, desc->hw.dar);
-
-	if (power_up) {
-		sh_chan->pm_state = DMAE_PM_BUSY;
-
-		pm_runtime_get(sh_chan->dev);
-
-		spin_unlock_irq(&sh_chan->desc_lock);
-
-		pm_runtime_barrier(sh_chan->dev);
-
-		spin_lock_irq(&sh_chan->desc_lock);
-
-		/* Have we been reset, while waiting? */
-		if (sh_chan->pm_state != DMAE_PM_ESTABLISHED) {
-			dev_dbg(sh_chan->dev, "Bring up channel %d\n",
-				sh_chan->id);
-			if (param) {
-				const struct sh_dmae_slave_config *cfg =
-					param->config;
-
-				dmae_set_dmars(sh_chan, cfg->mid_rid);
-				dmae_set_chcr(sh_chan, cfg->chcr);
-			} else {
-				dmae_init(sh_chan);
-			}
-
-			if (sh_chan->pm_state == DMAE_PM_PENDING)
-				sh_chan_xfer_ld_queue(sh_chan);
-			sh_chan->pm_state = DMAE_PM_ESTABLISHED;
-		}
-	} else {
-		sh_chan->pm_state = DMAE_PM_PENDING;
-	}
-
-	spin_unlock_irq(&sh_chan->desc_lock);
-
-	return cookie;
-}
-
-/* Called with desc_lock held */
-static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
-{
-	struct sh_desc *desc;
-
-	list_for_each_entry(desc, &sh_chan->ld_free, node)
-		if (desc->mark != DESC_PREPARED) {
-			BUG_ON(desc->mark != DESC_IDLE);
-			list_del(&desc->node);
-			return desc;
-		}
-
-	return NULL;
-}
-
-static const struct sh_dmae_slave_config *sh_dmae_find_slave(
-	struct sh_dmae_chan *sh_chan, struct sh_dmae_slave *param)
-{
-	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	struct sh_dmae_pdata *pdata = shdev->pdata;
-	int i;
-
-	if (param->slave_id >= SH_DMA_SLAVE_NUMBER)
-		return NULL;
-
-	for (i = 0; i < pdata->slave_num; i++)
-		if (pdata->slave[i].slave_id == param->slave_id)
-			return pdata->slave + i;
-
-	return NULL;
-}
-
-static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
-{
-	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
-	struct sh_desc *desc;
-	struct sh_dmae_slave *param = chan->private;
-	int ret;
-
-	/*
-	 * This relies on the guarantee from dmaengine that alloc_chan_resources
-	 * never runs concurrently with itself or free_chan_resources.
-	 */
-	if (param) {
-		const struct sh_dmae_slave_config *cfg;
-
-		cfg = sh_dmae_find_slave(sh_chan, param);
-		if (!cfg) {
-			ret = -EINVAL;
-			goto efindslave;
-		}
-
-		if (test_and_set_bit(param->slave_id, sh_dmae_slave_used)) {
-			ret = -EBUSY;
-			goto etestused;
-		}
-
-		param->config = cfg;
-	}
-
-	while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
-		desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
-		if (!desc)
-			break;
-		dma_async_tx_descriptor_init(&desc->async_tx,
-					&sh_chan->common);
-		desc->async_tx.tx_submit = sh_dmae_tx_submit;
-		desc->mark = DESC_IDLE;
-
-		list_add(&desc->node, &sh_chan->ld_free);
-		sh_chan->descs_allocated++;
-	}
-
-	if (!sh_chan->descs_allocated) {
-		ret = -ENOMEM;
-		goto edescalloc;
-	}
-
-	return sh_chan->descs_allocated;
-
-edescalloc:
-	if (param)
-		clear_bit(param->slave_id, sh_dmae_slave_used);
-etestused:
-efindslave:
-	chan->private = NULL;
-	return ret;
-}
-
-/*
- * sh_dma_free_chan_resources - Free all resources of the channel.
- */
-static void sh_dmae_free_chan_resources(struct dma_chan *chan)
-{
-	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
-	struct sh_desc *desc, *_desc;
-	LIST_HEAD(list);
-
-	/* Protect against ISR */
-	spin_lock_irq(&sh_chan->desc_lock);
-	dmae_halt(sh_chan);
-	spin_unlock_irq(&sh_chan->desc_lock);
-
-	/* Now no new interrupts will occur */
-
-	/* Prepared and not submitted descriptors can still be on the queue */
-	if (!list_empty(&sh_chan->ld_queue))
-		sh_dmae_chan_ld_cleanup(sh_chan, true);
-
-	if (chan->private) {
-		/* The caller is holding dma_list_mutex */
-		struct sh_dmae_slave *param = chan->private;
-		clear_bit(param->slave_id, sh_dmae_slave_used);
-		chan->private = NULL;
-	}
-
-	spin_lock_irq(&sh_chan->desc_lock);
-
-	list_splice_init(&sh_chan->ld_free, &list);
-	sh_chan->descs_allocated = 0;
-
-	spin_unlock_irq(&sh_chan->desc_lock);
-
-	list_for_each_entry_safe(desc, _desc, &list, node)
-		kfree(desc);
-}
-
-/**
- * sh_dmae_add_desc - get, set up and return one transfer descriptor
- * @sh_chan:	DMA channel
- * @flags:	DMA transfer flags
- * @dest:	destination DMA address, incremented when direction equals
- *		DMA_DEV_TO_MEM
- * @src:	source DMA address, incremented when direction equals
- *		DMA_MEM_TO_DEV
- * @len:	DMA transfer length
- * @first:	if NULL, set to the current descriptor and cookie set to -EBUSY
- * @direction:	needed for slave DMA to decide which address to keep constant,
- *		equals DMA_MEM_TO_MEM for MEMCPY
- * Returns 0 or an error
- * Locks: called with desc_lock held
- */
-static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
-	unsigned long flags, dma_addr_t *dest, dma_addr_t *src, size_t *len,
-	struct sh_desc **first, enum dma_transfer_direction direction)
-{
-	struct sh_desc *new;
-	size_t copy_size;
-
-	if (!*len)
-		return NULL;
-
-	/* Allocate the link descriptor from the free list */
-	new = sh_dmae_get_desc(sh_chan);
-	if (!new) {
-		dev_err(sh_chan->dev, "No free link descriptor available\n");
-		return NULL;
-	}
-
-	copy_size = min(*len, (size_t)SH_DMA_TCR_MAX + 1);
-
-	new->hw.sar = *src;
-	new->hw.dar = *dest;
-	new->hw.tcr = copy_size;
-
-	if (!*first) {
-		/* First desc */
-		new->async_tx.cookie = -EBUSY;
-		*first = new;
-	} else {
-		/* Other desc - invisible to the user */
-		new->async_tx.cookie = -EINVAL;
-	}
-
-	dev_dbg(sh_chan->dev,
-		"chaining (%u/%u)@%x -> %x with %p, cookie %d, shift %d\n",
-		copy_size, *len, *src, *dest, &new->async_tx,
-		new->async_tx.cookie, sh_chan->xmit_shift);
-
-	new->mark = DESC_PREPARED;
-	new->async_tx.flags = flags;
-	new->direction = direction;
-
-	*len -= copy_size;
-	if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
-		*src += copy_size;
-	if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
-		*dest += copy_size;
-
-	return new;
-}
-
-/*
- * sh_dmae_prep_sg - prepare transfer descriptors from an SG list
- *
- * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
- * converted to scatter-gather to guarantee consistent locking and a correct
- * list manipulation. For slave DMA direction carries the usual meaning, and,
- * logically, the SG list is RAM and the addr variable contains slave address,
- * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
- * and the SG list contains only one element and points at the source buffer.
- */
-static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_chan,
-	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
-	enum dma_transfer_direction direction, unsigned long flags)
-{
-	struct scatterlist *sg;
-	struct sh_desc *first = NULL, *new = NULL /* compiler... */;
-	LIST_HEAD(tx_list);
-	int chunks = 0;
-	unsigned long irq_flags;
-	int i;
-
-	if (!sg_len)
-		return NULL;
-
-	for_each_sg(sgl, sg, sg_len, i)
-		chunks += (sg_dma_len(sg) + SH_DMA_TCR_MAX) /
-			(SH_DMA_TCR_MAX + 1);
-
-	/* Have to lock the whole loop to protect against concurrent release */
-	spin_lock_irqsave(&sh_chan->desc_lock, irq_flags);
-
-	/*
-	 * Chaining:
-	 * first descriptor is what user is dealing with in all API calls, its
-	 *	cookie is at first set to -EBUSY, at tx-submit to a positive
-	 *	number
-	 * if more than one chunk is needed further chunks have cookie = -EINVAL
-	 * the last chunk, if not equal to the first, has cookie = -ENOSPC
-	 * all chunks are linked onto the tx_list head with their .node heads
-	 *	only during this function, then they are immediately spliced
-	 *	back onto the free list in form of a chain
-	 */
-	for_each_sg(sgl, sg, sg_len, i) {
-		dma_addr_t sg_addr = sg_dma_address(sg);
-		size_t len = sg_dma_len(sg);
-
-		if (!len)
-			goto err_get_desc;
-
-		do {
-			dev_dbg(sh_chan->dev, "Add SG #%d@%p[%d], dma %llx\n",
-				i, sg, len, (unsigned long long)sg_addr);
-
-			if (direction == DMA_DEV_TO_MEM)
-				new = sh_dmae_add_desc(sh_chan, flags,
-						&sg_addr, addr, &len, &first,
-						direction);
-			else
-				new = sh_dmae_add_desc(sh_chan, flags,
-						addr, &sg_addr, &len, &first,
-						direction);
-			if (!new)
-				goto err_get_desc;
-
-			new->chunks = chunks--;
-			list_add_tail(&new->node, &tx_list);
-		} while (len);
-	}
-
-	if (new != first)
-		new->async_tx.cookie = -ENOSPC;
-
-	/* Put them back on the free list, so, they don't get lost */
-	list_splice_tail(&tx_list, &sh_chan->ld_free);
-
-	spin_unlock_irqrestore(&sh_chan->desc_lock, irq_flags);
-
-	return &first->async_tx;
-
-err_get_desc:
-	list_for_each_entry(new, &tx_list, node)
-		new->mark = DESC_IDLE;
-	list_splice(&tx_list, &sh_chan->ld_free);
-
-	spin_unlock_irqrestore(&sh_chan->desc_lock, irq_flags);
-
-	return NULL;
-}
-
-static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
-	struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
-	size_t len, unsigned long flags)
-{
-	struct sh_dmae_chan *sh_chan;
-	struct scatterlist sg;
-
-	if (!chan || !len)
-		return NULL;
-
-	sh_chan = to_sh_chan(chan);
-
-	sg_init_table(&sg, 1);
-	sg_set_page(&sg, pfn_to_page(PFN_DOWN(dma_src)), len,
-		    offset_in_page(dma_src));
-	sg_dma_address(&sg) = dma_src;
-	sg_dma_len(&sg) = len;
-
-	return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
-			       flags);
-}
-
-static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
-	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_transfer_direction direction, unsigned long flags,
-	void *context)
-{
-	struct sh_dmae_slave *param;
-	struct sh_dmae_chan *sh_chan;
-	dma_addr_t slave_addr;
-
-	if (!chan)
-		return NULL;
-
-	sh_chan = to_sh_chan(chan);
-	param = chan->private;
-
-	/* Someone calling slave DMA on a public channel? */
-	if (!param || !sg_len) {
-		dev_warn(sh_chan->dev, "%s: bad parameter: %p, %d, %d\n",
-			 __func__, param, sg_len, param ? param->slave_id : -1);
-		return NULL;
-	}
-
-	slave_addr = param->config->addr;
-
-	/*
-	 * if (param != NULL), this is a successfully requested slave channel,
-	 * therefore param->config != NULL too.
-	 */
-	return sh_dmae_prep_sg(sh_chan, sgl, sg_len, &slave_addr,
-			       direction, flags);
-}
-
-static int sh_dmae_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
-			   unsigned long arg)
-{
-	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
-	unsigned long flags;
-
-	/* Only supports DMA_TERMINATE_ALL */
-	if (cmd != DMA_TERMINATE_ALL)
-		return -ENXIO;
-
-	if (!chan)
-		return -EINVAL;
-
-	spin_lock_irqsave(&sh_chan->desc_lock, flags);
-	dmae_halt(sh_chan);
-
-	if (!list_empty(&sh_chan->ld_queue)) {
-		/* Record partial transfer */
-		struct sh_desc *desc = list_entry(sh_chan->ld_queue.next,
-						  struct sh_desc, node);
-		desc->partial = (desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) <<
-			sh_chan->xmit_shift;
-	}
-	spin_unlock_irqrestore(&sh_chan->desc_lock, flags);
-
-	sh_dmae_chan_ld_cleanup(sh_chan, true);
-
-	return 0;
-}
-
-static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
-{
-	struct sh_desc *desc, *_desc;
-	/* Is the "exposed" head of a chain acked? */
-	bool head_acked = false;
-	dma_cookie_t cookie = 0;
-	dma_async_tx_callback callback = NULL;
-	void *param = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sh_chan->desc_lock, flags);
-	list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
-		struct dma_async_tx_descriptor *tx = &desc->async_tx;
-
-		BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie);
-		BUG_ON(desc->mark != DESC_SUBMITTED &&
-		       desc->mark != DESC_COMPLETED &&
-		       desc->mark != DESC_WAITING);
-
-		/*
-		 * queue is ordered, and we use this loop to (1) clean up all
-		 * completed descriptors, and to (2) update descriptor flags of
-		 * any chunks in a (partially) completed chain
-		 */
-		if (!all && desc->mark == DESC_SUBMITTED &&
-		    desc->cookie != cookie)
-			break;
-
-		if (tx->cookie > 0)
-			cookie = tx->cookie;
-
-		if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
-			if (sh_chan->common.completed_cookie != desc->cookie - 1)
-				dev_dbg(sh_chan->dev,
-					"Completing cookie %d, expected %d\n",
-					desc->cookie,
-					sh_chan->common.completed_cookie + 1);
-			sh_chan->common.completed_cookie = desc->cookie;
-		}
-
-		/* Call callback on the last chunk */
-		if (desc->mark == DESC_COMPLETED && tx->callback) {
-			desc->mark = DESC_WAITING;
-			callback = tx->callback;
-			param = tx->callback_param;
-			dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n",
-				tx->cookie, tx, sh_chan->id);
-			BUG_ON(desc->chunks != 1);
-			break;
-		}
-
-		if (tx->cookie > 0 || tx->cookie == -EBUSY) {
-			if (desc->mark == DESC_COMPLETED) {
-				BUG_ON(tx->cookie < 0);
-				desc->mark = DESC_WAITING;
-			}
-			head_acked = async_tx_test_ack(tx);
-		} else {
-			switch (desc->mark) {
-			case DESC_COMPLETED:
-				desc->mark = DESC_WAITING;
-				/* Fall through */
-			case DESC_WAITING:
-				if (head_acked)
-					async_tx_ack(&desc->async_tx);
-			}
-		}
-
-		dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n",
-			tx, tx->cookie);
-
-		if (((desc->mark == DESC_COMPLETED ||
-		      desc->mark == DESC_WAITING) &&
-		     async_tx_test_ack(&desc->async_tx)) || all) {
-			/* Remove from ld_queue list */
-			desc->mark = DESC_IDLE;
-
-			list_move(&desc->node, &sh_chan->ld_free);
-
-			if (list_empty(&sh_chan->ld_queue)) {
-				dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id);
-				pm_runtime_put(sh_chan->dev);
-			}
-		}
-	}
-
-	if (all && !callback)
-		/*
-		 * Terminating and the loop completed normally: forgive
-		 * uncompleted cookies
-		 */
-		sh_chan->common.completed_cookie = sh_chan->common.cookie;
-
-	spin_unlock_irqrestore(&sh_chan->desc_lock, flags);
-
-	if (callback)
-		callback(param);
-
-	return callback;
-}
-
-/*
- * sh_chan_ld_cleanup - Clean up link descriptors
- *
- * This function cleans up the ld_queue of DMA channel.
- */
-static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all)
-{
-	while (__ld_cleanup(sh_chan, all))
-		;
-}
-
-/* Called under spin_lock_irq(&sh_chan->desc_lock) */
-static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
-{
-	struct sh_desc *desc;
-
-	/* DMA work check */
-	if (dmae_is_busy(sh_chan))
-		return;
-
-	/* Find the first not transferred descriptor */
-	list_for_each_entry(desc, &sh_chan->ld_queue, node)
-		if (desc->mark == DESC_SUBMITTED) {
-			dev_dbg(sh_chan->dev, "Queue #%d to %d: %u@%x -> %x\n",
-				desc->async_tx.cookie, sh_chan->id,
-				desc->hw.tcr, desc->hw.sar, desc->hw.dar);
-			/* Get the ld start address from ld_queue */
-			dmae_set_reg(sh_chan, &desc->hw);
-			dmae_start(sh_chan);
-			break;
-		}
-}
-
-static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
-{
-	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
-
-	spin_lock_irq(&sh_chan->desc_lock);
-	if (sh_chan->pm_state == DMAE_PM_ESTABLISHED)
-		sh_chan_xfer_ld_queue(sh_chan);
-	else
-		sh_chan->pm_state = DMAE_PM_PENDING;
-	spin_unlock_irq(&sh_chan->desc_lock);
-}
-
-static enum dma_status sh_dmae_tx_status(struct dma_chan *chan,
-					dma_cookie_t cookie,
-					struct dma_tx_state *txstate)
-{
-	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
-	enum dma_status status;
-	unsigned long flags;
-
-	sh_dmae_chan_ld_cleanup(sh_chan, false);
-
-	spin_lock_irqsave(&sh_chan->desc_lock, flags);
-
-	status = dma_cookie_status(chan, cookie, txstate);
-
-	/*
-	 * If we don't find cookie on the queue, it has been aborted and we have
-	 * to report error
-	 */
-	if (status != DMA_SUCCESS) {
-		struct sh_desc *desc;
-		status = DMA_ERROR;
-		list_for_each_entry(desc, &sh_chan->ld_queue, node)
-			if (desc->cookie == cookie) {
-				status = DMA_IN_PROGRESS;
-				break;
-			}
-	}
-
-	spin_unlock_irqrestore(&sh_chan->desc_lock, flags);
-
-	return status;
-}
-
-static irqreturn_t sh_dmae_interrupt(int irq, void *data)
-{
-	irqreturn_t ret = IRQ_NONE;
-	struct sh_dmae_chan *sh_chan = data;
-	u32 chcr;
-
-	spin_lock(&sh_chan->desc_lock);
-
-	chcr = chcr_read(sh_chan);
-
-	if (chcr & CHCR_TE) {
-		/* DMA stop */
-		dmae_halt(sh_chan);
-
-		ret = IRQ_HANDLED;
-		tasklet_schedule(&sh_chan->tasklet);
-	}
-
-	spin_unlock(&sh_chan->desc_lock);
-
-	return ret;
-}
-
-/* Called from error IRQ or NMI */
-static bool sh_dmae_reset(struct sh_dmae_device *shdev)
-{
-	unsigned int handled = 0;
-	int i;
-
-	/* halt the dma controller */
-	sh_dmae_ctl_stop(shdev);
-
-	/* We cannot detect, which channel caused the error, have to reset all */
-	for (i = 0; i < SH_DMAC_MAX_CHANNELS; i++) {
-		struct sh_dmae_chan *sh_chan = shdev->chan[i];
-		struct sh_desc *desc;
-		LIST_HEAD(dl);
-
-		if (!sh_chan)
-			continue;
-
-		spin_lock(&sh_chan->desc_lock);
-
-		/* Stop the channel */
-		dmae_halt(sh_chan);
-
-		list_splice_init(&sh_chan->ld_queue, &dl);
-
-		if (!list_empty(&dl)) {
-			dev_dbg(sh_chan->dev, "Bring down channel %d\n", sh_chan->id);
-			pm_runtime_put(sh_chan->dev);
-		}
-		sh_chan->pm_state = DMAE_PM_ESTABLISHED;
-
-		spin_unlock(&sh_chan->desc_lock);
-
-		/* Complete all  */
-		list_for_each_entry(desc, &dl, node) {
-			struct dma_async_tx_descriptor *tx = &desc->async_tx;
-			desc->mark = DESC_IDLE;
-			if (tx->callback)
-				tx->callback(tx->callback_param);
-		}
-
-		spin_lock(&sh_chan->desc_lock);
-		list_splice(&dl, &sh_chan->ld_free);
-		spin_unlock(&sh_chan->desc_lock);
-
-		handled++;
-	}
-
-	sh_dmae_rst(shdev);
-
-	return !!handled;
-}
-
-static irqreturn_t sh_dmae_err(int irq, void *data)
-{
-	struct sh_dmae_device *shdev = data;
-
-	if (!(dmaor_read(shdev) & DMAOR_AE))
-		return IRQ_NONE;
-
-	sh_dmae_reset(data);
-	return IRQ_HANDLED;
-}
-
-static void dmae_do_tasklet(unsigned long data)
-{
-	struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
-	struct sh_desc *desc;
-	u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
-	u32 dar_buf = sh_dmae_readl(sh_chan, DAR);
-
-	spin_lock_irq(&sh_chan->desc_lock);
-	list_for_each_entry(desc, &sh_chan->ld_queue, node) {
-		if (desc->mark == DESC_SUBMITTED &&
-		    ((desc->direction == DMA_DEV_TO_MEM &&
-		      (desc->hw.dar + desc->hw.tcr) == dar_buf) ||
-		     (desc->hw.sar + desc->hw.tcr) == sar_buf)) {
-			dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n",
-				desc->async_tx.cookie, &desc->async_tx,
-				desc->hw.dar);
-			desc->mark = DESC_COMPLETED;
-			break;
-		}
-	}
-	/* Next desc */
-	sh_chan_xfer_ld_queue(sh_chan);
-	spin_unlock_irq(&sh_chan->desc_lock);
-
-	sh_dmae_chan_ld_cleanup(sh_chan, false);
-}
-
-static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev)
-{
-	/* Fast path out if NMIF is not asserted for this controller */
-	if ((dmaor_read(shdev) & DMAOR_NMIF) == 0)
-		return false;
-
-	return sh_dmae_reset(shdev);
-}
-
-static int sh_dmae_nmi_handler(struct notifier_block *self,
-			       unsigned long cmd, void *data)
-{
-	struct sh_dmae_device *shdev;
-	int ret = NOTIFY_DONE;
-	bool triggered;
-
-	/*
-	 * Only concern ourselves with NMI events.
-	 *
-	 * Normally we would check the die chain value, but as this needs
-	 * to be architecture independent, check for NMI context instead.
-	 */
-	if (!in_nmi())
-		return NOTIFY_DONE;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(shdev, &sh_dmae_devices, node) {
-		/*
-		 * Only stop if one of the controllers has NMIF asserted,
-		 * we do not want to interfere with regular address error
-		 * handling or NMI events that don't concern the DMACs.
-		 */
-		triggered = sh_dmae_nmi_notify(shdev);
-		if (triggered == true)
-			ret = NOTIFY_OK;
-	}
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static struct notifier_block sh_dmae_nmi_notifier __read_mostly = {
-	.notifier_call	= sh_dmae_nmi_handler,
-
-	/* Run before NMI debug handler and KGDB */
-	.priority	= 1,
-};
-
-static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
-					int irq, unsigned long flags)
-{
-	int err;
-	const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id];
-	struct platform_device *pdev = to_platform_device(shdev->common.dev);
-	struct sh_dmae_chan *new_sh_chan;
-
-	/* alloc channel */
-	new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
-	if (!new_sh_chan) {
-		dev_err(shdev->common.dev,
-			"No free memory for allocating dma channels!\n");
-		return -ENOMEM;
-	}
-
-	new_sh_chan->pm_state = DMAE_PM_ESTABLISHED;
-
-	/* reference struct dma_device */
-	new_sh_chan->common.device = &shdev->common;
-	dma_cookie_init(&new_sh_chan->common);
-
-	new_sh_chan->dev = shdev->common.dev;
-	new_sh_chan->id = id;
-	new_sh_chan->irq = irq;
-	new_sh_chan->base = shdev->chan_reg + chan_pdata->offset / sizeof(u32);
-
-	/* Init DMA tasklet */
-	tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
-			(unsigned long)new_sh_chan);
-
-	spin_lock_init(&new_sh_chan->desc_lock);
-
-	/* Init descripter manage list */
-	INIT_LIST_HEAD(&new_sh_chan->ld_queue);
-	INIT_LIST_HEAD(&new_sh_chan->ld_free);
-
-	/* Add the channel to DMA device channel list */
-	list_add_tail(&new_sh_chan->common.device_node,
-			&shdev->common.channels);
-	shdev->common.chancnt++;
-
-	if (pdev->id >= 0)
-		snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
-			 "sh-dmae%d.%d", pdev->id, new_sh_chan->id);
-	else
-		snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
-			 "sh-dma%d", new_sh_chan->id);
-
-	/* set up channel irq */
-	err = request_irq(irq, &sh_dmae_interrupt, flags,
-			  new_sh_chan->dev_id, new_sh_chan);
-	if (err) {
-		dev_err(shdev->common.dev, "DMA channel %d request_irq error "
-			"with return %d\n", id, err);
-		goto err_no_irq;
-	}
-
-	shdev->chan[id] = new_sh_chan;
-	return 0;
-
-err_no_irq:
-	/* remove from dmaengine device node */
-	list_del(&new_sh_chan->common.device_node);
-	kfree(new_sh_chan);
-	return err;
-}
-
-static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
-{
-	int i;
-
-	for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
-		if (shdev->chan[i]) {
-			struct sh_dmae_chan *sh_chan = shdev->chan[i];
-
-			free_irq(sh_chan->irq, sh_chan);
-
-			list_del(&sh_chan->common.device_node);
-			kfree(sh_chan);
-			shdev->chan[i] = NULL;
-		}
-	}
-	shdev->common.chancnt = 0;
-}
-
-static int __init sh_dmae_probe(struct platform_device *pdev)
-{
-	struct sh_dmae_pdata *pdata = pdev->dev.platform_data;
-	unsigned long irqflags = IRQF_DISABLED,
-		chan_flag[SH_DMAC_MAX_CHANNELS] = {};
-	int errirq, chan_irq[SH_DMAC_MAX_CHANNELS];
-	int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
-	struct sh_dmae_device *shdev;
-	struct resource *chan, *dmars, *errirq_res, *chanirq_res;
-
-	/* get platform data */
-	if (!pdata || !pdata->channel_num)
-		return -ENODEV;
-
-	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	/* DMARS area is optional */
-	dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	/*
-	 * IRQ resources:
-	 * 1. there always must be at least one IRQ IO-resource. On SH4 it is
-	 *    the error IRQ, in which case it is the only IRQ in this resource:
-	 *    start == end. If it is the only IRQ resource, all channels also
-	 *    use the same IRQ.
-	 * 2. DMA channel IRQ resources can be specified one per resource or in
-	 *    ranges (start != end)
-	 * 3. iff all events (channels and, optionally, error) on this
-	 *    controller use the same IRQ, only one IRQ resource can be
-	 *    specified, otherwise there must be one IRQ per channel, even if
-	 *    some of them are equal
-	 * 4. if all IRQs on this controller are equal or if some specific IRQs
-	 *    specify IORESOURCE_IRQ_SHAREABLE in their resources, they will be
-	 *    requested with the IRQF_SHARED flag
-	 */
-	errirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!chan || !errirq_res)
-		return -ENODEV;
-
-	if (!request_mem_region(chan->start, resource_size(chan), pdev->name)) {
-		dev_err(&pdev->dev, "DMAC register region already claimed\n");
-		return -EBUSY;
-	}
-
-	if (dmars && !request_mem_region(dmars->start, resource_size(dmars), pdev->name)) {
-		dev_err(&pdev->dev, "DMAC DMARS region already claimed\n");
-		err = -EBUSY;
-		goto ermrdmars;
-	}
-
-	err = -ENOMEM;
-	shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
-	if (!shdev) {
-		dev_err(&pdev->dev, "Not enough memory\n");
-		goto ealloc;
-	}
-
-	shdev->chan_reg = ioremap(chan->start, resource_size(chan));
-	if (!shdev->chan_reg)
-		goto emapchan;
-	if (dmars) {
-		shdev->dmars = ioremap(dmars->start, resource_size(dmars));
-		if (!shdev->dmars)
-			goto emapdmars;
-	}
-
-	/* platform data */
-	shdev->pdata = pdata;
-
-	if (pdata->chcr_offset)
-		shdev->chcr_offset = pdata->chcr_offset;
-	else
-		shdev->chcr_offset = CHCR;
-
-	if (pdata->chcr_ie_bit)
-		shdev->chcr_ie_bit = pdata->chcr_ie_bit;
-	else
-		shdev->chcr_ie_bit = CHCR_IE;
-
-	platform_set_drvdata(pdev, shdev);
-
-	shdev->common.dev = &pdev->dev;
-
-	pm_runtime_enable(&pdev->dev);
-	pm_runtime_get_sync(&pdev->dev);
-
-	spin_lock_irq(&sh_dmae_lock);
-	list_add_tail_rcu(&shdev->node, &sh_dmae_devices);
-	spin_unlock_irq(&sh_dmae_lock);
-
-	/* reset dma controller - only needed as a test */
-	err = sh_dmae_rst(shdev);
-	if (err)
-		goto rst_err;
-
-	INIT_LIST_HEAD(&shdev->common.channels);
-
-	if (!pdata->slave_only)
-		dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
-	if (pdata->slave && pdata->slave_num)
-		dma_cap_set(DMA_SLAVE, shdev->common.cap_mask);
-
-	shdev->common.device_alloc_chan_resources
-		= sh_dmae_alloc_chan_resources;
-	shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
-	shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
-	shdev->common.device_tx_status = sh_dmae_tx_status;
-	shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
-
-	/* Compulsory for DMA_SLAVE fields */
-	shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg;
-	shdev->common.device_control = sh_dmae_control;
-
-	/* Default transfer size of 32 bytes requires 32-byte alignment */
-	shdev->common.copy_align = LOG2_DEFAULT_XFER_SIZE;
-
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
-	chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
-
-	if (!chanirq_res)
-		chanirq_res = errirq_res;
-	else
-		irqres++;
-
-	if (chanirq_res == errirq_res ||
-	    (errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE)
-		irqflags = IRQF_SHARED;
-
-	errirq = errirq_res->start;
-
-	err = request_irq(errirq, sh_dmae_err, irqflags,
-			  "DMAC Address Error", shdev);
-	if (err) {
-		dev_err(&pdev->dev,
-			"DMA failed requesting irq #%d, error %d\n",
-			errirq, err);
-		goto eirq_err;
-	}
-
-#else
-	chanirq_res = errirq_res;
-#endif /* CONFIG_CPU_SH4 || CONFIG_ARCH_SHMOBILE */
-
-	if (chanirq_res->start == chanirq_res->end &&
-	    !platform_get_resource(pdev, IORESOURCE_IRQ, 1)) {
-		/* Special case - all multiplexed */
-		for (; irq_cnt < pdata->channel_num; irq_cnt++) {
-			if (irq_cnt < SH_DMAC_MAX_CHANNELS) {
-				chan_irq[irq_cnt] = chanirq_res->start;
-				chan_flag[irq_cnt] = IRQF_SHARED;
-			} else {
-				irq_cap = 1;
-				break;
-			}
-		}
-	} else {
-		do {
-			for (i = chanirq_res->start; i <= chanirq_res->end; i++) {
-				if (irq_cnt >= SH_DMAC_MAX_CHANNELS) {
-					irq_cap = 1;
-					break;
-				}
-
-				if ((errirq_res->flags & IORESOURCE_BITS) ==
-				    IORESOURCE_IRQ_SHAREABLE)
-					chan_flag[irq_cnt] = IRQF_SHARED;
-				else
-					chan_flag[irq_cnt] = IRQF_DISABLED;
-				dev_dbg(&pdev->dev,
-					"Found IRQ %d for channel %d\n",
-					i, irq_cnt);
-				chan_irq[irq_cnt++] = i;
-			}
-
-			if (irq_cnt >= SH_DMAC_MAX_CHANNELS)
-				break;
-
-			chanirq_res = platform_get_resource(pdev,
-						IORESOURCE_IRQ, ++irqres);
-		} while (irq_cnt < pdata->channel_num && chanirq_res);
-	}
-
-	/* Create DMA Channel */
-	for (i = 0; i < irq_cnt; i++) {
-		err = sh_dmae_chan_probe(shdev, i, chan_irq[i], chan_flag[i]);
-		if (err)
-			goto chan_probe_err;
-	}
-
-	if (irq_cap)
-		dev_notice(&pdev->dev, "Attempting to register %d DMA "
-			   "channels when a maximum of %d are supported.\n",
-			   pdata->channel_num, SH_DMAC_MAX_CHANNELS);
-
-	pm_runtime_put(&pdev->dev);
-
-	dma_async_device_register(&shdev->common);
-
-	return err;
-
-chan_probe_err:
-	sh_dmae_chan_remove(shdev);
-
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
-	free_irq(errirq, shdev);
-eirq_err:
-#endif
-rst_err:
-	spin_lock_irq(&sh_dmae_lock);
-	list_del_rcu(&shdev->node);
-	spin_unlock_irq(&sh_dmae_lock);
-
-	pm_runtime_put(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
-
-	if (dmars)
-		iounmap(shdev->dmars);
-
-	platform_set_drvdata(pdev, NULL);
-emapdmars:
-	iounmap(shdev->chan_reg);
-	synchronize_rcu();
-emapchan:
-	kfree(shdev);
-ealloc:
-	if (dmars)
-		release_mem_region(dmars->start, resource_size(dmars));
-ermrdmars:
-	release_mem_region(chan->start, resource_size(chan));
-
-	return err;
-}
-
-static int __exit sh_dmae_remove(struct platform_device *pdev)
-{
-	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
-	struct resource *res;
-	int errirq = platform_get_irq(pdev, 0);
-
-	dma_async_device_unregister(&shdev->common);
-
-	if (errirq > 0)
-		free_irq(errirq, shdev);
-
-	spin_lock_irq(&sh_dmae_lock);
-	list_del_rcu(&shdev->node);
-	spin_unlock_irq(&sh_dmae_lock);
-
-	/* channel data remove */
-	sh_dmae_chan_remove(shdev);
-
-	pm_runtime_disable(&pdev->dev);
-
-	if (shdev->dmars)
-		iounmap(shdev->dmars);
-	iounmap(shdev->chan_reg);
-
-	platform_set_drvdata(pdev, NULL);
-
-	synchronize_rcu();
-	kfree(shdev);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-
-	return 0;
-}
-
-static void sh_dmae_shutdown(struct platform_device *pdev)
-{
-	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
-	sh_dmae_ctl_stop(shdev);
-}
-
-static int sh_dmae_runtime_suspend(struct device *dev)
-{
-	return 0;
-}
-
-static int sh_dmae_runtime_resume(struct device *dev)
-{
-	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
-
-	return sh_dmae_rst(shdev);
-}
-
-#ifdef CONFIG_PM
-static int sh_dmae_suspend(struct device *dev)
-{
-	return 0;
-}
-
-static int sh_dmae_resume(struct device *dev)
-{
-	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
-	int i, ret;
-
-	ret = sh_dmae_rst(shdev);
-	if (ret < 0)
-		dev_err(dev, "Failed to reset!\n");
-
-	for (i = 0; i < shdev->pdata->channel_num; i++) {
-		struct sh_dmae_chan *sh_chan = shdev->chan[i];
-		struct sh_dmae_slave *param = sh_chan->common.private;
-
-		if (!sh_chan->descs_allocated)
-			continue;
-
-		if (param) {
-			const struct sh_dmae_slave_config *cfg = param->config;
-			dmae_set_dmars(sh_chan, cfg->mid_rid);
-			dmae_set_chcr(sh_chan, cfg->chcr);
-		} else {
-			dmae_init(sh_chan);
-		}
-	}
-
-	return 0;
-}
-#else
-#define sh_dmae_suspend NULL
-#define sh_dmae_resume NULL
-#endif
-
-const struct dev_pm_ops sh_dmae_pm = {
-	.suspend		= sh_dmae_suspend,
-	.resume			= sh_dmae_resume,
-	.runtime_suspend	= sh_dmae_runtime_suspend,
-	.runtime_resume		= sh_dmae_runtime_resume,
-};
-
-static struct platform_driver sh_dmae_driver = {
-	.remove		= __exit_p(sh_dmae_remove),
-	.shutdown	= sh_dmae_shutdown,
-	.driver = {
-		.owner	= THIS_MODULE,
-		.name	= "sh-dma-engine",
-		.pm	= &sh_dmae_pm,
-	},
-};
-
-static int __init sh_dmae_init(void)
-{
-	/* Wire up NMI handling */
-	int err = register_die_notifier(&sh_dmae_nmi_notifier);
-	if (err)
-		return err;
-
-	return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
-}
-module_init(sh_dmae_init);
-
-static void __exit sh_dmae_exit(void)
-{
-	platform_driver_unregister(&sh_dmae_driver);
-
-	unregister_die_notifier(&sh_dmae_nmi_notifier);
-}
-module_exit(sh_dmae_exit);
-
-MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
-MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:sh-dma-engine");
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
new file mode 100644
index 0000000000000000000000000000000000000000..d52dbc6c54abdb346278b65bfbd8715f317d3e58
--- /dev/null
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -0,0 +1,1415 @@
+/*
+ * DMA driver for Nvidia's Tegra20 APB DMA controller.
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include <mach/clk.h>
+#include "dmaengine.h"
+
+#define TEGRA_APBDMA_GENERAL			0x0
+#define TEGRA_APBDMA_GENERAL_ENABLE		BIT(31)
+
+#define TEGRA_APBDMA_CONTROL			0x010
+#define TEGRA_APBDMA_IRQ_MASK			0x01c
+#define TEGRA_APBDMA_IRQ_MASK_SET		0x020
+
+/* CSR register */
+#define TEGRA_APBDMA_CHAN_CSR			0x00
+#define TEGRA_APBDMA_CSR_ENB			BIT(31)
+#define TEGRA_APBDMA_CSR_IE_EOC			BIT(30)
+#define TEGRA_APBDMA_CSR_HOLD			BIT(29)
+#define TEGRA_APBDMA_CSR_DIR			BIT(28)
+#define TEGRA_APBDMA_CSR_ONCE			BIT(27)
+#define TEGRA_APBDMA_CSR_FLOW			BIT(21)
+#define TEGRA_APBDMA_CSR_REQ_SEL_SHIFT		16
+#define TEGRA_APBDMA_CSR_WCOUNT_MASK		0xFFFC
+
+/* STATUS register */
+#define TEGRA_APBDMA_CHAN_STATUS		0x004
+#define TEGRA_APBDMA_STATUS_BUSY		BIT(31)
+#define TEGRA_APBDMA_STATUS_ISE_EOC		BIT(30)
+#define TEGRA_APBDMA_STATUS_HALT		BIT(29)
+#define TEGRA_APBDMA_STATUS_PING_PONG		BIT(28)
+#define TEGRA_APBDMA_STATUS_COUNT_SHIFT		2
+#define TEGRA_APBDMA_STATUS_COUNT_MASK		0xFFFC
+
+/* AHB memory address */
+#define TEGRA_APBDMA_CHAN_AHBPTR		0x010
+
+/* AHB sequence register */
+#define TEGRA_APBDMA_CHAN_AHBSEQ		0x14
+#define TEGRA_APBDMA_AHBSEQ_INTR_ENB		BIT(31)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_8		(0 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_16	(1 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32	(2 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_64	(3 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_128	(4 << 28)
+#define TEGRA_APBDMA_AHBSEQ_DATA_SWAP		BIT(27)
+#define TEGRA_APBDMA_AHBSEQ_BURST_1		(4 << 24)
+#define TEGRA_APBDMA_AHBSEQ_BURST_4		(5 << 24)
+#define TEGRA_APBDMA_AHBSEQ_BURST_8		(6 << 24)
+#define TEGRA_APBDMA_AHBSEQ_DBL_BUF		BIT(19)
+#define TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT		16
+#define TEGRA_APBDMA_AHBSEQ_WRAP_NONE		0
+
+/* APB address */
+#define TEGRA_APBDMA_CHAN_APBPTR		0x018
+
+/* APB sequence register */
+#define TEGRA_APBDMA_CHAN_APBSEQ		0x01c
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_8		(0 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_16	(1 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32	(2 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64	(3 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_128	(4 << 28)
+#define TEGRA_APBDMA_APBSEQ_DATA_SWAP		BIT(27)
+#define TEGRA_APBDMA_APBSEQ_WRAP_WORD_1		(1 << 16)
+
+/*
+ * If any burst is in flight and DMA paused then this is the time to complete
+ * on-flight burst and update DMA status register.
+ */
+#define TEGRA_APBDMA_BURST_COMPLETE_TIME	20
+
+/* Channel base address offset from APBDMA base address */
+#define TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET	0x1000
+
+/* DMA channel register space size */
+#define TEGRA_APBDMA_CHANNEL_REGISTER_SIZE	0x20
+
+struct tegra_dma;
+
+/*
+ * tegra_dma_chip_data Tegra chip specific DMA data
+ * @nr_channels: Number of channels available in the controller.
+ * @max_dma_count: Maximum DMA transfer count supported by DMA controller.
+ */
+struct tegra_dma_chip_data {
+	int nr_channels;
+	int max_dma_count;
+};
+
+/* DMA channel registers */
+struct tegra_dma_channel_regs {
+	unsigned long	csr;
+	unsigned long	ahb_ptr;
+	unsigned long	apb_ptr;
+	unsigned long	ahb_seq;
+	unsigned long	apb_seq;
+};
+
+/*
+ * tegra_dma_sg_req: Dma request details to configure hardware. This
+ * contains the details for one transfer to configure DMA hw.
+ * The client's request for data transfer can be broken into multiple
+ * sub-transfer as per requester details and hw support.
+ * This sub transfer get added in the list of transfer and point to Tegra
+ * DMA descriptor which manages the transfer details.
+ */
+struct tegra_dma_sg_req {
+	struct tegra_dma_channel_regs	ch_regs;
+	int				req_len;
+	bool				configured;
+	bool				last_sg;
+	bool				half_done;
+	struct list_head		node;
+	struct tegra_dma_desc		*dma_desc;
+};
+
+/*
+ * tegra_dma_desc: Tegra DMA descriptors which manages the client requests.
+ * This descriptor keep track of transfer status, callbacks and request
+ * counts etc.
+ */
+struct tegra_dma_desc {
+	struct dma_async_tx_descriptor	txd;
+	int				bytes_requested;
+	int				bytes_transferred;
+	enum dma_status			dma_status;
+	struct list_head		node;
+	struct list_head		tx_list;
+	struct list_head		cb_node;
+	int				cb_count;
+};
+
+struct tegra_dma_channel;
+
+typedef void (*dma_isr_handler)(struct tegra_dma_channel *tdc,
+				bool to_terminate);
+
+/* tegra_dma_channel: Channel specific information */
+struct tegra_dma_channel {
+	struct dma_chan		dma_chan;
+	bool			config_init;
+	int			id;
+	int			irq;
+	unsigned long		chan_base_offset;
+	spinlock_t		lock;
+	bool			busy;
+	struct tegra_dma	*tdma;
+	bool			cyclic;
+
+	/* Different lists for managing the requests */
+	struct list_head	free_sg_req;
+	struct list_head	pending_sg_req;
+	struct list_head	free_dma_desc;
+	struct list_head	cb_desc;
+
+	/* ISR handler and tasklet for bottom half of isr handling */
+	dma_isr_handler		isr_handler;
+	struct tasklet_struct	tasklet;
+	dma_async_tx_callback	callback;
+	void			*callback_param;
+
+	/* Channel-slave specific configuration */
+	struct dma_slave_config dma_sconfig;
+};
+
+/* tegra_dma: Tegra DMA specific information */
+struct tegra_dma {
+	struct dma_device		dma_dev;
+	struct device			*dev;
+	struct clk			*dma_clk;
+	spinlock_t			global_lock;
+	void __iomem			*base_addr;
+	struct tegra_dma_chip_data	*chip_data;
+
+	/* Some register need to be cache before suspend */
+	u32				reg_gen;
+
+	/* Last member of the structure */
+	struct tegra_dma_channel channels[0];
+};
+
+static inline void tdma_write(struct tegra_dma *tdma, u32 reg, u32 val)
+{
+	writel(val, tdma->base_addr + reg);
+}
+
+static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg)
+{
+	return readl(tdma->base_addr + reg);
+}
+
+static inline void tdc_write(struct tegra_dma_channel *tdc,
+		u32 reg, u32 val)
+{
+	writel(val, tdc->tdma->base_addr + tdc->chan_base_offset + reg);
+}
+
+static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg)
+{
+	return readl(tdc->tdma->base_addr + tdc->chan_base_offset + reg);
+}
+
+static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc)
+{
+	return container_of(dc, struct tegra_dma_channel, dma_chan);
+}
+
+static inline struct tegra_dma_desc *txd_to_tegra_dma_desc(
+		struct dma_async_tx_descriptor *td)
+{
+	return container_of(td, struct tegra_dma_desc, txd);
+}
+
+static inline struct device *tdc2dev(struct tegra_dma_channel *tdc)
+{
+	return &tdc->dma_chan.dev->device;
+}
+
+static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *tx);
+static int tegra_dma_runtime_suspend(struct device *dev);
+static int tegra_dma_runtime_resume(struct device *dev);
+
+/* Get DMA desc from free list, if not there then allocate it.  */
+static struct tegra_dma_desc *tegra_dma_desc_get(
+		struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_desc *dma_desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+
+	/* Do not allocate if desc are waiting for ack */
+	list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
+		if (async_tx_test_ack(&dma_desc->txd)) {
+			list_del(&dma_desc->node);
+			spin_unlock_irqrestore(&tdc->lock, flags);
+			return dma_desc;
+		}
+	}
+
+	spin_unlock_irqrestore(&tdc->lock, flags);
+
+	/* Allocate DMA desc */
+	dma_desc = kzalloc(sizeof(*dma_desc), GFP_ATOMIC);
+	if (!dma_desc) {
+		dev_err(tdc2dev(tdc), "dma_desc alloc failed\n");
+		return NULL;
+	}
+
+	dma_async_tx_descriptor_init(&dma_desc->txd, &tdc->dma_chan);
+	dma_desc->txd.tx_submit = tegra_dma_tx_submit;
+	dma_desc->txd.flags = 0;
+	return dma_desc;
+}
+
+static void tegra_dma_desc_put(struct tegra_dma_channel *tdc,
+		struct tegra_dma_desc *dma_desc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	if (!list_empty(&dma_desc->tx_list))
+		list_splice_init(&dma_desc->tx_list, &tdc->free_sg_req);
+	list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+	spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
+static struct tegra_dma_sg_req *tegra_dma_sg_req_get(
+		struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_sg_req *sg_req = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	if (!list_empty(&tdc->free_sg_req)) {
+		sg_req = list_first_entry(&tdc->free_sg_req,
+					typeof(*sg_req), node);
+		list_del(&sg_req->node);
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		return sg_req;
+	}
+	spin_unlock_irqrestore(&tdc->lock, flags);
+
+	sg_req = kzalloc(sizeof(struct tegra_dma_sg_req), GFP_ATOMIC);
+	if (!sg_req)
+		dev_err(tdc2dev(tdc), "sg_req alloc failed\n");
+	return sg_req;
+}
+
+static int tegra_dma_slave_config(struct dma_chan *dc,
+		struct dma_slave_config *sconfig)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+	if (!list_empty(&tdc->pending_sg_req)) {
+		dev_err(tdc2dev(tdc), "Configuration not allowed\n");
+		return -EBUSY;
+	}
+
+	memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig));
+	tdc->config_init = true;
+	return 0;
+}
+
+static void tegra_dma_global_pause(struct tegra_dma_channel *tdc,
+	bool wait_for_burst_complete)
+{
+	struct tegra_dma *tdma = tdc->tdma;
+
+	spin_lock(&tdma->global_lock);
+	tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0);
+	if (wait_for_burst_complete)
+		udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
+}
+
+static void tegra_dma_global_resume(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma *tdma = tdc->tdma;
+
+	tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE);
+	spin_unlock(&tdma->global_lock);
+}
+
+static void tegra_dma_stop(struct tegra_dma_channel *tdc)
+{
+	u32 csr;
+	u32 status;
+
+	/* Disable interrupts */
+	csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR);
+	csr &= ~TEGRA_APBDMA_CSR_IE_EOC;
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, csr);
+
+	/* Disable DMA */
+	csr &= ~TEGRA_APBDMA_CSR_ENB;
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, csr);
+
+	/* Clear interrupt status if it is there */
+	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+		dev_dbg(tdc2dev(tdc), "%s():clearing interrupt\n", __func__);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
+	}
+	tdc->busy = false;
+}
+
+static void tegra_dma_start(struct tegra_dma_channel *tdc,
+		struct tegra_dma_sg_req *sg_req)
+{
+	struct tegra_dma_channel_regs *ch_regs = &sg_req->ch_regs;
+
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, ch_regs->csr);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_APBSEQ, ch_regs->apb_seq);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, ch_regs->apb_ptr);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBSEQ, ch_regs->ahb_seq);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, ch_regs->ahb_ptr);
+
+	/* Start DMA */
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
+				ch_regs->csr | TEGRA_APBDMA_CSR_ENB);
+}
+
+static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
+		struct tegra_dma_sg_req *nsg_req)
+{
+	unsigned long status;
+
+	/*
+	 * The DMA controller reloads the new configuration for next transfer
+	 * after last burst of current transfer completes.
+	 * If there is no IEC status then this makes sure that last burst
+	 * has not be completed. There may be case that last burst is on
+	 * flight and so it can complete but because DMA is paused, it
+	 * will not generates interrupt as well as not reload the new
+	 * configuration.
+	 * If there is already IEC status then interrupt handler need to
+	 * load new configuration.
+	 */
+	tegra_dma_global_pause(tdc, false);
+	status  = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+
+	/*
+	 * If interrupt is pending then do nothing as the ISR will handle
+	 * the programing for new request.
+	 */
+	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+		dev_err(tdc2dev(tdc),
+			"Skipping new configuration as interrupt is pending\n");
+		tegra_dma_global_resume(tdc);
+		return;
+	}
+
+	/* Safe to program new configuration */
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, nsg_req->ch_regs.apb_ptr);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, nsg_req->ch_regs.ahb_ptr);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
+				nsg_req->ch_regs.csr | TEGRA_APBDMA_CSR_ENB);
+	nsg_req->configured = true;
+
+	tegra_dma_global_resume(tdc);
+}
+
+static void tdc_start_head_req(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_sg_req *sg_req;
+
+	if (list_empty(&tdc->pending_sg_req))
+		return;
+
+	sg_req = list_first_entry(&tdc->pending_sg_req,
+					typeof(*sg_req), node);
+	tegra_dma_start(tdc, sg_req);
+	sg_req->configured = true;
+	tdc->busy = true;
+}
+
+static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_sg_req *hsgreq;
+	struct tegra_dma_sg_req *hnsgreq;
+
+	if (list_empty(&tdc->pending_sg_req))
+		return;
+
+	hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
+	if (!list_is_last(&hsgreq->node, &tdc->pending_sg_req)) {
+		hnsgreq = list_first_entry(&hsgreq->node,
+					typeof(*hnsgreq), node);
+		tegra_dma_configure_for_next(tdc, hnsgreq);
+	}
+}
+
+static inline int get_current_xferred_count(struct tegra_dma_channel *tdc,
+	struct tegra_dma_sg_req *sg_req, unsigned long status)
+{
+	return sg_req->req_len - (status & TEGRA_APBDMA_STATUS_COUNT_MASK) - 4;
+}
+
+static void tegra_dma_abort_all(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_sg_req *sgreq;
+	struct tegra_dma_desc *dma_desc;
+
+	while (!list_empty(&tdc->pending_sg_req)) {
+		sgreq = list_first_entry(&tdc->pending_sg_req,
+						typeof(*sgreq), node);
+		list_del(&sgreq->node);
+		list_add_tail(&sgreq->node, &tdc->free_sg_req);
+		if (sgreq->last_sg) {
+			dma_desc = sgreq->dma_desc;
+			dma_desc->dma_status = DMA_ERROR;
+			list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+
+			/* Add in cb list if it is not there. */
+			if (!dma_desc->cb_count)
+				list_add_tail(&dma_desc->cb_node,
+							&tdc->cb_desc);
+			dma_desc->cb_count++;
+		}
+	}
+	tdc->isr_handler = NULL;
+}
+
+static bool handle_continuous_head_request(struct tegra_dma_channel *tdc,
+		struct tegra_dma_sg_req *last_sg_req, bool to_terminate)
+{
+	struct tegra_dma_sg_req *hsgreq = NULL;
+
+	if (list_empty(&tdc->pending_sg_req)) {
+		dev_err(tdc2dev(tdc), "Dma is running without req\n");
+		tegra_dma_stop(tdc);
+		return false;
+	}
+
+	/*
+	 * Check that head req on list should be in flight.
+	 * If it is not in flight then abort transfer as
+	 * looping of transfer can not continue.
+	 */
+	hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
+	if (!hsgreq->configured) {
+		tegra_dma_stop(tdc);
+		dev_err(tdc2dev(tdc), "Error in dma transfer, aborting dma\n");
+		tegra_dma_abort_all(tdc);
+		return false;
+	}
+
+	/* Configure next request */
+	if (!to_terminate)
+		tdc_configure_next_head_desc(tdc);
+	return true;
+}
+
+static void handle_once_dma_done(struct tegra_dma_channel *tdc,
+	bool to_terminate)
+{
+	struct tegra_dma_sg_req *sgreq;
+	struct tegra_dma_desc *dma_desc;
+
+	tdc->busy = false;
+	sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
+	dma_desc = sgreq->dma_desc;
+	dma_desc->bytes_transferred += sgreq->req_len;
+
+	list_del(&sgreq->node);
+	if (sgreq->last_sg) {
+		dma_desc->dma_status = DMA_SUCCESS;
+		dma_cookie_complete(&dma_desc->txd);
+		if (!dma_desc->cb_count)
+			list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
+		dma_desc->cb_count++;
+		list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+	}
+	list_add_tail(&sgreq->node, &tdc->free_sg_req);
+
+	/* Do not start DMA if it is going to be terminate */
+	if (to_terminate || list_empty(&tdc->pending_sg_req))
+		return;
+
+	tdc_start_head_req(tdc);
+	return;
+}
+
+static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc,
+		bool to_terminate)
+{
+	struct tegra_dma_sg_req *sgreq;
+	struct tegra_dma_desc *dma_desc;
+	bool st;
+
+	sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
+	dma_desc = sgreq->dma_desc;
+	dma_desc->bytes_transferred += sgreq->req_len;
+
+	/* Callback need to be call */
+	if (!dma_desc->cb_count)
+		list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
+	dma_desc->cb_count++;
+
+	/* If not last req then put at end of pending list */
+	if (!list_is_last(&sgreq->node, &tdc->pending_sg_req)) {
+		list_del(&sgreq->node);
+		list_add_tail(&sgreq->node, &tdc->pending_sg_req);
+		sgreq->configured = false;
+		st = handle_continuous_head_request(tdc, sgreq, to_terminate);
+		if (!st)
+			dma_desc->dma_status = DMA_ERROR;
+	}
+	return;
+}
+
+static void tegra_dma_tasklet(unsigned long data)
+{
+	struct tegra_dma_channel *tdc = (struct tegra_dma_channel *)data;
+	dma_async_tx_callback callback = NULL;
+	void *callback_param = NULL;
+	struct tegra_dma_desc *dma_desc;
+	unsigned long flags;
+	int cb_count;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	while (!list_empty(&tdc->cb_desc)) {
+		dma_desc  = list_first_entry(&tdc->cb_desc,
+					typeof(*dma_desc), cb_node);
+		list_del(&dma_desc->cb_node);
+		callback = dma_desc->txd.callback;
+		callback_param = dma_desc->txd.callback_param;
+		cb_count = dma_desc->cb_count;
+		dma_desc->cb_count = 0;
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		while (cb_count-- && callback)
+			callback(callback_param);
+		spin_lock_irqsave(&tdc->lock, flags);
+	}
+	spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
+static irqreturn_t tegra_dma_isr(int irq, void *dev_id)
+{
+	struct tegra_dma_channel *tdc = dev_id;
+	unsigned long status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+
+	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
+		tdc->isr_handler(tdc, false);
+		tasklet_schedule(&tdc->tasklet);
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		return IRQ_HANDLED;
+	}
+
+	spin_unlock_irqrestore(&tdc->lock, flags);
+	dev_info(tdc2dev(tdc),
+		"Interrupt already served status 0x%08lx\n", status);
+	return IRQ_NONE;
+}
+
+static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct tegra_dma_desc *dma_desc = txd_to_tegra_dma_desc(txd);
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(txd->chan);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	dma_desc->dma_status = DMA_IN_PROGRESS;
+	cookie = dma_cookie_assign(&dma_desc->txd);
+	list_splice_tail_init(&dma_desc->tx_list, &tdc->pending_sg_req);
+	spin_unlock_irqrestore(&tdc->lock, flags);
+	return cookie;
+}
+
+static void tegra_dma_issue_pending(struct dma_chan *dc)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	if (list_empty(&tdc->pending_sg_req)) {
+		dev_err(tdc2dev(tdc), "No DMA request\n");
+		goto end;
+	}
+	if (!tdc->busy) {
+		tdc_start_head_req(tdc);
+
+		/* Continuous single mode: Configure next req */
+		if (tdc->cyclic) {
+			/*
+			 * Wait for 1 burst time for configure DMA for
+			 * next transfer.
+			 */
+			udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
+			tdc_configure_next_head_desc(tdc);
+		}
+	}
+end:
+	spin_unlock_irqrestore(&tdc->lock, flags);
+	return;
+}
+
+static void tegra_dma_terminate_all(struct dma_chan *dc)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma_sg_req *sgreq;
+	struct tegra_dma_desc *dma_desc;
+	unsigned long flags;
+	unsigned long status;
+	bool was_busy;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	if (list_empty(&tdc->pending_sg_req)) {
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		return;
+	}
+
+	if (!tdc->busy)
+		goto skip_dma_stop;
+
+	/* Pause DMA before checking the queue status */
+	tegra_dma_global_pause(tdc, true);
+
+	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+		dev_dbg(tdc2dev(tdc), "%s():handling isr\n", __func__);
+		tdc->isr_handler(tdc, true);
+		status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+	}
+
+	was_busy = tdc->busy;
+	tegra_dma_stop(tdc);
+
+	if (!list_empty(&tdc->pending_sg_req) && was_busy) {
+		sgreq = list_first_entry(&tdc->pending_sg_req,
+					typeof(*sgreq), node);
+		sgreq->dma_desc->bytes_transferred +=
+				get_current_xferred_count(tdc, sgreq, status);
+	}
+	tegra_dma_global_resume(tdc);
+
+skip_dma_stop:
+	tegra_dma_abort_all(tdc);
+
+	while (!list_empty(&tdc->cb_desc)) {
+		dma_desc  = list_first_entry(&tdc->cb_desc,
+					typeof(*dma_desc), cb_node);
+		list_del(&dma_desc->cb_node);
+		dma_desc->cb_count = 0;
+	}
+	spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
+static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
+	dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma_desc *dma_desc;
+	struct tegra_dma_sg_req *sg_req;
+	enum dma_status ret;
+	unsigned long flags;
+	unsigned int residual;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+
+	ret = dma_cookie_status(dc, cookie, txstate);
+	if (ret == DMA_SUCCESS) {
+		dma_set_residue(txstate, 0);
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		return ret;
+	}
+
+	/* Check on wait_ack desc status */
+	list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
+		if (dma_desc->txd.cookie == cookie) {
+			residual =  dma_desc->bytes_requested -
+					(dma_desc->bytes_transferred %
+						dma_desc->bytes_requested);
+			dma_set_residue(txstate, residual);
+			ret = dma_desc->dma_status;
+			spin_unlock_irqrestore(&tdc->lock, flags);
+			return ret;
+		}
+	}
+
+	/* Check in pending list */
+	list_for_each_entry(sg_req, &tdc->pending_sg_req, node) {
+		dma_desc = sg_req->dma_desc;
+		if (dma_desc->txd.cookie == cookie) {
+			residual =  dma_desc->bytes_requested -
+					(dma_desc->bytes_transferred %
+						dma_desc->bytes_requested);
+			dma_set_residue(txstate, residual);
+			ret = dma_desc->dma_status;
+			spin_unlock_irqrestore(&tdc->lock, flags);
+			return ret;
+		}
+	}
+
+	dev_dbg(tdc2dev(tdc), "cookie %d does not found\n", cookie);
+	spin_unlock_irqrestore(&tdc->lock, flags);
+	return ret;
+}
+
+static int tegra_dma_device_control(struct dma_chan *dc, enum dma_ctrl_cmd cmd,
+			unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_SLAVE_CONFIG:
+		return tegra_dma_slave_config(dc,
+				(struct dma_slave_config *)arg);
+
+	case DMA_TERMINATE_ALL:
+		tegra_dma_terminate_all(dc);
+		return 0;
+
+	default:
+		break;
+	}
+
+	return -ENXIO;
+}
+
+static inline int get_bus_width(struct tegra_dma_channel *tdc,
+		enum dma_slave_buswidth slave_bw)
+{
+	switch (slave_bw) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_8;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_16;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32;
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64;
+	default:
+		dev_warn(tdc2dev(tdc),
+			"slave bw is not supported, using 32bits\n");
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32;
+	}
+}
+
+static inline int get_burst_size(struct tegra_dma_channel *tdc,
+	u32 burst_size, enum dma_slave_buswidth slave_bw, int len)
+{
+	int burst_byte;
+	int burst_ahb_width;
+
+	/*
+	 * burst_size from client is in terms of the bus_width.
+	 * convert them into AHB memory width which is 4 byte.
+	 */
+	burst_byte = burst_size * slave_bw;
+	burst_ahb_width = burst_byte / 4;
+
+	/* If burst size is 0 then calculate the burst size based on length */
+	if (!burst_ahb_width) {
+		if (len & 0xF)
+			return TEGRA_APBDMA_AHBSEQ_BURST_1;
+		else if ((len >> 4) & 0x1)
+			return TEGRA_APBDMA_AHBSEQ_BURST_4;
+		else
+			return TEGRA_APBDMA_AHBSEQ_BURST_8;
+	}
+	if (burst_ahb_width < 4)
+		return TEGRA_APBDMA_AHBSEQ_BURST_1;
+	else if (burst_ahb_width < 8)
+		return TEGRA_APBDMA_AHBSEQ_BURST_4;
+	else
+		return TEGRA_APBDMA_AHBSEQ_BURST_8;
+}
+
+static int get_transfer_param(struct tegra_dma_channel *tdc,
+	enum dma_transfer_direction direction, unsigned long *apb_addr,
+	unsigned long *apb_seq,	unsigned long *csr, unsigned int *burst_size,
+	enum dma_slave_buswidth *slave_bw)
+{
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		*apb_addr = tdc->dma_sconfig.dst_addr;
+		*apb_seq = get_bus_width(tdc, tdc->dma_sconfig.dst_addr_width);
+		*burst_size = tdc->dma_sconfig.dst_maxburst;
+		*slave_bw = tdc->dma_sconfig.dst_addr_width;
+		*csr = TEGRA_APBDMA_CSR_DIR;
+		return 0;
+
+	case DMA_DEV_TO_MEM:
+		*apb_addr = tdc->dma_sconfig.src_addr;
+		*apb_seq = get_bus_width(tdc, tdc->dma_sconfig.src_addr_width);
+		*burst_size = tdc->dma_sconfig.src_maxburst;
+		*slave_bw = tdc->dma_sconfig.src_addr_width;
+		*csr = 0;
+		return 0;
+
+	default:
+		dev_err(tdc2dev(tdc), "Dma direction is not supported\n");
+		return -EINVAL;
+	}
+	return -EINVAL;
+}
+
+static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
+	struct dma_chan *dc, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma_desc *dma_desc;
+	unsigned int	    i;
+	struct scatterlist      *sg;
+	unsigned long csr, ahb_seq, apb_ptr, apb_seq;
+	struct list_head req_list;
+	struct tegra_dma_sg_req  *sg_req = NULL;
+	u32 burst_size;
+	enum dma_slave_buswidth slave_bw;
+	int ret;
+
+	if (!tdc->config_init) {
+		dev_err(tdc2dev(tdc), "dma channel is not configured\n");
+		return NULL;
+	}
+	if (sg_len < 1) {
+		dev_err(tdc2dev(tdc), "Invalid segment length %d\n", sg_len);
+		return NULL;
+	}
+
+	ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr,
+				&burst_size, &slave_bw);
+	if (ret < 0)
+		return NULL;
+
+	INIT_LIST_HEAD(&req_list);
+
+	ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB;
+	ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE <<
+					TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT;
+	ahb_seq |= TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32;
+
+	csr |= TEGRA_APBDMA_CSR_ONCE | TEGRA_APBDMA_CSR_FLOW;
+	csr |= tdc->dma_sconfig.slave_id << TEGRA_APBDMA_CSR_REQ_SEL_SHIFT;
+	if (flags & DMA_PREP_INTERRUPT)
+		csr |= TEGRA_APBDMA_CSR_IE_EOC;
+
+	apb_seq |= TEGRA_APBDMA_APBSEQ_WRAP_WORD_1;
+
+	dma_desc = tegra_dma_desc_get(tdc);
+	if (!dma_desc) {
+		dev_err(tdc2dev(tdc), "Dma descriptors not available\n");
+		return NULL;
+	}
+	INIT_LIST_HEAD(&dma_desc->tx_list);
+	INIT_LIST_HEAD(&dma_desc->cb_node);
+	dma_desc->cb_count = 0;
+	dma_desc->bytes_requested = 0;
+	dma_desc->bytes_transferred = 0;
+	dma_desc->dma_status = DMA_IN_PROGRESS;
+
+	/* Make transfer requests */
+	for_each_sg(sgl, sg, sg_len, i) {
+		u32 len, mem;
+
+		mem = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+
+		if ((len & 3) || (mem & 3) ||
+				(len > tdc->tdma->chip_data->max_dma_count)) {
+			dev_err(tdc2dev(tdc),
+				"Dma length/memory address is not supported\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+
+		sg_req = tegra_dma_sg_req_get(tdc);
+		if (!sg_req) {
+			dev_err(tdc2dev(tdc), "Dma sg-req not available\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+
+		ahb_seq |= get_burst_size(tdc, burst_size, slave_bw, len);
+		dma_desc->bytes_requested += len;
+
+		sg_req->ch_regs.apb_ptr = apb_ptr;
+		sg_req->ch_regs.ahb_ptr = mem;
+		sg_req->ch_regs.csr = csr | ((len - 4) & 0xFFFC);
+		sg_req->ch_regs.apb_seq = apb_seq;
+		sg_req->ch_regs.ahb_seq = ahb_seq;
+		sg_req->configured = false;
+		sg_req->last_sg = false;
+		sg_req->dma_desc = dma_desc;
+		sg_req->req_len = len;
+
+		list_add_tail(&sg_req->node, &dma_desc->tx_list);
+	}
+	sg_req->last_sg = true;
+	if (flags & DMA_CTRL_ACK)
+		dma_desc->txd.flags = DMA_CTRL_ACK;
+
+	/*
+	 * Make sure that mode should not be conflicting with currently
+	 * configured mode.
+	 */
+	if (!tdc->isr_handler) {
+		tdc->isr_handler = handle_once_dma_done;
+		tdc->cyclic = false;
+	} else {
+		if (tdc->cyclic) {
+			dev_err(tdc2dev(tdc), "DMA configured in cyclic mode\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+	}
+
+	return &dma_desc->txd;
+}
+
+struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
+	struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	void *context)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma_desc *dma_desc = NULL;
+	struct tegra_dma_sg_req  *sg_req = NULL;
+	unsigned long csr, ahb_seq, apb_ptr, apb_seq;
+	int len;
+	size_t remain_len;
+	dma_addr_t mem = buf_addr;
+	u32 burst_size;
+	enum dma_slave_buswidth slave_bw;
+	int ret;
+
+	if (!buf_len || !period_len) {
+		dev_err(tdc2dev(tdc), "Invalid buffer/period len\n");
+		return NULL;
+	}
+
+	if (!tdc->config_init) {
+		dev_err(tdc2dev(tdc), "DMA slave is not configured\n");
+		return NULL;
+	}
+
+	/*
+	 * We allow to take more number of requests till DMA is
+	 * not started. The driver will loop over all requests.
+	 * Once DMA is started then new requests can be queued only after
+	 * terminating the DMA.
+	 */
+	if (tdc->busy) {
+		dev_err(tdc2dev(tdc), "Request not allowed when dma running\n");
+		return NULL;
+	}
+
+	/*
+	 * We only support cycle transfer when buf_len is multiple of
+	 * period_len.
+	 */
+	if (buf_len % period_len) {
+		dev_err(tdc2dev(tdc), "buf_len is not multiple of period_len\n");
+		return NULL;
+	}
+
+	len = period_len;
+	if ((len & 3) || (buf_addr & 3) ||
+			(len > tdc->tdma->chip_data->max_dma_count)) {
+		dev_err(tdc2dev(tdc), "Req len/mem address is not correct\n");
+		return NULL;
+	}
+
+	ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr,
+				&burst_size, &slave_bw);
+	if (ret < 0)
+		return NULL;
+
+
+	ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB;
+	ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE <<
+					TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT;
+	ahb_seq |= TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32;
+
+	csr |= TEGRA_APBDMA_CSR_FLOW | TEGRA_APBDMA_CSR_IE_EOC;
+	csr |= tdc->dma_sconfig.slave_id << TEGRA_APBDMA_CSR_REQ_SEL_SHIFT;
+
+	apb_seq |= TEGRA_APBDMA_APBSEQ_WRAP_WORD_1;
+
+	dma_desc = tegra_dma_desc_get(tdc);
+	if (!dma_desc) {
+		dev_err(tdc2dev(tdc), "not enough descriptors available\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&dma_desc->tx_list);
+	INIT_LIST_HEAD(&dma_desc->cb_node);
+	dma_desc->cb_count = 0;
+
+	dma_desc->bytes_transferred = 0;
+	dma_desc->bytes_requested = buf_len;
+	remain_len = buf_len;
+
+	/* Split transfer equal to period size */
+	while (remain_len) {
+		sg_req = tegra_dma_sg_req_get(tdc);
+		if (!sg_req) {
+			dev_err(tdc2dev(tdc), "Dma sg-req not available\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+
+		ahb_seq |= get_burst_size(tdc, burst_size, slave_bw, len);
+		sg_req->ch_regs.apb_ptr = apb_ptr;
+		sg_req->ch_regs.ahb_ptr = mem;
+		sg_req->ch_regs.csr = csr | ((len - 4) & 0xFFFC);
+		sg_req->ch_regs.apb_seq = apb_seq;
+		sg_req->ch_regs.ahb_seq = ahb_seq;
+		sg_req->configured = false;
+		sg_req->half_done = false;
+		sg_req->last_sg = false;
+		sg_req->dma_desc = dma_desc;
+		sg_req->req_len = len;
+
+		list_add_tail(&sg_req->node, &dma_desc->tx_list);
+		remain_len -= len;
+		mem += len;
+	}
+	sg_req->last_sg = true;
+	dma_desc->txd.flags = 0;
+
+	/*
+	 * Make sure that mode should not be conflicting with currently
+	 * configured mode.
+	 */
+	if (!tdc->isr_handler) {
+		tdc->isr_handler = handle_cont_sngl_cycle_dma_done;
+		tdc->cyclic = true;
+	} else {
+		if (!tdc->cyclic) {
+			dev_err(tdc2dev(tdc), "DMA configuration conflict\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+	}
+
+	return &dma_desc->txd;
+}
+
+static int tegra_dma_alloc_chan_resources(struct dma_chan *dc)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+	dma_cookie_init(&tdc->dma_chan);
+	tdc->config_init = false;
+	return 0;
+}
+
+static void tegra_dma_free_chan_resources(struct dma_chan *dc)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+	struct tegra_dma_desc *dma_desc;
+	struct tegra_dma_sg_req *sg_req;
+	struct list_head dma_desc_list;
+	struct list_head sg_req_list;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&dma_desc_list);
+	INIT_LIST_HEAD(&sg_req_list);
+
+	dev_dbg(tdc2dev(tdc), "Freeing channel %d\n", tdc->id);
+
+	if (tdc->busy)
+		tegra_dma_terminate_all(dc);
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	list_splice_init(&tdc->pending_sg_req, &sg_req_list);
+	list_splice_init(&tdc->free_sg_req, &sg_req_list);
+	list_splice_init(&tdc->free_dma_desc, &dma_desc_list);
+	INIT_LIST_HEAD(&tdc->cb_desc);
+	tdc->config_init = false;
+	spin_unlock_irqrestore(&tdc->lock, flags);
+
+	while (!list_empty(&dma_desc_list)) {
+		dma_desc = list_first_entry(&dma_desc_list,
+					typeof(*dma_desc), node);
+		list_del(&dma_desc->node);
+		kfree(dma_desc);
+	}
+
+	while (!list_empty(&sg_req_list)) {
+		sg_req = list_first_entry(&sg_req_list, typeof(*sg_req), node);
+		list_del(&sg_req->node);
+		kfree(sg_req);
+	}
+}
+
+/* Tegra20 specific DMA controller information */
+static struct tegra_dma_chip_data tegra20_dma_chip_data = {
+	.nr_channels		= 16,
+	.max_dma_count		= 1024UL * 64,
+};
+
+#if defined(CONFIG_OF)
+/* Tegra30 specific DMA controller information */
+static struct tegra_dma_chip_data tegra30_dma_chip_data = {
+	.nr_channels		= 32,
+	.max_dma_count		= 1024UL * 64,
+};
+
+static const struct of_device_id tegra_dma_of_match[] __devinitconst = {
+	{
+		.compatible = "nvidia,tegra30-apbdma",
+		.data = &tegra30_dma_chip_data,
+	}, {
+		.compatible = "nvidia,tegra20-apbdma",
+		.data = &tegra20_dma_chip_data,
+	}, {
+	},
+};
+MODULE_DEVICE_TABLE(of, tegra_dma_of_match);
+#endif
+
+static int __devinit tegra_dma_probe(struct platform_device *pdev)
+{
+	struct resource	*res;
+	struct tegra_dma *tdma;
+	int ret;
+	int i;
+	struct tegra_dma_chip_data *cdata = NULL;
+
+	if (pdev->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_device(of_match_ptr(tegra_dma_of_match),
+					&pdev->dev);
+		if (!match) {
+			dev_err(&pdev->dev, "Error: No device match found\n");
+			return -ENODEV;
+		}
+		cdata = match->data;
+	} else {
+		/* If no device tree then fallback to tegra20 */
+		cdata = &tegra20_dma_chip_data;
+	}
+
+	tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels *
+			sizeof(struct tegra_dma_channel), GFP_KERNEL);
+	if (!tdma) {
+		dev_err(&pdev->dev, "Error: memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	tdma->dev = &pdev->dev;
+	tdma->chip_data = cdata;
+	platform_set_drvdata(pdev, tdma);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "No mem resource for DMA\n");
+		return -EINVAL;
+	}
+
+	tdma->base_addr = devm_request_and_ioremap(&pdev->dev, res);
+	if (!tdma->base_addr) {
+		dev_err(&pdev->dev,
+			"Cannot request memregion/iomap dma address\n");
+		return -EADDRNOTAVAIL;
+	}
+
+	tdma->dma_clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(tdma->dma_clk)) {
+		dev_err(&pdev->dev, "Error: Missing controller clock\n");
+		return PTR_ERR(tdma->dma_clk);
+	}
+
+	spin_lock_init(&tdma->global_lock);
+
+	pm_runtime_enable(&pdev->dev);
+	if (!pm_runtime_enabled(&pdev->dev)) {
+		ret = tegra_dma_runtime_resume(&pdev->dev);
+		if (ret) {
+			dev_err(&pdev->dev, "dma_runtime_resume failed %d\n",
+				ret);
+			goto err_pm_disable;
+		}
+	}
+
+	/* Reset DMA controller */
+	tegra_periph_reset_assert(tdma->dma_clk);
+	udelay(2);
+	tegra_periph_reset_deassert(tdma->dma_clk);
+
+	/* Enable global DMA registers */
+	tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE);
+	tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
+	tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFFul);
+
+	INIT_LIST_HEAD(&tdma->dma_dev.channels);
+	for (i = 0; i < cdata->nr_channels; i++) {
+		struct tegra_dma_channel *tdc = &tdma->channels[i];
+		char irq_name[30];
+
+		tdc->chan_base_offset = TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET +
+					i * TEGRA_APBDMA_CHANNEL_REGISTER_SIZE;
+
+		res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!res) {
+			ret = -EINVAL;
+			dev_err(&pdev->dev, "No irq resource for chan %d\n", i);
+			goto err_irq;
+		}
+		tdc->irq = res->start;
+		snprintf(irq_name, sizeof(irq_name), "apbdma.%d", i);
+		ret = devm_request_irq(&pdev->dev, tdc->irq,
+				tegra_dma_isr, 0, irq_name, tdc);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"request_irq failed with err %d channel %d\n",
+				i, ret);
+			goto err_irq;
+		}
+
+		tdc->dma_chan.device = &tdma->dma_dev;
+		dma_cookie_init(&tdc->dma_chan);
+		list_add_tail(&tdc->dma_chan.device_node,
+				&tdma->dma_dev.channels);
+		tdc->tdma = tdma;
+		tdc->id = i;
+
+		tasklet_init(&tdc->tasklet, tegra_dma_tasklet,
+				(unsigned long)tdc);
+		spin_lock_init(&tdc->lock);
+
+		INIT_LIST_HEAD(&tdc->pending_sg_req);
+		INIT_LIST_HEAD(&tdc->free_sg_req);
+		INIT_LIST_HEAD(&tdc->free_dma_desc);
+		INIT_LIST_HEAD(&tdc->cb_desc);
+	}
+
+	dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask);
+
+	tdma->dma_dev.dev = &pdev->dev;
+	tdma->dma_dev.device_alloc_chan_resources =
+					tegra_dma_alloc_chan_resources;
+	tdma->dma_dev.device_free_chan_resources =
+					tegra_dma_free_chan_resources;
+	tdma->dma_dev.device_prep_slave_sg = tegra_dma_prep_slave_sg;
+	tdma->dma_dev.device_prep_dma_cyclic = tegra_dma_prep_dma_cyclic;
+	tdma->dma_dev.device_control = tegra_dma_device_control;
+	tdma->dma_dev.device_tx_status = tegra_dma_tx_status;
+	tdma->dma_dev.device_issue_pending = tegra_dma_issue_pending;
+
+	ret = dma_async_device_register(&tdma->dma_dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Tegra20 APB DMA driver registration failed %d\n", ret);
+		goto err_irq;
+	}
+
+	dev_info(&pdev->dev, "Tegra20 APB DMA driver register %d channels\n",
+			cdata->nr_channels);
+	return 0;
+
+err_irq:
+	while (--i >= 0) {
+		struct tegra_dma_channel *tdc = &tdma->channels[i];
+		tasklet_kill(&tdc->tasklet);
+	}
+
+err_pm_disable:
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_dma_runtime_suspend(&pdev->dev);
+	return ret;
+}
+
+static int __devexit tegra_dma_remove(struct platform_device *pdev)
+{
+	struct tegra_dma *tdma = platform_get_drvdata(pdev);
+	int i;
+	struct tegra_dma_channel *tdc;
+
+	dma_async_device_unregister(&tdma->dma_dev);
+
+	for (i = 0; i < tdma->chip_data->nr_channels; ++i) {
+		tdc = &tdma->channels[i];
+		tasklet_kill(&tdc->tasklet);
+	}
+
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_dma_runtime_suspend(&pdev->dev);
+
+	return 0;
+}
+
+static int tegra_dma_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct tegra_dma *tdma = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(tdma->dma_clk);
+	return 0;
+}
+
+static int tegra_dma_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct tegra_dma *tdma = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = clk_prepare_enable(tdma->dma_clk);
+	if (ret < 0) {
+		dev_err(dev, "clk_enable failed: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops tegra_dma_dev_pm_ops __devinitconst = {
+#ifdef CONFIG_PM_RUNTIME
+	.runtime_suspend = tegra_dma_runtime_suspend,
+	.runtime_resume = tegra_dma_runtime_resume,
+#endif
+};
+
+static struct platform_driver tegra_dmac_driver = {
+	.driver = {
+		.name	= "tegra-apbdma",
+		.owner = THIS_MODULE,
+		.pm	= &tegra_dma_dev_pm_ops,
+		.of_match_table = of_match_ptr(tegra_dma_of_match),
+	},
+	.probe		= tegra_dma_probe,
+	.remove		= __devexit_p(tegra_dma_remove),
+};
+
+module_platform_driver(tegra_dmac_driver);
+
+MODULE_ALIAS("platform:tegra20-apbdma");
+MODULE_DESCRIPTION("NVIDIA Tegra APB DMA Controller driver");
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index b2af7136cd27445d33225e36d5314005dcfb39b6..5d8142773fac073b75cf56db111ecde2d398717d 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -213,8 +213,6 @@ struct sh_mmcif_host {
 	struct mmc_host *mmc;
 	struct mmc_request *mrq;
 	struct platform_device *pd;
-	struct sh_dmae_slave dma_slave_tx;
-	struct sh_dmae_slave dma_slave_rx;
 	struct clk *hclk;
 	unsigned int clk;
 	int bus_width;
@@ -373,59 +371,69 @@ static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
 		desc, cookie);
 }
 
-static bool sh_mmcif_filter(struct dma_chan *chan, void *arg)
-{
-	dev_dbg(chan->device->dev, "%s: slave data %p\n", __func__, arg);
-	chan->private = arg;
-	return true;
-}
-
 static void sh_mmcif_request_dma(struct sh_mmcif_host *host,
 				 struct sh_mmcif_plat_data *pdata)
 {
-	struct sh_dmae_slave *tx, *rx;
+	struct resource *res = platform_get_resource(host->pd, IORESOURCE_MEM, 0);
+	struct dma_slave_config cfg;
+	dma_cap_mask_t mask;
+	int ret;
+
 	host->dma_active = false;
 
 	if (!pdata)
 		return;
 
+	if (pdata->slave_id_tx <= 0 || pdata->slave_id_rx <= 0)
+		return;
+
 	/* We can only either use DMA for both Tx and Rx or not use it at all */
-	if (pdata->dma) {
-		dev_warn(&host->pd->dev,
-			 "Update your platform to use embedded DMA slave IDs\n");
-		tx = &pdata->dma->chan_priv_tx;
-		rx = &pdata->dma->chan_priv_rx;
-	} else {
-		tx = &host->dma_slave_tx;
-		tx->slave_id = pdata->slave_id_tx;
-		rx = &host->dma_slave_rx;
-		rx->slave_id = pdata->slave_id_rx;
-	}
-	if (tx->slave_id > 0 && rx->slave_id > 0) {
-		dma_cap_mask_t mask;
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
 
-		dma_cap_zero(mask);
-		dma_cap_set(DMA_SLAVE, mask);
+	host->chan_tx = dma_request_channel(mask, shdma_chan_filter,
+					    (void *)pdata->slave_id_tx);
+	dev_dbg(&host->pd->dev, "%s: TX: got channel %p\n", __func__,
+		host->chan_tx);
 
-		host->chan_tx = dma_request_channel(mask, sh_mmcif_filter, tx);
-		dev_dbg(&host->pd->dev, "%s: TX: got channel %p\n", __func__,
-			host->chan_tx);
+	if (!host->chan_tx)
+		return;
 
-		if (!host->chan_tx)
-			return;
+	cfg.slave_id = pdata->slave_id_tx;
+	cfg.direction = DMA_MEM_TO_DEV;
+	cfg.dst_addr = res->start + MMCIF_CE_DATA;
+	cfg.src_addr = 0;
+	ret = dmaengine_slave_config(host->chan_tx, &cfg);
+	if (ret < 0)
+		goto ecfgtx;
 
-		host->chan_rx = dma_request_channel(mask, sh_mmcif_filter, rx);
-		dev_dbg(&host->pd->dev, "%s: RX: got channel %p\n", __func__,
-			host->chan_rx);
+	host->chan_rx = dma_request_channel(mask, shdma_chan_filter,
+					    (void *)pdata->slave_id_rx);
+	dev_dbg(&host->pd->dev, "%s: RX: got channel %p\n", __func__,
+		host->chan_rx);
 
-		if (!host->chan_rx) {
-			dma_release_channel(host->chan_tx);
-			host->chan_tx = NULL;
-			return;
-		}
+	if (!host->chan_rx)
+		goto erqrx;
 
-		init_completion(&host->dma_complete);
-	}
+	cfg.slave_id = pdata->slave_id_rx;
+	cfg.direction = DMA_DEV_TO_MEM;
+	cfg.dst_addr = 0;
+	cfg.src_addr = res->start + MMCIF_CE_DATA;
+	ret = dmaengine_slave_config(host->chan_rx, &cfg);
+	if (ret < 0)
+		goto ecfgrx;
+
+	init_completion(&host->dma_complete);
+
+	return;
+
+ecfgrx:
+	dma_release_channel(host->chan_rx);
+	host->chan_rx = NULL;
+erqrx:
+ecfgtx:
+	dma_release_channel(host->chan_tx);
+	host->chan_tx = NULL;
 }
 
 static void sh_mmcif_release_dma(struct sh_mmcif_host *host)
diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index a842939e46555295f32418ac05f96c495d734ea2..0bdc146178dbcee41aa55c1f27754bb36c256d37 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -169,10 +169,10 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev)
 			mmc_data->get_cd = sh_mobile_sdhi_get_cd;
 
 		if (p->dma_slave_tx > 0 && p->dma_slave_rx > 0) {
-			priv->param_tx.slave_id = p->dma_slave_tx;
-			priv->param_rx.slave_id = p->dma_slave_rx;
-			priv->dma_priv.chan_priv_tx = &priv->param_tx;
-			priv->dma_priv.chan_priv_rx = &priv->param_rx;
+			priv->param_tx.shdma_slave.slave_id = p->dma_slave_tx;
+			priv->param_rx.shdma_slave.slave_id = p->dma_slave_rx;
+			priv->dma_priv.chan_priv_tx = &priv->param_tx.shdma_slave;
+			priv->dma_priv.chan_priv_rx = &priv->param_rx.shdma_slave;
 			priv->dma_priv.alignment_shift = 1; /* 2-byte alignment */
 			mmc_data->dma = &priv->dma_priv;
 		}
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 1bd9163bc1181eb0aaeae952acfaf910911b3720..d4d8c9453cd8c8b3e01a056f2ce3a2e20926412d 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1615,9 +1615,9 @@ static bool filter(struct dma_chan *chan, void *slave)
 	struct sh_dmae_slave *param = slave;
 
 	dev_dbg(chan->device->dev, "%s: slave ID %d\n", __func__,
-		param->slave_id);
+		param->shdma_slave.slave_id);
 
-	chan->private = param;
+	chan->private = &param->shdma_slave;
 	return true;
 }
 
@@ -1656,7 +1656,7 @@ static void sci_request_dma(struct uart_port *port)
 	param = &s->param_tx;
 
 	/* Slave ID, e.g., SHDMA_SLAVE_SCIF0_TX */
-	param->slave_id = s->cfg->dma_slave_tx;
+	param->shdma_slave.slave_id = s->cfg->dma_slave_tx;
 
 	s->cookie_tx = -EINVAL;
 	chan = dma_request_channel(mask, filter, param);
@@ -1684,7 +1684,7 @@ static void sci_request_dma(struct uart_port *port)
 	param = &s->param_rx;
 
 	/* Slave ID, e.g., SHDMA_SLAVE_SCIF0_RX */
-	param->slave_id = s->cfg->dma_slave_rx;
+	param->shdma_slave.slave_id = s->cfg->dma_slave_rx;
 
 	chan = dma_request_channel(mask, filter, param);
 	dev_dbg(port->dev, "%s: RX: got channel %p\n", __func__, chan);
diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index 08313574aac8719aaf1f1b85b1c339cd25f23123..30b757a3f59e1e70c9a1478687e66f15267510b3 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -994,7 +994,7 @@ static bool usbhsf_dma_filter(struct dma_chan *chan, void *param)
 	 *
 	 * usbhs doesn't recognize id = 0 as valid DMA
 	 */
-	if (0 == slave->slave_id)
+	if (0 == slave->shdma_slave.slave_id)
 		return false;
 
 	chan->private = slave;
@@ -1173,8 +1173,8 @@ int usbhs_fifo_probe(struct usbhs_priv *priv)
 	fifo->port	= D0FIFO;
 	fifo->sel	= D0FIFOSEL;
 	fifo->ctr	= D0FIFOCTR;
-	fifo->tx_slave.slave_id	= usbhs_get_dparam(priv, d0_tx_id);
-	fifo->rx_slave.slave_id	= usbhs_get_dparam(priv, d0_rx_id);
+	fifo->tx_slave.shdma_slave.slave_id	= usbhs_get_dparam(priv, d0_tx_id);
+	fifo->rx_slave.shdma_slave.slave_id	= usbhs_get_dparam(priv, d0_rx_id);
 
 	/* D1FIFO */
 	fifo = usbhsf_get_d1fifo(priv);
@@ -1182,8 +1182,8 @@ int usbhs_fifo_probe(struct usbhs_priv *priv)
 	fifo->port	= D1FIFO;
 	fifo->sel	= D1FIFOSEL;
 	fifo->ctr	= D1FIFOCTR;
-	fifo->tx_slave.slave_id	= usbhs_get_dparam(priv, d1_tx_id);
-	fifo->rx_slave.slave_id	= usbhs_get_dparam(priv, d1_rx_id);
+	fifo->tx_slave.shdma_slave.slave_id	= usbhs_get_dparam(priv, d1_tx_id);
+	fifo->rx_slave.shdma_slave.slave_id	= usbhs_get_dparam(priv, d1_rx_id);
 
 	return 0;
 }
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index cc0756a35ae3bbf2339ca39937260b90fc6d0fd0..9c02a4508b25380123ab6e2b24c6879645b2961a 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -338,6 +338,9 @@ enum dma_slave_buswidth {
  * @device_fc: Flow Controller Settings. Only valid for slave channels. Fill
  * with 'true' if peripheral should be flow controller. Direction will be
  * selected at Runtime.
+ * @slave_id: Slave requester id. Only valid for slave channels. The dma
+ * slave peripheral will have unique id as dma requester which need to be
+ * pass as slave config.
  *
  * This struct is passed in as configuration data to a DMA engine
  * in order to set up a certain channel for DMA transport at runtime.
@@ -365,6 +368,7 @@ struct dma_slave_config {
 	u32 src_maxburst;
 	u32 dst_maxburst;
 	bool device_fc;
+	unsigned int slave_id;
 };
 
 static inline const char *dma_chan_name(struct dma_chan *chan)
diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index c2f73cbb4d5cb14912a01efa59983b21e5ff081a..e7d5dd67bb74fa42fbac10a67840667bc6c8e062 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -32,17 +32,11 @@
  * 1111 : Peripheral clock (sup_pclk set '1')
  */
 
-struct sh_mmcif_dma {
-	struct sh_dmae_slave chan_priv_tx;
-	struct sh_dmae_slave chan_priv_rx;
-};
-
 struct sh_mmcif_plat_data {
 	void (*set_pwr)(struct platform_device *pdev, int state);
 	void (*down_pwr)(struct platform_device *pdev);
 	int (*get_cd)(struct platform_device *pdef);
-	struct sh_mmcif_dma	*dma;		/* Deprecated. Instead */
-	unsigned int		slave_id_tx;	/* use embedded slave_id_[tr]x */
+	unsigned int		slave_id_tx;	/* embedded slave_id_[tr]x */
 	unsigned int		slave_id_rx;
 	bool			use_cd_gpio : 1;
 	unsigned int		cd_gpio;
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index 425450b980b8a8e60ea532b76702d552c3da6857..b64d6bec6f903637617f06f153547295336d4133 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -10,38 +10,27 @@
 #ifndef SH_DMA_H
 #define SH_DMA_H
 
-#include <linux/list.h>
 #include <linux/dmaengine.h>
+#include <linux/list.h>
+#include <linux/shdma-base.h>
+#include <linux/types.h>
+
+struct device;
 
 /* Used by slave DMA clients to request DMA to/from a specific peripheral */
 struct sh_dmae_slave {
-	unsigned int			slave_id; /* Set by the platform */
-	struct device			*dma_dev; /* Set by the platform */
-	const struct sh_dmae_slave_config	*config;  /* Set by the driver */
-};
-
-struct sh_dmae_regs {
-	u32 sar; /* SAR / source address */
-	u32 dar; /* DAR / destination address */
-	u32 tcr; /* TCR / transfer count */
-};
-
-struct sh_desc {
-	struct sh_dmae_regs hw;
-	struct list_head node;
-	struct dma_async_tx_descriptor async_tx;
-	enum dma_transfer_direction direction;
-	dma_cookie_t cookie;
-	size_t partial;
-	int chunks;
-	int mark;
+	struct shdma_slave		shdma_slave;	/* Set by the platform */
 };
 
+/*
+ * Supplied by platforms to specify, how a DMA channel has to be configured for
+ * a certain peripheral
+ */
 struct sh_dmae_slave_config {
-	unsigned int			slave_id;
-	dma_addr_t			addr;
-	u32				chcr;
-	char				mid_rid;
+	int		slave_id;
+	dma_addr_t	addr;
+	u32		chcr;
+	char		mid_rid;
 };
 
 struct sh_dmae_channel {
@@ -110,4 +99,6 @@ struct sh_dmae_pdata {
 #define CHCR_TE	0x00000002
 #define CHCR_IE	0x00000004
 
+bool shdma_chan_filter(struct dma_chan *chan, void *arg);
+
 #endif
diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
new file mode 100644
index 0000000000000000000000000000000000000000..93f9821554b6792adc3a2a435de89449c3b0e3ef
--- /dev/null
+++ b/include/linux/shdma-base.h
@@ -0,0 +1,124 @@
+/*
+ * Dmaengine driver base library for DMA controllers, found on SH-based SoCs
+ *
+ * extracted from shdma.c and headers
+ *
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SHDMA_BASE_H
+#define SHDMA_BASE_H
+
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+/**
+ * shdma_pm_state - DMA channel PM state
+ * SHDMA_PM_ESTABLISHED:	either idle or during data transfer
+ * SHDMA_PM_BUSY:		during the transfer preparation, when we have to
+ *				drop the lock temporarily
+ * SHDMA_PM_PENDING:	transfers pending
+ */
+enum shdma_pm_state {
+	SHDMA_PM_ESTABLISHED,
+	SHDMA_PM_BUSY,
+	SHDMA_PM_PENDING,
+};
+
+struct device;
+
+/*
+ * Drivers, using this library are expected to embed struct shdma_dev,
+ * struct shdma_chan, struct shdma_desc, and struct shdma_slave
+ * in their respective device, channel, descriptor and slave objects.
+ */
+
+struct shdma_slave {
+	int slave_id;
+};
+
+struct shdma_desc {
+	struct list_head node;
+	struct dma_async_tx_descriptor async_tx;
+	enum dma_transfer_direction direction;
+	dma_cookie_t cookie;
+	int chunks;
+	int mark;
+};
+
+struct shdma_chan {
+	spinlock_t chan_lock;		/* Channel operation lock */
+	struct list_head ld_queue;	/* Link descriptors queue */
+	struct list_head ld_free;	/* Free link descriptors */
+	struct dma_chan dma_chan;	/* DMA channel */
+	struct device *dev;		/* Channel device */
+	void *desc;			/* buffer for descriptor array */
+	int desc_num;			/* desc count */
+	size_t max_xfer_len;		/* max transfer length */
+	int id;				/* Raw id of this channel */
+	int irq;			/* Channel IRQ */
+	int slave_id;			/* Client ID for slave DMA */
+	enum shdma_pm_state pm_state;
+};
+
+/**
+ * struct shdma_ops - simple DMA driver operations
+ * desc_completed:	return true, if this is the descriptor, that just has
+ *			completed (atomic)
+ * halt_channel:	stop DMA channel operation (atomic)
+ * channel_busy:	return true, if the channel is busy (atomic)
+ * slave_addr:		return slave DMA address
+ * desc_setup:		set up the hardware specific descriptor portion (atomic)
+ * set_slave:		bind channel to a slave
+ * setup_xfer:		configure channel hardware for operation (atomic)
+ * start_xfer:		start the DMA transfer (atomic)
+ * embedded_desc:	return Nth struct shdma_desc pointer from the
+ *			descriptor array
+ * chan_irq:		process channel IRQ, return true if a transfer has
+ *			completed (atomic)
+ */
+struct shdma_ops {
+	bool (*desc_completed)(struct shdma_chan *, struct shdma_desc *);
+	void (*halt_channel)(struct shdma_chan *);
+	bool (*channel_busy)(struct shdma_chan *);
+	dma_addr_t (*slave_addr)(struct shdma_chan *);
+	int (*desc_setup)(struct shdma_chan *, struct shdma_desc *,
+			  dma_addr_t, dma_addr_t, size_t *);
+	int (*set_slave)(struct shdma_chan *, int, bool);
+	void (*setup_xfer)(struct shdma_chan *, int);
+	void (*start_xfer)(struct shdma_chan *, struct shdma_desc *);
+	struct shdma_desc *(*embedded_desc)(void *, int);
+	bool (*chan_irq)(struct shdma_chan *, int);
+};
+
+struct shdma_dev {
+	struct dma_device dma_dev;
+	struct shdma_chan **schan;
+	const struct shdma_ops *ops;
+	size_t desc_size;
+};
+
+#define shdma_for_each_chan(c, d, i) for (i = 0, c = (d)->schan[0]; \
+				i < (d)->dma_dev.chancnt; c = (d)->schan[++i])
+
+int shdma_request_irq(struct shdma_chan *, int,
+			   unsigned long, const char *);
+void shdma_free_irq(struct shdma_chan *);
+bool shdma_reset(struct shdma_dev *sdev);
+void shdma_chan_probe(struct shdma_dev *sdev,
+			   struct shdma_chan *schan, int id);
+void shdma_chan_remove(struct shdma_chan *schan);
+int shdma_init(struct device *dev, struct shdma_dev *sdev,
+		    int chan_num);
+void shdma_cleanup(struct shdma_dev *sdev);
+
+#endif
diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c
index 53486ff9c2afab75064de91599a2e1297a744359..0540408a9fa9ab706c28e7ef6ed60a329bec027c 100644
--- a/sound/soc/sh/fsi.c
+++ b/sound/soc/sh/fsi.c
@@ -1631,8 +1631,8 @@ static void fsi_handler_init(struct fsi_priv *fsi)
 	fsi->capture.priv	= fsi;
 
 	if (fsi->info->tx_id) {
-		fsi->playback.slave.slave_id	= fsi->info->tx_id;
-		fsi->playback.handler		= &fsi_dma_push_handler;
+		fsi->playback.slave.shdma_slave.slave_id = fsi->info->tx_id;
+		fsi->playback.handler = &fsi_dma_push_handler;
 	}
 }
 
diff --git a/sound/soc/sh/siu_pcm.c b/sound/soc/sh/siu_pcm.c
index 5cfcc655e95ffe33670c68e0e998e7a1ceec17af..488f9becb44f3a6653dd66a72e1881d8c2168694 100644
--- a/sound/soc/sh/siu_pcm.c
+++ b/sound/soc/sh/siu_pcm.c
@@ -330,12 +330,9 @@ static bool filter(struct dma_chan *chan, void *slave)
 {
 	struct sh_dmae_slave *param = slave;
 
-	pr_debug("%s: slave ID %d\n", __func__, param->slave_id);
+	pr_debug("%s: slave ID %d\n", __func__, param->shdma_slave.slave_id);
 
-	if (unlikely(param->dma_dev != chan->device->dev))
-		return false;
-
-	chan->private = param;
+	chan->private = &param->shdma_slave;
 	return true;
 }
 
@@ -360,16 +357,15 @@ static int siu_pcm_open(struct snd_pcm_substream *ss)
 	if (ss->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		siu_stream = &port_info->playback;
 		param = &siu_stream->param;
-		param->slave_id = port ? pdata->dma_slave_tx_b :
+		param->shdma_slave.slave_id = port ? pdata->dma_slave_tx_b :
 			pdata->dma_slave_tx_a;
 	} else {
 		siu_stream = &port_info->capture;
 		param = &siu_stream->param;
-		param->slave_id = port ? pdata->dma_slave_rx_b :
+		param->shdma_slave.slave_id = port ? pdata->dma_slave_rx_b :
 			pdata->dma_slave_rx_a;
 	}
 
-	param->dma_dev = pdata->dma_dev;
 	/* Get DMA channel */
 	siu_stream->chan = dma_request_channel(mask, filter, param);
 	if (!siu_stream->chan) {