diff --git a/arch/arm/mach-exynos/dma.c b/arch/arm/mach-exynos/dma.c
index f60b66dbcf84ebe5ebe750e3e89521724d7a8d73..21d568b3b1497c90bb79eb841f45595020351db9 100644
--- a/arch/arm/mach-exynos/dma.c
+++ b/arch/arm/mach-exynos/dma.c
@@ -303,10 +303,12 @@ static int __init exynos_dma_init(void)
 
 	dma_cap_set(DMA_SLAVE, exynos_pdma0_pdata.cap_mask);
 	dma_cap_set(DMA_CYCLIC, exynos_pdma0_pdata.cap_mask);
+	dma_cap_set(DMA_PRIVATE, exynos_pdma0_pdata.cap_mask);
 	amba_device_register(&exynos_pdma0_device, &iomem_resource);
 
 	dma_cap_set(DMA_SLAVE, exynos_pdma1_pdata.cap_mask);
 	dma_cap_set(DMA_CYCLIC, exynos_pdma1_pdata.cap_mask);
+	dma_cap_set(DMA_PRIVATE, exynos_pdma1_pdata.cap_mask);
 	amba_device_register(&exynos_pdma1_device, &iomem_resource);
 
 	dma_cap_set(DMA_MEMCPY, exynos_mdma1_pdata.cap_mask);
diff --git a/arch/arm/mach-spear13xx/spear13xx.c b/arch/arm/mach-spear13xx/spear13xx.c
index e10648801b2ecd3560fec71f7e2dc3681d1e8bce..5633d698f1e189c29d04eb09f8f18276f9d85b73 100644
--- a/arch/arm/mach-spear13xx/spear13xx.c
+++ b/arch/arm/mach-spear13xx/spear13xx.c
@@ -78,6 +78,9 @@ struct dw_dma_platform_data dmac_plat_data = {
 	.nr_channels = 8,
 	.chan_allocation_order = CHAN_ALLOCATION_DESCENDING,
 	.chan_priority = CHAN_PRIORITY_DESCENDING,
+	.block_size = 4095U,
+	.nr_masters = 2,
+	.data_width = { 3, 3, 0, 0 },
 };
 
 void __init spear13xx_l2x0_init(void)
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 0445c4fd67e311e864cf6f9d8e70f4ef3bb530f6..b323d8d3185b35b91a21da1857bcd340d935449c 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -605,6 +605,9 @@ static void __init genclk_init_parent(struct clk *clk)
 
 static struct dw_dma_platform_data dw_dmac0_data = {
 	.nr_channels	= 3,
+	.block_size	= 4095U,
+	.nr_masters	= 2,
+	.data_width	= { 2, 2, 0, 0 },
 };
 
 static struct resource dw_dmac0_resource[] = {
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d06ea2950dd9ffb9ad266bcedbb2db934e020376..677cd6e4e1a1c1afb387217fdc9f0f363c3149cf 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -208,6 +208,16 @@ config SIRF_DMA
 	help
 	  Enable support for the CSR SiRFprimaII DMA engine.
 
+config TI_EDMA
+	tristate "TI EDMA support"
+	depends on ARCH_DAVINCI
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	default n
+	help
+	  Enable support for the TI EDMA controller. This DMA
+	  engine is found on TI DaVinci and AM33xx parts.
+
 config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 	bool
 
@@ -292,6 +302,13 @@ config DMA_OMAP
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 
+config MMP_PDMA
+	bool "MMP PDMA support"
+	depends on (ARCH_MMP || ARCH_PXA)
+	select DMA_ENGINE
+	help
+	  Support the MMP PDMA engine for PXA and MMP platfrom.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 4cf6b128ab9a466b8f4c2c5237064f0f8e80846f..7428feaa8705393e13a7425ea5259dc8eabd4d30 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
+obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
@@ -32,3 +33,4 @@ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
 obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 6fbeebb9486fdae644a0268393c7650033b856b4..d1cc5791476bc39c700f9ab0913eb5f3e50d1bda 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1892,6 +1892,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	pl08x->pd = dev_get_platdata(&adev->dev);
 	if (!pl08x->pd) {
 		dev_err(&adev->dev, "no platform data supplied\n");
+		ret = -EINVAL;
 		goto out_no_platdata;
 	}
 
@@ -1943,6 +1944,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 		dev_err(&adev->dev, "%s failed to allocate "
 			"physical channel holders\n",
 			__func__);
+		ret = -ENOMEM;
 		goto out_no_phychans;
 	}
 
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index d3c5a5a88f1e9af91b97848f6d9c107ac2010da5..c4b0eb3cde819977fbf2ac45cfb1b588ccc04f37 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -36,12 +36,22 @@
  * which does not support descriptor writeback.
  */
 
+static inline unsigned int dwc_get_dms(struct dw_dma_slave *slave)
+{
+	return slave ? slave->dst_master : 0;
+}
+
+static inline unsigned int dwc_get_sms(struct dw_dma_slave *slave)
+{
+	return slave ? slave->src_master : 1;
+}
+
 #define DWC_DEFAULT_CTLLO(_chan) ({				\
 		struct dw_dma_slave *__slave = (_chan->private);	\
 		struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan);	\
 		struct dma_slave_config	*_sconfig = &_dwc->dma_sconfig;	\
-		int _dms = __slave ? __slave->dst_master : 0;	\
-		int _sms = __slave ? __slave->src_master : 1;	\
+		int _dms = dwc_get_dms(__slave);		\
+		int _sms = dwc_get_sms(__slave);		\
 		u8 _smsize = __slave ? _sconfig->src_maxburst :	\
 			DW_DMA_MSIZE_16;			\
 		u8 _dmsize = __slave ? _sconfig->dst_maxburst :	\
@@ -55,16 +65,6 @@
 		 | DWC_CTLL_SMS(_sms));				\
 	})
 
-/*
- * This is configuration-dependent and usually a funny size like 4095.
- *
- * Note that this is a transfer count, i.e. if we transfer 32-bit
- * words, we can do 16380 bytes per descriptor.
- *
- * This parameter is also system-specific.
- */
-#define DWC_MAX_COUNT	4095U
-
 /*
  * Number of descriptors to allocate for each channel. This should be
  * made configurable somehow; preferably, the clients (at least the
@@ -177,6 +177,11 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 
 		cfghi = dws->cfg_hi;
 		cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK;
+	} else {
+		if (dwc->dma_sconfig.direction == DMA_MEM_TO_DEV)
+			cfghi = DWC_CFGH_DST_PER(dwc->dma_sconfig.slave_id);
+		else if (dwc->dma_sconfig.direction == DMA_DEV_TO_MEM)
+			cfghi = DWC_CFGH_SRC_PER(dwc->dma_sconfig.slave_id);
 	}
 
 	channel_writel(dwc, CFG_LO, cfglo);
@@ -206,7 +211,7 @@ static inline unsigned int dwc_fast_fls(unsigned long long v)
 	return 0;
 }
 
-static void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
+static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
 {
 	dev_err(chan2dev(&dwc->chan),
 		"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
@@ -227,10 +232,29 @@ static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc)
 
 /*----------------------------------------------------------------------*/
 
+/* Perform single block transfer */
+static inline void dwc_do_single_block(struct dw_dma_chan *dwc,
+				       struct dw_desc *desc)
+{
+	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+	u32		ctllo;
+
+	/* Software emulation of LLP mode relies on interrupts to continue
+	 * multi block transfer. */
+	ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN;
+
+	channel_writel(dwc, SAR, desc->lli.sar);
+	channel_writel(dwc, DAR, desc->lli.dar);
+	channel_writel(dwc, CTL_LO, ctllo);
+	channel_writel(dwc, CTL_HI, desc->lli.ctlhi);
+	channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
 /* Called with dwc->lock held and bh disabled */
 static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 {
 	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+	unsigned long	was_soft_llp;
 
 	/* ASSERT:  channel is idle */
 	if (dma_readl(dw, CH_EN) & dwc->mask) {
@@ -242,6 +266,26 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 		return;
 	}
 
+	if (dwc->nollp) {
+		was_soft_llp = test_and_set_bit(DW_DMA_IS_SOFT_LLP,
+						&dwc->flags);
+		if (was_soft_llp) {
+			dev_err(chan2dev(&dwc->chan),
+				"BUG: Attempted to start new LLP transfer "
+				"inside ongoing one\n");
+			return;
+		}
+
+		dwc_initialize(dwc);
+
+		dwc->tx_list = &first->tx_list;
+		dwc->tx_node_active = first->tx_list.next;
+
+		dwc_do_single_block(dwc, first);
+
+		return;
+	}
+
 	dwc_initialize(dwc);
 
 	channel_writel(dwc, LLP, first->txd.phys);
@@ -553,8 +597,36 @@ static void dw_dma_tasklet(unsigned long data)
 			dwc_handle_cyclic(dw, dwc, status_err, status_xfer);
 		else if (status_err & (1 << i))
 			dwc_handle_error(dw, dwc);
-		else if (status_xfer & (1 << i))
+		else if (status_xfer & (1 << i)) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&dwc->lock, flags);
+			if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags)) {
+				if (dwc->tx_node_active != dwc->tx_list) {
+					struct dw_desc *desc =
+						list_entry(dwc->tx_node_active,
+							   struct dw_desc,
+							   desc_node);
+
+					dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+					/* move pointer to next descriptor */
+					dwc->tx_node_active =
+						dwc->tx_node_active->next;
+
+					dwc_do_single_block(dwc, desc);
+
+					spin_unlock_irqrestore(&dwc->lock, flags);
+					continue;
+				} else {
+					/* we are done here */
+					clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+				}
+			}
+			spin_unlock_irqrestore(&dwc->lock, flags);
+
 			dwc_scan_descriptors(dw, dwc);
+		}
 	}
 
 	/*
@@ -636,6 +708,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		size_t len, unsigned long flags)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma_slave	*dws = chan->private;
 	struct dw_desc		*desc;
 	struct dw_desc		*first;
 	struct dw_desc		*prev;
@@ -643,6 +716,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	size_t			offset;
 	unsigned int		src_width;
 	unsigned int		dst_width;
+	unsigned int		data_width;
 	u32			ctllo;
 
 	dev_vdbg(chan2dev(chan),
@@ -655,7 +729,11 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		return NULL;
 	}
 
-	src_width = dst_width = dwc_fast_fls(src | dest | len);
+	data_width = min_t(unsigned int, dwc->dw->data_width[dwc_get_sms(dws)],
+					 dwc->dw->data_width[dwc_get_dms(dws)]);
+
+	src_width = dst_width = min_t(unsigned int, data_width,
+				      dwc_fast_fls(src | dest | len));
 
 	ctllo = DWC_DEFAULT_CTLLO(chan)
 			| DWC_CTLL_DST_WIDTH(dst_width)
@@ -667,7 +745,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 	for (offset = 0; offset < len; offset += xfer_count << src_width) {
 		xfer_count = min_t(size_t, (len - offset) >> src_width,
-				DWC_MAX_COUNT);
+					   dwc->block_size);
 
 		desc = dwc_desc_get(dwc);
 		if (!desc)
@@ -725,6 +803,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	dma_addr_t		reg;
 	unsigned int		reg_width;
 	unsigned int		mem_width;
+	unsigned int		data_width;
 	unsigned int		i;
 	struct scatterlist	*sg;
 	size_t			total_len = 0;
@@ -748,6 +827,8 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
 			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
 
+		data_width = dwc->dw->data_width[dwc_get_sms(dws)];
+
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
@@ -755,7 +836,8 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			mem_width = dwc_fast_fls(mem | len);
+			mem_width = min_t(unsigned int,
+					  data_width, dwc_fast_fls(mem | len));
 
 slave_sg_todev_fill_desc:
 			desc = dwc_desc_get(dwc);
@@ -768,8 +850,8 @@ slave_sg_todev_fill_desc:
 			desc->lli.sar = mem;
 			desc->lli.dar = reg;
 			desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
-			if ((len >> mem_width) > DWC_MAX_COUNT) {
-				dlen = DWC_MAX_COUNT << mem_width;
+			if ((len >> mem_width) > dwc->block_size) {
+				dlen = dwc->block_size << mem_width;
 				mem += dlen;
 				len -= dlen;
 			} else {
@@ -808,6 +890,8 @@ slave_sg_todev_fill_desc:
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
 			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
 
+		data_width = dwc->dw->data_width[dwc_get_dms(dws)];
+
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
@@ -815,7 +899,8 @@ slave_sg_todev_fill_desc:
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			mem_width = dwc_fast_fls(mem | len);
+			mem_width = min_t(unsigned int,
+					  data_width, dwc_fast_fls(mem | len));
 
 slave_sg_fromdev_fill_desc:
 			desc = dwc_desc_get(dwc);
@@ -828,8 +913,8 @@ slave_sg_fromdev_fill_desc:
 			desc->lli.sar = reg;
 			desc->lli.dar = mem;
 			desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
-			if ((len >> reg_width) > DWC_MAX_COUNT) {
-				dlen = DWC_MAX_COUNT << reg_width;
+			if ((len >> reg_width) > dwc->block_size) {
+				dlen = dwc->block_size << reg_width;
 				mem += dlen;
 				len -= dlen;
 			} else {
@@ -945,6 +1030,8 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	} else if (cmd == DMA_TERMINATE_ALL) {
 		spin_lock_irqsave(&dwc->lock, flags);
 
+		clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+
 		dwc_chan_disable(dw, dwc);
 
 		dwc->paused = false;
@@ -1187,6 +1274,13 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 	unsigned long			flags;
 
 	spin_lock_irqsave(&dwc->lock, flags);
+	if (dwc->nollp) {
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		dev_dbg(chan2dev(&dwc->chan),
+				"channel doesn't support LLP transfers\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) {
 		spin_unlock_irqrestore(&dwc->lock, flags);
 		dev_dbg(chan2dev(&dwc->chan),
@@ -1212,7 +1306,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 	periods = buf_len / period_len;
 
 	/* Check for too big/unaligned periods and unaligned DMA buffer. */
-	if (period_len > (DWC_MAX_COUNT << reg_width))
+	if (period_len > (dwc->block_size << reg_width))
 		goto out_err;
 	if (unlikely(period_len & ((1 << reg_width) - 1)))
 		goto out_err;
@@ -1374,6 +1468,11 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	struct resource		*io;
 	struct dw_dma		*dw;
 	size_t			size;
+	void __iomem		*regs;
+	bool			autocfg;
+	unsigned int		dw_params;
+	unsigned int		nr_channels;
+	unsigned int		max_blk_size = 0;
 	int			irq;
 	int			err;
 	int			i;
@@ -1390,32 +1489,46 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	size = sizeof(struct dw_dma);
-	size += pdata->nr_channels * sizeof(struct dw_dma_chan);
-	dw = kzalloc(size, GFP_KERNEL);
+	regs = devm_request_and_ioremap(&pdev->dev, io);
+	if (!regs)
+		return -EBUSY;
+
+	dw_params = dma_read_byaddr(regs, DW_PARAMS);
+	autocfg = dw_params >> DW_PARAMS_EN & 0x1;
+
+	if (autocfg)
+		nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 0x7) + 1;
+	else
+		nr_channels = pdata->nr_channels;
+
+	size = sizeof(struct dw_dma) + nr_channels * sizeof(struct dw_dma_chan);
+	dw = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
 	if (!dw)
 		return -ENOMEM;
 
-	if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) {
-		err = -EBUSY;
-		goto err_kfree;
-	}
+	dw->clk = devm_clk_get(&pdev->dev, "hclk");
+	if (IS_ERR(dw->clk))
+		return PTR_ERR(dw->clk);
+	clk_prepare_enable(dw->clk);
 
-	dw->regs = ioremap(io->start, DW_REGLEN);
-	if (!dw->regs) {
-		err = -ENOMEM;
-		goto err_release_r;
-	}
+	dw->regs = regs;
+
+	/* get hardware configuration parameters */
+	if (autocfg) {
+		max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
 
-	dw->clk = clk_get(&pdev->dev, "hclk");
-	if (IS_ERR(dw->clk)) {
-		err = PTR_ERR(dw->clk);
-		goto err_clk;
+		dw->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
+		for (i = 0; i < dw->nr_masters; i++) {
+			dw->data_width[i] =
+				(dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2;
+		}
+	} else {
+		dw->nr_masters = pdata->nr_masters;
+		memcpy(dw->data_width, pdata->data_width, 4);
 	}
-	clk_prepare_enable(dw->clk);
 
 	/* Calculate all channel mask before DMA setup */
-	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+	dw->all_chan_mask = (1 << nr_channels) - 1;
 
 	/* force dma off, just in case */
 	dw_dma_off(dw);
@@ -1423,17 +1536,19 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	/* disable BLOCK interrupts as well */
 	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 
-	err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
+	err = devm_request_irq(&pdev->dev, irq, dw_dma_interrupt, 0,
+			       "dw_dmac", dw);
 	if (err)
-		goto err_irq;
+		return err;
 
 	platform_set_drvdata(pdev, dw);
 
 	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
 
 	INIT_LIST_HEAD(&dw->dma.channels);
-	for (i = 0; i < pdata->nr_channels; i++) {
+	for (i = 0; i < nr_channels; i++) {
 		struct dw_dma_chan	*dwc = &dw->chan[i];
+		int			r = nr_channels - i - 1;
 
 		dwc->chan.device = &dw->dma;
 		dma_cookie_init(&dwc->chan);
@@ -1445,7 +1560,7 @@ static int __devinit dw_probe(struct platform_device *pdev)
 
 		/* 7 is highest priority & 0 is lowest. */
 		if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
-			dwc->priority = pdata->nr_channels - i - 1;
+			dwc->priority = r;
 		else
 			dwc->priority = i;
 
@@ -1458,6 +1573,32 @@ static int __devinit dw_probe(struct platform_device *pdev)
 		INIT_LIST_HEAD(&dwc->free_list);
 
 		channel_clear_bit(dw, CH_EN, dwc->mask);
+
+		dwc->dw = dw;
+
+		/* hardware configuration */
+		if (autocfg) {
+			unsigned int dwc_params;
+
+			dwc_params = dma_read_byaddr(regs + r * sizeof(u32),
+						     DWC_PARAMS);
+
+			/* Decode maximum block size for given channel. The
+			 * stored 4 bit value represents blocks from 0x00 for 3
+			 * up to 0x0a for 4095. */
+			dwc->block_size =
+				(4 << ((max_blk_size >> 4 * i) & 0xf)) - 1;
+			dwc->nollp =
+				(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
+		} else {
+			dwc->block_size = pdata->block_size;
+
+			/* Check if channel supports multi block transfer */
+			channel_writel(dwc, LLP, 0xfffffffc);
+			dwc->nollp =
+				(channel_readl(dwc, LLP) & 0xfffffffc) == 0;
+			channel_writel(dwc, LLP, 0);
+		}
 	}
 
 	/* Clear all interrupts on all channels. */
@@ -1486,35 +1627,21 @@ static int __devinit dw_probe(struct platform_device *pdev)
 	dma_writel(dw, CFG, DW_CFG_DMA_EN);
 
 	printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n",
-			dev_name(&pdev->dev), pdata->nr_channels);
+			dev_name(&pdev->dev), nr_channels);
 
 	dma_async_device_register(&dw->dma);
 
 	return 0;
-
-err_irq:
-	clk_disable_unprepare(dw->clk);
-	clk_put(dw->clk);
-err_clk:
-	iounmap(dw->regs);
-	dw->regs = NULL;
-err_release_r:
-	release_resource(io);
-err_kfree:
-	kfree(dw);
-	return err;
 }
 
 static int __devexit dw_remove(struct platform_device *pdev)
 {
 	struct dw_dma		*dw = platform_get_drvdata(pdev);
 	struct dw_dma_chan	*dwc, *_dwc;
-	struct resource		*io;
 
 	dw_dma_off(dw);
 	dma_async_device_unregister(&dw->dma);
 
-	free_irq(platform_get_irq(pdev, 0), dw);
 	tasklet_kill(&dw->tasklet);
 
 	list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
@@ -1523,17 +1650,6 @@ static int __devexit dw_remove(struct platform_device *pdev)
 		channel_clear_bit(dw, CH_EN, dwc->mask);
 	}
 
-	clk_disable_unprepare(dw->clk);
-	clk_put(dw->clk);
-
-	iounmap(dw->regs);
-	dw->regs = NULL;
-
-	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(io->start, DW_REGLEN);
-
-	kfree(dw);
-
 	return 0;
 }
 
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 50830bee087a65eac6178e9111b72db9cbe0c49a..ff39fa6cd2bc443a7d96fd9d85be015f643704f5 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -82,9 +82,39 @@ struct dw_dma_regs {
 	DW_REG(ID);
 	DW_REG(TEST);
 
+	/* reserved */
+	DW_REG(__reserved0);
+	DW_REG(__reserved1);
+
 	/* optional encoded params, 0x3c8..0x3f7 */
+	u32	__reserved;
+
+	/* per-channel configuration registers */
+	u32	DWC_PARAMS[DW_DMA_MAX_NR_CHANNELS];
+	u32	MULTI_BLK_TYPE;
+	u32	MAX_BLK_SIZE;
+
+	/* top-level parameters */
+	u32	DW_PARAMS;
 };
 
+/* To access the registers in early stage of probe */
+#define dma_read_byaddr(addr, name) \
+	readl((addr) + offsetof(struct dw_dma_regs, name))
+
+/* Bitfields in DW_PARAMS */
+#define DW_PARAMS_NR_CHAN	8		/* number of channels */
+#define DW_PARAMS_NR_MASTER	11		/* number of AHB masters */
+#define DW_PARAMS_DATA_WIDTH(n)	(15 + 2 * (n))
+#define DW_PARAMS_DATA_WIDTH1	15		/* master 1 data width */
+#define DW_PARAMS_DATA_WIDTH2	17		/* master 2 data width */
+#define DW_PARAMS_DATA_WIDTH3	19		/* master 3 data width */
+#define DW_PARAMS_DATA_WIDTH4	21		/* master 4 data width */
+#define DW_PARAMS_EN		28		/* encoded parameters */
+
+/* Bitfields in DWC_PARAMS */
+#define DWC_PARAMS_MBLK_EN	11		/* multi block transfer */
+
 /* Bitfields in CTL_LO */
 #define DWC_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
 #define DWC_CTLL_DST_WIDTH(n)	((n)<<1)	/* bytes per element */
@@ -140,10 +170,9 @@ struct dw_dma_regs {
 /* Bitfields in CFG */
 #define DW_CFG_DMA_EN		(1 << 0)
 
-#define DW_REGLEN		0x400
-
 enum dw_dmac_flags {
 	DW_DMA_IS_CYCLIC = 0,
+	DW_DMA_IS_SOFT_LLP = 1,
 };
 
 struct dw_dma_chan {
@@ -154,6 +183,10 @@ struct dw_dma_chan {
 	bool			paused;
 	bool			initialized;
 
+	/* software emulation of the LLP transfers */
+	struct list_head	*tx_list;
+	struct list_head	*tx_node_active;
+
 	spinlock_t		lock;
 
 	/* these other elements are all protected by lock */
@@ -165,8 +198,15 @@ struct dw_dma_chan {
 
 	unsigned int		descs_allocated;
 
+	/* hardware configuration */
+	unsigned int		block_size;
+	bool			nollp;
+
 	/* configuration passed via DMA_SLAVE_CONFIG */
 	struct dma_slave_config dma_sconfig;
+
+	/* backlink to dw_dma */
+	struct dw_dma		*dw;
 };
 
 static inline struct dw_dma_chan_regs __iomem *
@@ -193,6 +233,10 @@ struct dw_dma {
 
 	u8			all_chan_mask;
 
+	/* hardware configuration */
+	unsigned char		nr_masters;
+	unsigned char		data_width[4];
+
 	struct dw_dma_chan	chan[0];
 };
 
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
new file mode 100644
index 0000000000000000000000000000000000000000..05aea3ce850602634a0a715fe11a27ba020e40de
--- /dev/null
+++ b/drivers/dma/edma.c
@@ -0,0 +1,671 @@
+/*
+ * TI EDMA DMA engine driver
+ *
+ * Copyright 2012 Texas Instruments
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <mach/edma.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+/*
+ * This will go away when the private EDMA API is folded
+ * into this driver and the platform device(s) are
+ * instantiated in the arch code. We can only get away
+ * with this simplification because DA8XX may not be built
+ * in the same kernel image with other DaVinci parts. This
+ * avoids having to sprinkle dmaengine driver platform devices
+ * and data throughout all the existing board files.
+ */
+#ifdef CONFIG_ARCH_DAVINCI_DA8XX
+#define EDMA_CTLRS	2
+#define EDMA_CHANS	32
+#else
+#define EDMA_CTLRS	1
+#define EDMA_CHANS	64
+#endif /* CONFIG_ARCH_DAVINCI_DA8XX */
+
+/* Max of 16 segments per channel to conserve PaRAM slots */
+#define MAX_NR_SG		16
+#define EDMA_MAX_SLOTS		MAX_NR_SG
+#define EDMA_DESCRIPTORS	16
+
+struct edma_desc {
+	struct virt_dma_desc		vdesc;
+	struct list_head		node;
+	int				absync;
+	int				pset_nr;
+	struct edmacc_param		pset[0];
+};
+
+struct edma_cc;
+
+struct edma_chan {
+	struct virt_dma_chan		vchan;
+	struct list_head		node;
+	struct edma_desc		*edesc;
+	struct edma_cc			*ecc;
+	int				ch_num;
+	bool				alloced;
+	int				slot[EDMA_MAX_SLOTS];
+	dma_addr_t			addr;
+	int				addr_width;
+	int				maxburst;
+};
+
+struct edma_cc {
+	int				ctlr;
+	struct dma_device		dma_slave;
+	struct edma_chan		slave_chans[EDMA_CHANS];
+	int				num_slave_chans;
+	int				dummy_slot;
+};
+
+static inline struct edma_cc *to_edma_cc(struct dma_device *d)
+{
+	return container_of(d, struct edma_cc, dma_slave);
+}
+
+static inline struct edma_chan *to_edma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct edma_chan, vchan.chan);
+}
+
+static inline struct edma_desc
+*to_edma_desc(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct edma_desc, vdesc.tx);
+}
+
+static void edma_desc_free(struct virt_dma_desc *vdesc)
+{
+	kfree(container_of(vdesc, struct edma_desc, vdesc));
+}
+
+/* Dispatch a queued descriptor to the controller (caller holds lock) */
+static void edma_execute(struct edma_chan *echan)
+{
+	struct virt_dma_desc *vdesc = vchan_next_desc(&echan->vchan);
+	struct edma_desc *edesc;
+	int i;
+
+	if (!vdesc) {
+		echan->edesc = NULL;
+		return;
+	}
+
+	list_del(&vdesc->node);
+
+	echan->edesc = edesc = to_edma_desc(&vdesc->tx);
+
+	/* Write descriptor PaRAM set(s) */
+	for (i = 0; i < edesc->pset_nr; i++) {
+		edma_write_slot(echan->slot[i], &edesc->pset[i]);
+		dev_dbg(echan->vchan.chan.device->dev,
+			"\n pset[%d]:\n"
+			"  chnum\t%d\n"
+			"  slot\t%d\n"
+			"  opt\t%08x\n"
+			"  src\t%08x\n"
+			"  dst\t%08x\n"
+			"  abcnt\t%08x\n"
+			"  ccnt\t%08x\n"
+			"  bidx\t%08x\n"
+			"  cidx\t%08x\n"
+			"  lkrld\t%08x\n",
+			i, echan->ch_num, echan->slot[i],
+			edesc->pset[i].opt,
+			edesc->pset[i].src,
+			edesc->pset[i].dst,
+			edesc->pset[i].a_b_cnt,
+			edesc->pset[i].ccnt,
+			edesc->pset[i].src_dst_bidx,
+			edesc->pset[i].src_dst_cidx,
+			edesc->pset[i].link_bcntrld);
+		/* Link to the previous slot if not the last set */
+		if (i != (edesc->pset_nr - 1))
+			edma_link(echan->slot[i], echan->slot[i+1]);
+		/* Final pset links to the dummy pset */
+		else
+			edma_link(echan->slot[i], echan->ecc->dummy_slot);
+	}
+
+	edma_start(echan->ch_num);
+}
+
+static int edma_terminate_all(struct edma_chan *echan)
+{
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+
+	/*
+	 * Stop DMA activity: we assume the callback will not be called
+	 * after edma_dma() returns (even if it does, it will see
+	 * echan->edesc is NULL and exit.)
+	 */
+	if (echan->edesc) {
+		echan->edesc = NULL;
+		edma_stop(echan->ch_num);
+	}
+
+	vchan_get_all_descriptors(&echan->vchan, &head);
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&echan->vchan, &head);
+
+	return 0;
+}
+
+
+static int edma_slave_config(struct edma_chan *echan,
+	struct dma_slave_config *config)
+{
+	if ((config->src_addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+	    (config->dst_addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES))
+		return -EINVAL;
+
+	if (config->direction == DMA_MEM_TO_DEV) {
+		if (config->dst_addr)
+			echan->addr = config->dst_addr;
+		if (config->dst_addr_width)
+			echan->addr_width = config->dst_addr_width;
+		if (config->dst_maxburst)
+			echan->maxburst = config->dst_maxburst;
+	} else if (config->direction == DMA_DEV_TO_MEM) {
+		if (config->src_addr)
+			echan->addr = config->src_addr;
+		if (config->src_addr_width)
+			echan->addr_width = config->src_addr_width;
+		if (config->src_maxburst)
+			echan->maxburst = config->src_maxburst;
+	}
+
+	return 0;
+}
+
+static int edma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			unsigned long arg)
+{
+	int ret = 0;
+	struct dma_slave_config *config;
+	struct edma_chan *echan = to_edma_chan(chan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		edma_terminate_all(echan);
+		break;
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+		ret = edma_slave_config(echan, config);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static struct dma_async_tx_descriptor *edma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl,
+	unsigned int sg_len, enum dma_transfer_direction direction,
+	unsigned long tx_flags, void *context)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	struct edma_desc *edesc;
+	struct scatterlist *sg;
+	int i;
+	int acnt, bcnt, ccnt, src, dst, cidx;
+	int src_bidx, dst_bidx, src_cidx, dst_cidx;
+
+	if (unlikely(!echan || !sgl || !sg_len))
+		return NULL;
+
+	if (echan->addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		dev_err(dev, "Undefined slave buswidth\n");
+		return NULL;
+	}
+
+	if (sg_len > MAX_NR_SG) {
+		dev_err(dev, "Exceeded max SG segments %d > %d\n",
+			sg_len, MAX_NR_SG);
+		return NULL;
+	}
+
+	edesc = kzalloc(sizeof(*edesc) + sg_len *
+		sizeof(edesc->pset[0]), GFP_ATOMIC);
+	if (!edesc) {
+		dev_dbg(dev, "Failed to allocate a descriptor\n");
+		return NULL;
+	}
+
+	edesc->pset_nr = sg_len;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		/* Allocate a PaRAM slot, if needed */
+		if (echan->slot[i] < 0) {
+			echan->slot[i] =
+				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
+						EDMA_SLOT_ANY);
+			if (echan->slot[i] < 0) {
+				dev_err(dev, "Failed to allocate slot\n");
+				return NULL;
+			}
+		}
+
+		acnt = echan->addr_width;
+
+		/*
+		 * If the maxburst is equal to the fifo width, use
+		 * A-synced transfers. This allows for large contiguous
+		 * buffer transfers using only one PaRAM set.
+		 */
+		if (echan->maxburst == 1) {
+			edesc->absync = false;
+			ccnt = sg_dma_len(sg) / acnt / (SZ_64K - 1);
+			bcnt = sg_dma_len(sg) / acnt - ccnt * (SZ_64K - 1);
+			if (bcnt)
+				ccnt++;
+			else
+				bcnt = SZ_64K - 1;
+			cidx = acnt;
+		/*
+		 * If maxburst is greater than the fifo address_width,
+		 * use AB-synced transfers where A count is the fifo
+		 * address_width and B count is the maxburst. In this
+		 * case, we are limited to transfers of C count frames
+		 * of (address_width * maxburst) where C count is limited
+		 * to SZ_64K-1. This places an upper bound on the length
+		 * of an SG segment that can be handled.
+		 */
+		} else {
+			edesc->absync = true;
+			bcnt = echan->maxburst;
+			ccnt = sg_dma_len(sg) / (acnt * bcnt);
+			if (ccnt > (SZ_64K - 1)) {
+				dev_err(dev, "Exceeded max SG segment size\n");
+				return NULL;
+			}
+			cidx = acnt * bcnt;
+		}
+
+		if (direction == DMA_MEM_TO_DEV) {
+			src = sg_dma_address(sg);
+			dst = echan->addr;
+			src_bidx = acnt;
+			src_cidx = cidx;
+			dst_bidx = 0;
+			dst_cidx = 0;
+		} else {
+			src = echan->addr;
+			dst = sg_dma_address(sg);
+			src_bidx = 0;
+			src_cidx = 0;
+			dst_bidx = acnt;
+			dst_cidx = cidx;
+		}
+
+		edesc->pset[i].opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+		/* Configure A or AB synchronized transfers */
+		if (edesc->absync)
+			edesc->pset[i].opt |= SYNCDIM;
+		/* If this is the last set, enable completion interrupt flag */
+		if (i == sg_len - 1)
+			edesc->pset[i].opt |= TCINTEN;
+
+		edesc->pset[i].src = src;
+		edesc->pset[i].dst = dst;
+
+		edesc->pset[i].src_dst_bidx = (dst_bidx << 16) | src_bidx;
+		edesc->pset[i].src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+		edesc->pset[i].a_b_cnt = bcnt << 16 | acnt;
+		edesc->pset[i].ccnt = ccnt;
+		edesc->pset[i].link_bcntrld = 0xffffffff;
+
+	}
+
+	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
+{
+	struct edma_chan *echan = data;
+	struct device *dev = echan->vchan.chan.device->dev;
+	struct edma_desc *edesc;
+	unsigned long flags;
+
+	/* Stop the channel */
+	edma_stop(echan->ch_num);
+
+	switch (ch_status) {
+	case DMA_COMPLETE:
+		dev_dbg(dev, "transfer complete on channel %d\n", ch_num);
+
+		spin_lock_irqsave(&echan->vchan.lock, flags);
+
+		edesc = echan->edesc;
+		if (edesc) {
+			edma_execute(echan);
+			vchan_cookie_complete(&edesc->vdesc);
+		}
+
+		spin_unlock_irqrestore(&echan->vchan.lock, flags);
+
+		break;
+	case DMA_CC_ERROR:
+		dev_dbg(dev, "transfer error on channel %d\n", ch_num);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Alloc channel resources */
+static int edma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	int ret;
+	int a_ch_num;
+	LIST_HEAD(descs);
+
+	a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback,
+					chan, EVENTQ_DEFAULT);
+
+	if (a_ch_num < 0) {
+		ret = -ENODEV;
+		goto err_no_chan;
+	}
+
+	if (a_ch_num != echan->ch_num) {
+		dev_err(dev, "failed to allocate requested channel %u:%u\n",
+			EDMA_CTLR(echan->ch_num),
+			EDMA_CHAN_SLOT(echan->ch_num));
+		ret = -ENODEV;
+		goto err_wrong_chan;
+	}
+
+	echan->alloced = true;
+	echan->slot[0] = echan->ch_num;
+
+	dev_info(dev, "allocated channel for %u:%u\n",
+		 EDMA_CTLR(echan->ch_num), EDMA_CHAN_SLOT(echan->ch_num));
+
+	return 0;
+
+err_wrong_chan:
+	edma_free_channel(a_ch_num);
+err_no_chan:
+	return ret;
+}
+
+/* Free channel resources */
+static void edma_free_chan_resources(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	int i;
+
+	/* Terminate transfers */
+	edma_stop(echan->ch_num);
+
+	vchan_free_chan_resources(&echan->vchan);
+
+	/* Free EDMA PaRAM slots */
+	for (i = 1; i < EDMA_MAX_SLOTS; i++) {
+		if (echan->slot[i] >= 0) {
+			edma_free_slot(echan->slot[i]);
+			echan->slot[i] = -1;
+		}
+	}
+
+	/* Free EDMA channel */
+	if (echan->alloced) {
+		edma_free_channel(echan->ch_num);
+		echan->alloced = false;
+	}
+
+	dev_info(dev, "freeing channel for %u\n", echan->ch_num);
+}
+
+/* Send pending descriptor to hardware */
+static void edma_issue_pending(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+	if (vchan_issue_pending(&echan->vchan) && !echan->edesc)
+		edma_execute(echan);
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+}
+
+static size_t edma_desc_size(struct edma_desc *edesc)
+{
+	int i;
+	size_t size;
+
+	if (edesc->absync)
+		for (size = i = 0; i < edesc->pset_nr; i++)
+			size += (edesc->pset[i].a_b_cnt & 0xffff) *
+				(edesc->pset[i].a_b_cnt >> 16) *
+				 edesc->pset[i].ccnt;
+	else
+		size = (edesc->pset[0].a_b_cnt & 0xffff) *
+			(edesc->pset[0].a_b_cnt >> 16) +
+			(edesc->pset[0].a_b_cnt & 0xffff) *
+			(SZ_64K - 1) * edesc->pset[0].ccnt;
+
+	return size;
+}
+
+/* Check request completion status */
+static enum dma_status edma_tx_status(struct dma_chan *chan,
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct virt_dma_desc *vdesc;
+	enum dma_status ret;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+	vdesc = vchan_find_desc(&echan->vchan, cookie);
+	if (vdesc) {
+		txstate->residue = edma_desc_size(to_edma_desc(&vdesc->tx));
+	} else if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie) {
+		struct edma_desc *edesc = echan->edesc;
+		txstate->residue = edma_desc_size(edesc);
+	} else {
+		txstate->residue = 0;
+	}
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+
+	return ret;
+}
+
+static void __init edma_chan_init(struct edma_cc *ecc,
+				  struct dma_device *dma,
+				  struct edma_chan *echans)
+{
+	int i, j;
+
+	for (i = 0; i < EDMA_CHANS; i++) {
+		struct edma_chan *echan = &echans[i];
+		echan->ch_num = EDMA_CTLR_CHAN(ecc->ctlr, i);
+		echan->ecc = ecc;
+		echan->vchan.desc_free = edma_desc_free;
+
+		vchan_init(&echan->vchan, dma);
+
+		INIT_LIST_HEAD(&echan->node);
+		for (j = 0; j < EDMA_MAX_SLOTS; j++)
+			echan->slot[j] = -1;
+	}
+}
+
+static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
+			  struct device *dev)
+{
+	dma->device_prep_slave_sg = edma_prep_slave_sg;
+	dma->device_alloc_chan_resources = edma_alloc_chan_resources;
+	dma->device_free_chan_resources = edma_free_chan_resources;
+	dma->device_issue_pending = edma_issue_pending;
+	dma->device_tx_status = edma_tx_status;
+	dma->device_control = edma_control;
+	dma->dev = dev;
+
+	INIT_LIST_HEAD(&dma->channels);
+}
+
+static int __devinit edma_probe(struct platform_device *pdev)
+{
+	struct edma_cc *ecc;
+	int ret;
+
+	ecc = devm_kzalloc(&pdev->dev, sizeof(*ecc), GFP_KERNEL);
+	if (!ecc) {
+		dev_err(&pdev->dev, "Can't allocate controller\n");
+		return -ENOMEM;
+	}
+
+	ecc->ctlr = pdev->id;
+	ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY);
+	if (ecc->dummy_slot < 0) {
+		dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n");
+		return -EIO;
+	}
+
+	dma_cap_zero(ecc->dma_slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, ecc->dma_slave.cap_mask);
+
+	edma_dma_init(ecc, &ecc->dma_slave, &pdev->dev);
+
+	edma_chan_init(ecc, &ecc->dma_slave, ecc->slave_chans);
+
+	ret = dma_async_device_register(&ecc->dma_slave);
+	if (ret)
+		goto err_reg1;
+
+	platform_set_drvdata(pdev, ecc);
+
+	dev_info(&pdev->dev, "TI EDMA DMA engine driver\n");
+
+	return 0;
+
+err_reg1:
+	edma_free_slot(ecc->dummy_slot);
+	return ret;
+}
+
+static int __devexit edma_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct edma_cc *ecc = dev_get_drvdata(dev);
+
+	dma_async_device_unregister(&ecc->dma_slave);
+	edma_free_slot(ecc->dummy_slot);
+
+	return 0;
+}
+
+static struct platform_driver edma_driver = {
+	.probe		= edma_probe,
+	.remove		= __devexit_p(edma_remove),
+	.driver = {
+		.name = "edma-dma-engine",
+		.owner = THIS_MODULE,
+	},
+};
+
+bool edma_filter_fn(struct dma_chan *chan, void *param)
+{
+	if (chan->device->dev->driver == &edma_driver.driver) {
+		struct edma_chan *echan = to_edma_chan(chan);
+		unsigned ch_req = *(unsigned *)param;
+		return ch_req == echan->ch_num;
+	}
+	return false;
+}
+EXPORT_SYMBOL(edma_filter_fn);
+
+static struct platform_device *pdev0, *pdev1;
+
+static const struct platform_device_info edma_dev_info0 = {
+	.name = "edma-dma-engine",
+	.id = 0,
+	.dma_mask = DMA_BIT_MASK(32),
+};
+
+static const struct platform_device_info edma_dev_info1 = {
+	.name = "edma-dma-engine",
+	.id = 1,
+	.dma_mask = DMA_BIT_MASK(32),
+};
+
+static int edma_init(void)
+{
+	int ret = platform_driver_register(&edma_driver);
+
+	if (ret == 0) {
+		pdev0 = platform_device_register_full(&edma_dev_info0);
+		if (IS_ERR(pdev0)) {
+			platform_driver_unregister(&edma_driver);
+			ret = PTR_ERR(pdev0);
+			goto out;
+		}
+	}
+
+	if (EDMA_CTLRS == 2) {
+		pdev1 = platform_device_register_full(&edma_dev_info1);
+		if (IS_ERR(pdev1)) {
+			platform_driver_unregister(&edma_driver);
+			platform_device_unregister(pdev0);
+			ret = PTR_ERR(pdev1);
+		}
+	}
+
+out:
+	return ret;
+}
+subsys_initcall(edma_init);
+
+static void __exit edma_exit(void)
+{
+	platform_device_unregister(pdev0);
+	if (pdev1)
+		platform_device_unregister(pdev1);
+	platform_driver_unregister(&edma_driver);
+}
+module_exit(edma_exit);
+
+MODULE_AUTHOR("Matt Porter <mporter@ti.com>");
+MODULE_DESCRIPTION("TI EDMA DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 86895760b598ef61ca63ebce58f3c1f2e5093cbf..b9d66785144511ff294cb59096d6de1b766cbece 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -434,12 +434,11 @@ static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t f
 		return NULL;
 	memset(hw, 0, sizeof(*hw));
 
-	desc = kmem_cache_alloc(ioat2_cache, flags);
+	desc = kmem_cache_zalloc(ioat2_cache, flags);
 	if (!desc) {
 		pci_pool_free(dma->dma_pool, hw, phys);
 		return NULL;
 	}
-	memset(desc, 0, sizeof(*desc));
 
 	dma_async_tx_descriptor_init(&desc->txd, chan);
 	desc->txd.tx_submit = ioat2_tx_submit_unlock;
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 5e3a40f7994510d6b084815459e77f4690da8970..c0573061b45dfc5f45c40255d4a6d9c99a1fe882 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -40,6 +40,17 @@ MODULE_VERSION(IOAT_DMA_VERSION);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel Corporation");
 
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB0	0x0e20
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB1	0x0e21
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB2	0x0e22
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB3	0x0e23
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB4	0x0e24
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB5	0x0e25
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB6	0x0e26
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB7	0x0e27
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB8	0x0e2e
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB9	0x0e2f
+
 static struct pci_device_id ioat_pci_tbl[] = {
 	/* I/OAT v1 platforms */
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
@@ -83,6 +94,17 @@ static struct pci_device_id ioat_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) },
 
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) },
+
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
new file mode 100644
index 0000000000000000000000000000000000000000..14da1f403edfdb4f0571abbbea5bddeec1120e02
--- /dev/null
+++ b/drivers/dma/mmp_pdma.c
@@ -0,0 +1,875 @@
+/*
+ * Copyright 2012 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/mmp_dma.h>
+#include <linux/dmapool.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+
+#include "dmaengine.h"
+
+#define DCSR		0x0000
+#define DALGN		0x00a0
+#define DINT		0x00f0
+#define DDADR		0x0200
+#define DSADR		0x0204
+#define DTADR		0x0208
+#define DCMD		0x020c
+
+#define DCSR_RUN	(1 << 31)	/* Run Bit (read / write) */
+#define DCSR_NODESC	(1 << 30)	/* No-Descriptor Fetch (read / write) */
+#define DCSR_STOPIRQEN	(1 << 29)	/* Stop Interrupt Enable (read / write) */
+#define DCSR_REQPEND	(1 << 8)	/* Request Pending (read-only) */
+#define DCSR_STOPSTATE	(1 << 3)	/* Stop State (read-only) */
+#define DCSR_ENDINTR	(1 << 2)	/* End Interrupt (read / write) */
+#define DCSR_STARTINTR	(1 << 1)	/* Start Interrupt (read / write) */
+#define DCSR_BUSERR	(1 << 0)	/* Bus Error Interrupt (read / write) */
+
+#define DCSR_EORIRQEN	(1 << 28)       /* End of Receive Interrupt Enable (R/W) */
+#define DCSR_EORJMPEN	(1 << 27)       /* Jump to next descriptor on EOR */
+#define DCSR_EORSTOPEN	(1 << 26)       /* STOP on an EOR */
+#define DCSR_SETCMPST	(1 << 25)       /* Set Descriptor Compare Status */
+#define DCSR_CLRCMPST	(1 << 24)       /* Clear Descriptor Compare Status */
+#define DCSR_CMPST	(1 << 10)       /* The Descriptor Compare Status */
+#define DCSR_EORINTR	(1 << 9)        /* The end of Receive */
+
+#define DRCMR_MAPVLD	(1 << 7)	/* Map Valid (read / write) */
+#define DRCMR_CHLNUM	0x1f		/* mask for Channel Number (read / write) */
+
+#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor (mask) */
+#define DDADR_STOP	(1 << 0)	/* Stop (read / write) */
+
+#define DCMD_INCSRCADDR	(1 << 31)	/* Source Address Increment Setting. */
+#define DCMD_INCTRGADDR	(1 << 30)	/* Target Address Increment Setting. */
+#define DCMD_FLOWSRC	(1 << 29)	/* Flow Control by the source. */
+#define DCMD_FLOWTRG	(1 << 28)	/* Flow Control by the target. */
+#define DCMD_STARTIRQEN	(1 << 22)	/* Start Interrupt Enable */
+#define DCMD_ENDIRQEN	(1 << 21)	/* End Interrupt Enable */
+#define DCMD_ENDIAN	(1 << 18)	/* Device Endian-ness. */
+#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
+#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
+#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
+#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
+#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
+#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
+#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
+
+#define PDMA_ALIGNMENT		3
+#define PDMA_MAX_DESC_BYTES	0x1000
+
+struct mmp_pdma_desc_hw {
+	u32 ddadr;	/* Points to the next descriptor + flags */
+	u32 dsadr;	/* DSADR value for the current transfer */
+	u32 dtadr;	/* DTADR value for the current transfer */
+	u32 dcmd;	/* DCMD value for the current transfer */
+} __aligned(32);
+
+struct mmp_pdma_desc_sw {
+	struct mmp_pdma_desc_hw desc;
+	struct list_head node;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor async_tx;
+};
+
+struct mmp_pdma_phy;
+
+struct mmp_pdma_chan {
+	struct device *dev;
+	struct dma_chan chan;
+	struct dma_async_tx_descriptor desc;
+	struct mmp_pdma_phy *phy;
+	enum dma_transfer_direction dir;
+
+	/* channel's basic info */
+	struct tasklet_struct tasklet;
+	u32 dcmd;
+	u32 drcmr;
+	u32 dev_addr;
+
+	/* list for desc */
+	spinlock_t desc_lock;		/* Descriptor list lock */
+	struct list_head chain_pending;	/* Link descriptors queue for pending */
+	struct list_head chain_running;	/* Link descriptors queue for running */
+	bool idle;			/* channel statue machine */
+
+	struct dma_pool *desc_pool;	/* Descriptors pool */
+};
+
+struct mmp_pdma_phy {
+	int idx;
+	void __iomem *base;
+	struct mmp_pdma_chan *vchan;
+};
+
+struct mmp_pdma_device {
+	int				dma_channels;
+	void __iomem			*base;
+	struct device			*dev;
+	struct dma_device		device;
+	struct mmp_pdma_phy		*phy;
+};
+
+#define tx_to_mmp_pdma_desc(tx) container_of(tx, struct mmp_pdma_desc_sw, async_tx)
+#define to_mmp_pdma_desc(lh) container_of(lh, struct mmp_pdma_desc_sw, node)
+#define to_mmp_pdma_chan(dchan) container_of(dchan, struct mmp_pdma_chan, chan)
+#define to_mmp_pdma_dev(dmadev) container_of(dmadev, struct mmp_pdma_device, device)
+
+static void set_desc(struct mmp_pdma_phy *phy, dma_addr_t addr)
+{
+	u32 reg = (phy->idx << 4) + DDADR;
+
+	writel(addr, phy->base + reg);
+}
+
+static void enable_chan(struct mmp_pdma_phy *phy)
+{
+	u32 reg;
+
+	if (!phy->vchan)
+		return;
+
+	reg = phy->vchan->drcmr;
+	reg = (((reg) < 64) ? 0x0100 : 0x1100) + (((reg) & 0x3f) << 2);
+	writel(DRCMR_MAPVLD | phy->idx, phy->base + reg);
+
+	reg = (phy->idx << 2) + DCSR;
+	writel(readl(phy->base + reg) | DCSR_RUN,
+					phy->base + reg);
+}
+
+static void disable_chan(struct mmp_pdma_phy *phy)
+{
+	u32 reg;
+
+	if (phy) {
+		reg = (phy->idx << 2) + DCSR;
+		writel(readl(phy->base + reg) & ~DCSR_RUN,
+						phy->base + reg);
+	}
+}
+
+static int clear_chan_irq(struct mmp_pdma_phy *phy)
+{
+	u32 dcsr;
+	u32 dint = readl(phy->base + DINT);
+	u32 reg = (phy->idx << 2) + DCSR;
+
+	if (dint & BIT(phy->idx)) {
+		/* clear irq */
+		dcsr = readl(phy->base + reg);
+		writel(dcsr, phy->base + reg);
+		if ((dcsr & DCSR_BUSERR) && (phy->vchan))
+			dev_warn(phy->vchan->dev, "DCSR_BUSERR\n");
+		return 0;
+	}
+	return -EAGAIN;
+}
+
+static irqreturn_t mmp_pdma_chan_handler(int irq, void *dev_id)
+{
+	struct mmp_pdma_phy *phy = dev_id;
+
+	if (clear_chan_irq(phy) == 0) {
+		tasklet_schedule(&phy->vchan->tasklet);
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+static irqreturn_t mmp_pdma_int_handler(int irq, void *dev_id)
+{
+	struct mmp_pdma_device *pdev = dev_id;
+	struct mmp_pdma_phy *phy;
+	u32 dint = readl(pdev->base + DINT);
+	int i, ret;
+	int irq_num = 0;
+
+	while (dint) {
+		i = __ffs(dint);
+		dint &= (dint - 1);
+		phy = &pdev->phy[i];
+		ret = mmp_pdma_chan_handler(irq, phy);
+		if (ret == IRQ_HANDLED)
+			irq_num++;
+	}
+
+	if (irq_num)
+		return IRQ_HANDLED;
+	else
+		return IRQ_NONE;
+}
+
+/* lookup free phy channel as descending priority */
+static struct mmp_pdma_phy *lookup_phy(struct mmp_pdma_chan *pchan)
+{
+	int prio, i;
+	struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
+	struct mmp_pdma_phy *phy;
+
+	/*
+	 * dma channel priorities
+	 * ch 0 - 3,  16 - 19  <--> (0)
+	 * ch 4 - 7,  20 - 23  <--> (1)
+	 * ch 8 - 11, 24 - 27  <--> (2)
+	 * ch 12 - 15, 28 - 31  <--> (3)
+	 */
+	for (prio = 0; prio <= (((pdev->dma_channels - 1) & 0xf) >> 2); prio++) {
+		for (i = 0; i < pdev->dma_channels; i++) {
+			if (prio != ((i & 0xf) >> 2))
+				continue;
+			phy = &pdev->phy[i];
+			if (!phy->vchan) {
+				phy->vchan = pchan;
+				return phy;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/* desc->tx_list ==> pending list */
+static void append_pending_queue(struct mmp_pdma_chan *chan,
+					struct mmp_pdma_desc_sw *desc)
+{
+	struct mmp_pdma_desc_sw *tail =
+				to_mmp_pdma_desc(chan->chain_pending.prev);
+
+	if (list_empty(&chan->chain_pending))
+		goto out_splice;
+
+	/* one irq per queue, even appended */
+	tail->desc.ddadr = desc->async_tx.phys;
+	tail->desc.dcmd &= ~DCMD_ENDIRQEN;
+
+	/* softly link to pending list */
+out_splice:
+	list_splice_tail_init(&desc->tx_list, &chan->chain_pending);
+}
+
+/**
+ * start_pending_queue - transfer any pending transactions
+ * pending list ==> running list
+ */
+static void start_pending_queue(struct mmp_pdma_chan *chan)
+{
+	struct mmp_pdma_desc_sw *desc;
+
+	/* still in running, irq will start the pending list */
+	if (!chan->idle) {
+		dev_dbg(chan->dev, "DMA controller still busy\n");
+		return;
+	}
+
+	if (list_empty(&chan->chain_pending)) {
+		/* chance to re-fetch phy channel with higher prio */
+		if (chan->phy) {
+			chan->phy->vchan = NULL;
+			chan->phy = NULL;
+		}
+		dev_dbg(chan->dev, "no pending list\n");
+		return;
+	}
+
+	if (!chan->phy) {
+		chan->phy = lookup_phy(chan);
+		if (!chan->phy) {
+			dev_dbg(chan->dev, "no free dma channel\n");
+			return;
+		}
+	}
+
+	/*
+	 * pending -> running
+	 * reintilize pending list
+	 */
+	desc = list_first_entry(&chan->chain_pending,
+				struct mmp_pdma_desc_sw, node);
+	list_splice_tail_init(&chan->chain_pending, &chan->chain_running);
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_desc(chan->phy, desc->async_tx.phys);
+	enable_chan(chan->phy);
+	chan->idle = false;
+}
+
+
+/* desc->tx_list ==> pending list */
+static dma_cookie_t mmp_pdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(tx->chan);
+	struct mmp_pdma_desc_sw *desc = tx_to_mmp_pdma_desc(tx);
+	struct mmp_pdma_desc_sw *child;
+	unsigned long flags;
+	dma_cookie_t cookie = -EBUSY;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	list_for_each_entry(child, &desc->tx_list, node) {
+		cookie = dma_cookie_assign(&child->async_tx);
+	}
+
+	append_pending_queue(chan, desc);
+
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	return cookie;
+}
+
+struct mmp_pdma_desc_sw *mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan)
+{
+	struct mmp_pdma_desc_sw *desc;
+	dma_addr_t pdesc;
+
+	desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+	if (!desc) {
+		dev_err(chan->dev, "out of memory for link descriptor\n");
+		return NULL;
+	}
+
+	memset(desc, 0, sizeof(*desc));
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
+	/* each desc has submit */
+	desc->async_tx.tx_submit = mmp_pdma_tx_submit;
+	desc->async_tx.phys = pdesc;
+
+	return desc;
+}
+
+/**
+ * mmp_pdma_alloc_chan_resources - Allocate resources for DMA channel.
+ *
+ * This function will create a dma pool for descriptor allocation.
+ * Request irq only when channel is requested
+ * Return - The number of allocated descriptors.
+ */
+
+static int mmp_pdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+
+	if (chan->desc_pool)
+		return 1;
+
+	chan->desc_pool =
+		dma_pool_create(dev_name(&dchan->dev->device), chan->dev,
+				  sizeof(struct mmp_pdma_desc_sw),
+				  __alignof__(struct mmp_pdma_desc_sw), 0);
+	if (!chan->desc_pool) {
+		dev_err(chan->dev, "unable to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+	if (chan->phy) {
+		chan->phy->vchan = NULL;
+		chan->phy = NULL;
+	}
+	chan->idle = true;
+	chan->dev_addr = 0;
+	return 1;
+}
+
+static void mmp_pdma_free_desc_list(struct mmp_pdma_chan *chan,
+				  struct list_head *list)
+{
+	struct mmp_pdma_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, list, node) {
+		list_del(&desc->node);
+		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+	}
+}
+
+static void mmp_pdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	mmp_pdma_free_desc_list(chan, &chan->chain_pending);
+	mmp_pdma_free_desc_list(chan, &chan->chain_running);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+	chan->idle = true;
+	chan->dev_addr = 0;
+	if (chan->phy) {
+		chan->phy->vchan = NULL;
+		chan->phy = NULL;
+	}
+	return;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_memcpy(struct dma_chan *dchan,
+	dma_addr_t dma_dst, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct mmp_pdma_chan *chan;
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
+	size_t copy = 0;
+
+	if (!dchan)
+		return NULL;
+
+	if (!len)
+		return NULL;
+
+	chan = to_mmp_pdma_chan(dchan);
+
+	if (!chan->dir) {
+		chan->dir = DMA_MEM_TO_MEM;
+		chan->dcmd = DCMD_INCTRGADDR | DCMD_INCSRCADDR;
+		chan->dcmd |= DCMD_BURST32;
+	}
+
+	do {
+		/* Allocate the link descriptor from DMA pool */
+		new = mmp_pdma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, "no memory for desc\n");
+			goto fail;
+		}
+
+		copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+
+		new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & copy);
+		new->desc.dsadr = dma_src;
+		new->desc.dtadr = dma_dst;
+
+		if (!first)
+			first = new;
+		else
+			prev->desc.ddadr = new->async_tx.phys;
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= copy;
+
+		if (chan->dir == DMA_MEM_TO_DEV) {
+			dma_src += copy;
+		} else if (chan->dir == DMA_DEV_TO_MEM) {
+			dma_dst += copy;
+		} else if (chan->dir == DMA_MEM_TO_MEM) {
+			dma_src += copy;
+			dma_dst += copy;
+		}
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	first->async_tx.flags = flags; /* client is in control of this ack */
+	first->async_tx.cookie = -EBUSY;
+
+	/* last desc and fire IRQ */
+	new->desc.ddadr = DDADR_STOP;
+	new->desc.dcmd |= DCMD_ENDIRQEN;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+			 unsigned int sg_len, enum dma_transfer_direction dir,
+			 unsigned long flags, void *context)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+	size_t len, avail;
+	struct scatterlist *sg;
+	dma_addr_t addr;
+	int i;
+
+	if ((sgl == NULL) || (sg_len == 0))
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		addr = sg_dma_address(sg);
+		avail = sg_dma_len(sgl);
+
+		do {
+			len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+
+			/* allocate and populate the descriptor */
+			new = mmp_pdma_alloc_descriptor(chan);
+			if (!new) {
+				dev_err(chan->dev, "no memory for desc\n");
+				goto fail;
+			}
+
+			new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & len);
+			if (dir == DMA_MEM_TO_DEV) {
+				new->desc.dsadr = addr;
+				new->desc.dtadr = chan->dev_addr;
+			} else {
+				new->desc.dsadr = chan->dev_addr;
+				new->desc.dtadr = addr;
+			}
+
+			if (!first)
+				first = new;
+			else
+				prev->desc.ddadr = new->async_tx.phys;
+
+			new->async_tx.cookie = 0;
+			async_tx_ack(&new->async_tx);
+			prev = new;
+
+			/* Insert the link descriptor to the LD ring */
+			list_add_tail(&new->node, &first->tx_list);
+
+			/* update metadata */
+			addr += len;
+			avail -= len;
+		} while (avail);
+	}
+
+	first->async_tx.cookie = -EBUSY;
+	first->async_tx.flags = flags;
+
+	/* last desc and fire IRQ */
+	new->desc.ddadr = DDADR_STOP;
+	new->desc.dcmd |= DCMD_ENDIRQEN;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static int mmp_pdma_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	struct dma_slave_config *cfg = (void *)arg;
+	unsigned long flags;
+	int ret = 0;
+	u32 maxburst = 0, addr = 0;
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+
+	if (!dchan)
+		return -EINVAL;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		disable_chan(chan->phy);
+		if (chan->phy) {
+			chan->phy->vchan = NULL;
+			chan->phy = NULL;
+		}
+		spin_lock_irqsave(&chan->desc_lock, flags);
+		mmp_pdma_free_desc_list(chan, &chan->chain_pending);
+		mmp_pdma_free_desc_list(chan, &chan->chain_running);
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		chan->idle = true;
+		break;
+	case DMA_SLAVE_CONFIG:
+		if (cfg->direction == DMA_DEV_TO_MEM) {
+			chan->dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC;
+			maxburst = cfg->src_maxburst;
+			width = cfg->src_addr_width;
+			addr = cfg->src_addr;
+		} else if (cfg->direction == DMA_MEM_TO_DEV) {
+			chan->dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG;
+			maxburst = cfg->dst_maxburst;
+			width = cfg->dst_addr_width;
+			addr = cfg->dst_addr;
+		}
+
+		if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+			chan->dcmd |= DCMD_WIDTH1;
+		else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+			chan->dcmd |= DCMD_WIDTH2;
+		else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES)
+			chan->dcmd |= DCMD_WIDTH4;
+
+		if (maxburst == 8)
+			chan->dcmd |= DCMD_BURST8;
+		else if (maxburst == 16)
+			chan->dcmd |= DCMD_BURST16;
+		else if (maxburst == 32)
+			chan->dcmd |= DCMD_BURST32;
+
+		if (cfg) {
+			chan->dir = cfg->direction;
+			chan->drcmr = cfg->slave_id;
+		}
+		chan->dev_addr = addr;
+		break;
+	default:
+		return -ENOSYS;
+	}
+
+	return ret;
+}
+
+static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	enum dma_status ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	return ret;
+}
+
+/**
+ * mmp_pdma_issue_pending - Issue the DMA start command
+ * pending list ==> running list
+ */
+static void mmp_pdma_issue_pending(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	start_pending_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+}
+
+/*
+ * dma_do_tasklet
+ * Do call back
+ * Start pending list
+ */
+static void dma_do_tasklet(unsigned long data)
+{
+	struct mmp_pdma_chan *chan = (struct mmp_pdma_chan *)data;
+	struct mmp_pdma_desc_sw *desc, *_desc;
+	LIST_HEAD(chain_cleanup);
+	unsigned long flags;
+
+	/* submit pending list; callback for each desc; free desc */
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	/* update the cookie if we have some descriptors to cleanup */
+	if (!list_empty(&chan->chain_running)) {
+		dma_cookie_t cookie;
+
+		desc = to_mmp_pdma_desc(chan->chain_running.prev);
+		cookie = desc->async_tx.cookie;
+		dma_cookie_complete(&desc->async_tx);
+
+		dev_dbg(chan->dev, "completed_cookie=%d\n", cookie);
+	}
+
+	/*
+	 * move the descriptors to a temporary list so we can drop the lock
+	 * during the entire cleanup operation
+	 */
+	list_splice_tail_init(&chan->chain_running, &chain_cleanup);
+
+	/* the hardware is now idle and ready for more */
+	chan->idle = true;
+
+	/* Start any pending transactions automatically */
+	start_pending_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chain_cleanup, node) {
+		struct dma_async_tx_descriptor *txd = &desc->async_tx;
+
+		/* Remove from the list of transactions */
+		list_del(&desc->node);
+		/* Run the link descriptor callback function */
+		if (txd->callback)
+			txd->callback(txd->callback_param);
+
+		dma_pool_free(chan->desc_pool, desc, txd->phys);
+	}
+}
+
+static int __devexit mmp_pdma_remove(struct platform_device *op)
+{
+	struct mmp_pdma_device *pdev = platform_get_drvdata(op);
+
+	dma_async_device_unregister(&pdev->device);
+	return 0;
+}
+
+static int __devinit mmp_pdma_chan_init(struct mmp_pdma_device *pdev,
+							int idx, int irq)
+{
+	struct mmp_pdma_phy *phy  = &pdev->phy[idx];
+	struct mmp_pdma_chan *chan;
+	int ret;
+
+	chan = devm_kzalloc(pdev->dev,
+			sizeof(struct mmp_pdma_chan), GFP_KERNEL);
+	if (chan == NULL)
+		return -ENOMEM;
+
+	phy->idx = idx;
+	phy->base = pdev->base;
+
+	if (irq) {
+		ret = devm_request_irq(pdev->dev, irq,
+			mmp_pdma_chan_handler, IRQF_DISABLED, "pdma", phy);
+		if (ret) {
+			dev_err(pdev->dev, "channel request irq fail!\n");
+			return ret;
+		}
+	}
+
+	spin_lock_init(&chan->desc_lock);
+	chan->dev = pdev->dev;
+	chan->chan.device = &pdev->device;
+	tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan);
+	INIT_LIST_HEAD(&chan->chain_pending);
+	INIT_LIST_HEAD(&chan->chain_running);
+
+	/* register virt channel to dma engine */
+	list_add_tail(&chan->chan.device_node,
+			&pdev->device.channels);
+
+	return 0;
+}
+
+static struct of_device_id mmp_pdma_dt_ids[] = {
+	{ .compatible = "marvell,pdma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mmp_pdma_dt_ids);
+
+static int __devinit mmp_pdma_probe(struct platform_device *op)
+{
+	struct mmp_pdma_device *pdev;
+	const struct of_device_id *of_id;
+	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
+	struct resource *iores;
+	int i, ret, irq = 0;
+	int dma_channels = 0, irq_num = 0;
+
+	pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL);
+	if (!pdev)
+		return -ENOMEM;
+	pdev->dev = &op->dev;
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
+	pdev->base = devm_request_and_ioremap(pdev->dev, iores);
+	if (!pdev->base)
+		return -EADDRNOTAVAIL;
+
+	of_id = of_match_device(mmp_pdma_dt_ids, pdev->dev);
+	if (of_id)
+		of_property_read_u32(pdev->dev->of_node,
+				"#dma-channels", &dma_channels);
+	else if (pdata && pdata->dma_channels)
+		dma_channels = pdata->dma_channels;
+	else
+		dma_channels = 32;	/* default 32 channel */
+	pdev->dma_channels = dma_channels;
+
+	for (i = 0; i < dma_channels; i++) {
+		if (platform_get_irq(op, i) > 0)
+			irq_num++;
+	}
+
+	pdev->phy = devm_kzalloc(pdev->dev,
+		dma_channels * sizeof(struct mmp_pdma_chan), GFP_KERNEL);
+	if (pdev->phy == NULL)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&pdev->device.channels);
+
+	if (irq_num != dma_channels) {
+		/* all chan share one irq, demux inside */
+		irq = platform_get_irq(op, 0);
+		ret = devm_request_irq(pdev->dev, irq,
+			mmp_pdma_int_handler, IRQF_DISABLED, "pdma", pdev);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < dma_channels; i++) {
+		irq = (irq_num != dma_channels) ? 0 : platform_get_irq(op, i);
+		ret = mmp_pdma_chan_init(pdev, i, irq);
+		if (ret)
+			return ret;
+	}
+
+	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, pdev->device.cap_mask);
+	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
+	pdev->device.dev = &op->dev;
+	pdev->device.device_alloc_chan_resources = mmp_pdma_alloc_chan_resources;
+	pdev->device.device_free_chan_resources = mmp_pdma_free_chan_resources;
+	pdev->device.device_tx_status = mmp_pdma_tx_status;
+	pdev->device.device_prep_dma_memcpy = mmp_pdma_prep_memcpy;
+	pdev->device.device_prep_slave_sg = mmp_pdma_prep_slave_sg;
+	pdev->device.device_issue_pending = mmp_pdma_issue_pending;
+	pdev->device.device_control = mmp_pdma_control;
+	pdev->device.copy_align = PDMA_ALIGNMENT;
+
+	if (pdev->dev->coherent_dma_mask)
+		dma_set_mask(pdev->dev, pdev->dev->coherent_dma_mask);
+	else
+		dma_set_mask(pdev->dev, DMA_BIT_MASK(64));
+
+	ret = dma_async_device_register(&pdev->device);
+	if (ret) {
+		dev_err(pdev->device.dev, "unable to register\n");
+		return ret;
+	}
+
+	dev_info(pdev->device.dev, "initialized\n");
+	return 0;
+}
+
+static const struct platform_device_id mmp_pdma_id_table[] = {
+	{ "mmp-pdma", },
+	{ },
+};
+
+static struct platform_driver mmp_pdma_driver = {
+	.driver		= {
+		.name	= "mmp-pdma",
+		.owner  = THIS_MODULE,
+		.of_match_table = mmp_pdma_dt_ids,
+	},
+	.id_table	= mmp_pdma_id_table,
+	.probe		= mmp_pdma_probe,
+	.remove		= __devexit_p(mmp_pdma_remove),
+};
+
+module_platform_driver(mmp_pdma_driver);
+
+MODULE_DESCRIPTION("MARVELL MMP Periphera DMA Driver");
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index 6d9c82e891d7c722505fe50507bd34218cb6bc2d..f3e8d71bcbc7018f12974197268a6cdf66ebd53b 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -20,6 +20,7 @@
 #include <linux/device.h>
 #include <mach/regs-icu.h>
 #include <linux/platform_data/dma-mmp_tdma.h>
+#include <linux/of_device.h>
 
 #include "dmaengine.h"
 
@@ -127,7 +128,6 @@ struct mmp_tdma_device {
 	void __iomem			*base;
 	struct dma_device		device;
 	struct mmp_tdma_chan		*tdmac[TDMA_CHANNEL_NUM];
-	int				irq;
 };
 
 #define to_mmp_tdma_chan(dchan) container_of(dchan, struct mmp_tdma_chan, chan)
@@ -492,7 +492,7 @@ static int __devinit mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
 		return -ENOMEM;
 	}
 	if (irq)
-		tdmac->irq = irq + idx;
+		tdmac->irq = irq;
 	tdmac->dev	   = tdev->dev;
 	tdmac->chan.device = &tdev->device;
 	tdmac->idx	   = idx;
@@ -505,34 +505,43 @@ static int __devinit mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
 	/* add the channel to tdma_chan list */
 	list_add_tail(&tdmac->chan.device_node,
 			&tdev->device.channels);
-
 	return 0;
 }
 
+static struct of_device_id mmp_tdma_dt_ids[] = {
+	{ .compatible = "marvell,adma-1.0", .data = (void *)MMP_AUD_TDMA},
+	{ .compatible = "marvell,pxa910-squ", .data = (void *)PXA910_SQU},
+	{}
+};
+MODULE_DEVICE_TABLE(of, mmp_tdma_dt_ids);
+
 static int __devinit mmp_tdma_probe(struct platform_device *pdev)
 {
-	const struct platform_device_id *id = platform_get_device_id(pdev);
-	enum mmp_tdma_type type = id->driver_data;
+	enum mmp_tdma_type type;
+	const struct of_device_id *of_id;
 	struct mmp_tdma_device *tdev;
 	struct resource *iores;
 	int i, ret;
-	int irq = 0;
+	int irq = 0, irq_num = 0;
 	int chan_num = TDMA_CHANNEL_NUM;
 
+	of_id = of_match_device(mmp_tdma_dt_ids, &pdev->dev);
+	if (of_id)
+		type = (enum mmp_tdma_type) of_id->data;
+	else
+		type = platform_get_device_id(pdev)->driver_data;
+
 	/* always have couple channels */
 	tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL);
 	if (!tdev)
 		return -ENOMEM;
 
 	tdev->dev = &pdev->dev;
-	iores = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!iores)
-		return -EINVAL;
 
-	if (resource_size(iores) != chan_num)
-		tdev->irq = iores->start;
-	else
-		irq = iores->start;
+	for (i = 0; i < chan_num; i++) {
+		if (platform_get_irq(pdev, i) > 0)
+			irq_num++;
+	}
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!iores)
@@ -542,25 +551,26 @@ static int __devinit mmp_tdma_probe(struct platform_device *pdev)
 	if (!tdev->base)
 		return -EADDRNOTAVAIL;
 
-	if (tdev->irq) {
-		ret = devm_request_irq(&pdev->dev, tdev->irq,
+	INIT_LIST_HEAD(&tdev->device.channels);
+
+	if (irq_num != chan_num) {
+		irq = platform_get_irq(pdev, 0);
+		ret = devm_request_irq(&pdev->dev, irq,
 			mmp_tdma_int_handler, IRQF_DISABLED, "tdma", tdev);
 		if (ret)
 			return ret;
 	}
 
-	dma_cap_set(DMA_SLAVE, tdev->device.cap_mask);
-	dma_cap_set(DMA_CYCLIC, tdev->device.cap_mask);
-
-	INIT_LIST_HEAD(&tdev->device.channels);
-
 	/* initialize channel parameters */
 	for (i = 0; i < chan_num; i++) {
+		irq = (irq_num != chan_num) ? 0 : platform_get_irq(pdev, i);
 		ret = mmp_tdma_chan_init(tdev, i, irq, type);
 		if (ret)
 			return ret;
 	}
 
+	dma_cap_set(DMA_SLAVE, tdev->device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, tdev->device.cap_mask);
 	tdev->device.dev = &pdev->dev;
 	tdev->device.device_alloc_chan_resources =
 					mmp_tdma_alloc_chan_resources;
@@ -595,6 +605,7 @@ static struct platform_driver mmp_tdma_driver = {
 	.driver		= {
 		.name	= "mmp-tdma",
 		.owner  = THIS_MODULE,
+		.of_match_table = mmp_tdma_dt_ids,
 	},
 	.id_table	= mmp_tdma_id_table,
 	.probe		= mmp_tdma_probe,
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 734a4eb84d6579675110033eeda6676ff40c4c91..9f02e794b12b00d4ed75d62a67bd255f321992ee 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -101,7 +101,8 @@ struct mxs_dma_ccw {
 	u32		pio_words[MXS_PIO_WORDS];
 };
 
-#define NUM_CCW	(int)(PAGE_SIZE / sizeof(struct mxs_dma_ccw))
+#define CCW_BLOCK_SIZE	(4 * PAGE_SIZE)
+#define NUM_CCW	(int)(CCW_BLOCK_SIZE / sizeof(struct mxs_dma_ccw))
 
 struct mxs_dma_chan {
 	struct mxs_dma_engine		*mxs_dma;
@@ -354,14 +355,15 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 
 	mxs_chan->chan_irq = data->chan_irq;
 
-	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
-				&mxs_chan->ccw_phys, GFP_KERNEL);
+	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev,
+				CCW_BLOCK_SIZE, &mxs_chan->ccw_phys,
+				GFP_KERNEL);
 	if (!mxs_chan->ccw) {
 		ret = -ENOMEM;
 		goto err_alloc;
 	}
 
-	memset(mxs_chan->ccw, 0, PAGE_SIZE);
+	memset(mxs_chan->ccw, 0, CCW_BLOCK_SIZE);
 
 	if (mxs_chan->chan_irq != NO_IRQ) {
 		ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
@@ -387,7 +389,7 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 err_clk:
 	free_irq(mxs_chan->chan_irq, mxs_dma);
 err_irq:
-	dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+	dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE,
 			mxs_chan->ccw, mxs_chan->ccw_phys);
 err_alloc:
 	return ret;
@@ -402,7 +404,7 @@ static void mxs_dma_free_chan_resources(struct dma_chan *chan)
 
 	free_irq(mxs_chan->chan_irq, mxs_dma);
 
-	dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+	dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE,
 			mxs_chan->ccw, mxs_chan->ccw_phys);
 
 	clk_disable_unprepare(mxs_dma->clk);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 169c0dbd71aef5d171ca9b5ec9af8626939366b0..665668b6f2b14f7d6df5179d5ae37576336d1ea6 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -23,7 +23,6 @@
 #include <linux/dmaengine.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl330.h>
-#include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
 #include <linux/of.h>
 
@@ -586,8 +585,6 @@ struct dma_pl330_dmac {
 
 	/* Peripheral channels connected to this DMAC */
 	struct dma_pl330_chan *peripherals; /* keep at end */
-
-	struct clk *clk;
 };
 
 struct dma_pl330_desc {
@@ -2395,7 +2392,7 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
 	pch->pl330_chid = pl330_request_channel(&pdmac->pif);
 	if (!pch->pl330_chid) {
 		spin_unlock_irqrestore(&pch->lock, flags);
-		return 0;
+		return -ENOMEM;
 	}
 
 	tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch);
@@ -2889,29 +2886,17 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		goto probe_err1;
 	}
 
-	pdmac->clk = clk_get(&adev->dev, "dma");
-	if (IS_ERR(pdmac->clk)) {
-		dev_err(&adev->dev, "Cannot get operation clock.\n");
-		ret = -EINVAL;
-		goto probe_err2;
-	}
-
 	amba_set_drvdata(adev, pdmac);
 
-#ifndef CONFIG_PM_RUNTIME
-	/* enable dma clk */
-	clk_enable(pdmac->clk);
-#endif
-
 	irq = adev->irq[0];
 	ret = request_irq(irq, pl330_irq_handler, 0,
 			dev_name(&adev->dev), pi);
 	if (ret)
-		goto probe_err3;
+		goto probe_err2;
 
 	ret = pl330_add(pi);
 	if (ret)
-		goto probe_err4;
+		goto probe_err3;
 
 	INIT_LIST_HEAD(&pdmac->desc_pool);
 	spin_lock_init(&pdmac->pool_lock);
@@ -2933,7 +2918,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	if (!pdmac->peripherals) {
 		ret = -ENOMEM;
 		dev_err(&adev->dev, "unable to allocate pdmac->peripherals\n");
-		goto probe_err5;
+		goto probe_err4;
 	}
 
 	for (i = 0; i < num_chan; i++) {
@@ -2961,6 +2946,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		if (pi->pcfg.num_peri) {
 			dma_cap_set(DMA_SLAVE, pd->cap_mask);
 			dma_cap_set(DMA_CYCLIC, pd->cap_mask);
+			dma_cap_set(DMA_PRIVATE, pd->cap_mask);
 		}
 	}
 
@@ -2976,7 +2962,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	ret = dma_async_device_register(pd);
 	if (ret) {
 		dev_err(&adev->dev, "unable to register DMAC\n");
-		goto probe_err5;
+		goto probe_err4;
 	}
 
 	dev_info(&adev->dev,
@@ -2989,15 +2975,10 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	return 0;
 
-probe_err5:
-	pl330_del(pi);
 probe_err4:
-	free_irq(irq, pi);
+	pl330_del(pi);
 probe_err3:
-#ifndef CONFIG_PM_RUNTIME
-	clk_disable(pdmac->clk);
-#endif
-	clk_put(pdmac->clk);
+	free_irq(irq, pi);
 probe_err2:
 	iounmap(pi->base);
 probe_err1:
@@ -3044,10 +3025,6 @@ static int __devexit pl330_remove(struct amba_device *adev)
 	res = &adev->res;
 	release_mem_region(res->start, resource_size(res));
 
-#ifndef CONFIG_PM_RUNTIME
-	clk_disable(pdmac->clk);
-#endif
-
 	kfree(pdmac);
 
 	return 0;
@@ -3063,49 +3040,10 @@ static struct amba_id pl330_ids[] = {
 
 MODULE_DEVICE_TABLE(amba, pl330_ids);
 
-#ifdef CONFIG_PM_RUNTIME
-static int pl330_runtime_suspend(struct device *dev)
-{
-	struct dma_pl330_dmac *pdmac = dev_get_drvdata(dev);
-
-	if (!pdmac) {
-		dev_err(dev, "failed to get dmac\n");
-		return -ENODEV;
-	}
-
-	clk_disable(pdmac->clk);
-
-	return 0;
-}
-
-static int pl330_runtime_resume(struct device *dev)
-{
-	struct dma_pl330_dmac *pdmac = dev_get_drvdata(dev);
-
-	if (!pdmac) {
-		dev_err(dev, "failed to get dmac\n");
-		return -ENODEV;
-	}
-
-	clk_enable(pdmac->clk);
-
-	return 0;
-}
-#else
-#define pl330_runtime_suspend	NULL
-#define pl330_runtime_resume	NULL
-#endif /* CONFIG_PM_RUNTIME */
-
-static const struct dev_pm_ops pl330_pm_ops = {
-	.runtime_suspend = pl330_runtime_suspend,
-	.runtime_resume = pl330_runtime_resume,
-};
-
 static struct amba_driver pl330_driver = {
 	.drv = {
 		.owner = THIS_MODULE,
 		.name = "dma-pl330",
-		.pm = &pl330_pm_ops,
 	},
 	.id_table = pl330_ids,
 	.probe = pl330_probe,
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 3eed8b35b0f19d51f025a5de2dc656b9fe3f211c..64385cde044b3feefcc49001edbf0e9ee647bd0d 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -570,21 +570,19 @@ static int __devinit sirfsoc_dma_probe(struct platform_device *op)
 
 	if (of_property_read_u32(dn, "cell-index", &id)) {
 		dev_err(dev, "Fail to get DMAC index\n");
-		ret = -ENODEV;
-		goto free_mem;
+		return -ENODEV;
 	}
 
 	sdma->irq = irq_of_parse_and_map(dn, 0);
 	if (sdma->irq == NO_IRQ) {
 		dev_err(dev, "Error mapping IRQ!\n");
-		ret = -EINVAL;
-		goto free_mem;
+		return -EINVAL;
 	}
 
 	ret = of_address_to_resource(dn, 0, &res);
 	if (ret) {
 		dev_err(dev, "Error parsing memory region!\n");
-		goto free_mem;
+		goto irq_dispose;
 	}
 
 	regs_start = res.start;
@@ -597,12 +595,11 @@ static int __devinit sirfsoc_dma_probe(struct platform_device *op)
 		goto irq_dispose;
 	}
 
-	ret = devm_request_irq(dev, sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME,
-		sdma);
+	ret = request_irq(sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME, sdma);
 	if (ret) {
 		dev_err(dev, "Error requesting IRQ!\n");
 		ret = -EINVAL;
-		goto unmap_mem;
+		goto irq_dispose;
 	}
 
 	dma = &sdma->dma;
@@ -652,13 +649,9 @@ static int __devinit sirfsoc_dma_probe(struct platform_device *op)
 	return 0;
 
 free_irq:
-	devm_free_irq(dev, sdma->irq, sdma);
+	free_irq(sdma->irq, sdma);
 irq_dispose:
 	irq_dispose_mapping(sdma->irq);
-unmap_mem:
-	iounmap(sdma->base);
-free_mem:
-	devm_kfree(dev, sdma);
 	return ret;
 }
 
@@ -668,10 +661,8 @@ static int __devexit sirfsoc_dma_remove(struct platform_device *op)
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 
 	dma_async_device_unregister(&sdma->dma);
-	devm_free_irq(dev, sdma->irq, sdma);
+	free_irq(sdma->irq, sdma);
 	irq_dispose_mapping(sdma->irq);
-	iounmap(sdma->base);
-	devm_kfree(dev, sdma);
 	return 0;
 }
 
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index eee8d9b9a20bcdfde84eaa75dc05617b0524dd36..ae55091c22728a4342af8b3f288ecb4b013628c2 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2921,19 +2921,23 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	struct d40_base *base = NULL;
 	int num_log_chans = 0;
 	int num_phy_chans;
+	int clk_ret = -EINVAL;
 	int i;
 	u32 pid;
 	u32 cid;
 	u8 rev;
 
 	clk = clk_get(&pdev->dev, NULL);
-
 	if (IS_ERR(clk)) {
 		d40_err(&pdev->dev, "No matching clock found\n");
 		goto failure;
 	}
 
-	clk_enable(clk);
+	clk_ret = clk_prepare_enable(clk);
+	if (clk_ret) {
+		d40_err(&pdev->dev, "Failed to prepare/enable clock\n");
+		goto failure;
+	}
 
 	/* Get IO for DMAC base address */
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base");
@@ -3063,10 +3067,10 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	return base;
 
 failure:
-	if (!IS_ERR(clk)) {
-		clk_disable(clk);
+	if (!clk_ret)
+		clk_disable_unprepare(clk);
+	if (!IS_ERR(clk))
 		clk_put(clk);
-	}
 	if (virtbase)
 		iounmap(virtbase);
 	if (res)
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 45fbeed1c1a5e56d621254399d4b7c63b93861df..528c62dd4b00e1b52928ff66f3a54e7ae206680f 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -169,6 +169,7 @@ typedef void (*dma_isr_handler)(struct tegra_dma_channel *tdc,
 /* tegra_dma_channel: Channel specific information */
 struct tegra_dma_channel {
 	struct dma_chan		dma_chan;
+	char			name[30];
 	bool			config_init;
 	int			id;
 	int			irq;
@@ -475,8 +476,7 @@ static void tegra_dma_abort_all(struct tegra_dma_channel *tdc)
 	while (!list_empty(&tdc->pending_sg_req)) {
 		sgreq = list_first_entry(&tdc->pending_sg_req,
 						typeof(*sgreq), node);
-		list_del(&sgreq->node);
-		list_add_tail(&sgreq->node, &tdc->free_sg_req);
+		list_move_tail(&sgreq->node, &tdc->free_sg_req);
 		if (sgreq->last_sg) {
 			dma_desc = sgreq->dma_desc;
 			dma_desc->dma_status = DMA_ERROR;
@@ -570,8 +570,7 @@ static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc,
 
 	/* If not last req then put at end of pending list */
 	if (!list_is_last(&sgreq->node, &tdc->pending_sg_req)) {
-		list_del(&sgreq->node);
-		list_add_tail(&sgreq->node, &tdc->pending_sg_req);
+		list_move_tail(&sgreq->node, &tdc->pending_sg_req);
 		sgreq->configured = false;
 		st = handle_continuous_head_request(tdc, sgreq, to_terminate);
 		if (!st)
@@ -1284,7 +1283,6 @@ static int __devinit tegra_dma_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&tdma->dma_dev.channels);
 	for (i = 0; i < cdata->nr_channels; i++) {
 		struct tegra_dma_channel *tdc = &tdma->channels[i];
-		char irq_name[30];
 
 		tdc->chan_base_offset = TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET +
 					i * TEGRA_APBDMA_CHANNEL_REGISTER_SIZE;
@@ -1296,9 +1294,9 @@ static int __devinit tegra_dma_probe(struct platform_device *pdev)
 			goto err_irq;
 		}
 		tdc->irq = res->start;
-		snprintf(irq_name, sizeof(irq_name), "apbdma.%d", i);
+		snprintf(tdc->name, sizeof(tdc->name), "apbdma.%d", i);
 		ret = devm_request_irq(&pdev->dev, tdc->irq,
-				tegra_dma_isr, 0, irq_name, tdc);
+				tegra_dma_isr, 0, tdc->name, tdc);
 		if (ret) {
 			dev_err(&pdev->dev,
 				"request_irq failed with err %d channel %d\n",
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 8c2ff2490d991575223878aa9c06a8330d2500b2..1acae359cabea0c3c48b93390220b9909f012e39 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -134,6 +134,7 @@ config SPI_DAVINCI
 	tristate "Texas Instruments DaVinci/DA8x/OMAP-L/AM1x SoC SPI controller"
 	depends on ARCH_DAVINCI
 	select SPI_BITBANG
+	select TI_EDMA
 	help
 	  SPI master controller for DaVinci/DA8x/OMAP-L/AM1x SPI modules.
 
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 3afe2f4f5b8eb231bae471d6e4bdd5d48d385dae..147dfa87a64b6260b52f71b62d7880ec6905fdfb 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -25,13 +25,14 @@
 #include <linux/platform_device.h>
 #include <linux/err.h>
 #include <linux/clk.h>
+#include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
+#include <linux/edma.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/slab.h>
 
 #include <linux/platform_data/spi-davinci.h>
-#include <mach/edma.h>
 
 #define SPI_NO_RESOURCE		((resource_size_t)-1)
 
@@ -113,14 +114,6 @@
 #define SPIDEF		0x4c
 #define SPIFMT0		0x50
 
-/* We have 2 DMA channels per CS, one for RX and one for TX */
-struct davinci_spi_dma {
-	int			tx_channel;
-	int			rx_channel;
-	int			dummy_param_slot;
-	enum dma_event_q	eventq;
-};
-
 /* SPI Controller driver's private data. */
 struct davinci_spi {
 	struct spi_bitbang	bitbang;
@@ -134,11 +127,14 @@ struct davinci_spi {
 
 	const void		*tx;
 	void			*rx;
-#define SPI_TMP_BUFSZ	(SMP_CACHE_BYTES + 1)
-	u8			rx_tmp_buf[SPI_TMP_BUFSZ];
 	int			rcount;
 	int			wcount;
-	struct davinci_spi_dma	dma;
+
+	struct dma_chan		*dma_rx;
+	struct dma_chan		*dma_tx;
+	int			dma_rx_chnum;
+	int			dma_tx_chnum;
+
 	struct davinci_spi_platform_data *pdata;
 
 	void			(*get_rx)(u32 rx_data, struct davinci_spi *);
@@ -496,21 +492,23 @@ out:
 	return errors;
 }
 
-static void davinci_spi_dma_callback(unsigned lch, u16 status, void *data)
+static void davinci_spi_dma_rx_callback(void *data)
 {
-	struct davinci_spi *dspi = data;
-	struct davinci_spi_dma *dma = &dspi->dma;
+	struct davinci_spi *dspi = (struct davinci_spi *)data;
 
-	edma_stop(lch);
+	dspi->rcount = 0;
 
-	if (status == DMA_COMPLETE) {
-		if (lch == dma->rx_channel)
-			dspi->rcount = 0;
-		if (lch == dma->tx_channel)
-			dspi->wcount = 0;
-	}
+	if (!dspi->wcount && !dspi->rcount)
+		complete(&dspi->done);
+}
 
-	if ((!dspi->wcount && !dspi->rcount) || (status != DMA_COMPLETE))
+static void davinci_spi_dma_tx_callback(void *data)
+{
+	struct davinci_spi *dspi = (struct davinci_spi *)data;
+
+	dspi->wcount = 0;
+
+	if (!dspi->wcount && !dspi->rcount)
 		complete(&dspi->done);
 }
 
@@ -526,20 +524,20 @@ static void davinci_spi_dma_callback(unsigned lch, u16 status, void *data)
 static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 {
 	struct davinci_spi *dspi;
-	int data_type, ret;
+	int data_type, ret = -ENOMEM;
 	u32 tx_data, spidat1;
 	u32 errors = 0;
 	struct davinci_spi_config *spicfg;
 	struct davinci_spi_platform_data *pdata;
 	unsigned uninitialized_var(rx_buf_count);
-	struct device *sdev;
+	void *dummy_buf = NULL;
+	struct scatterlist sg_rx, sg_tx;
 
 	dspi = spi_master_get_devdata(spi->master);
 	pdata = dspi->pdata;
 	spicfg = (struct davinci_spi_config *)spi->controller_data;
 	if (!spicfg)
 		spicfg = &davinci_spi_default_cfg;
-	sdev = dspi->bitbang.master->dev.parent;
 
 	/* convert len to words based on bits_per_word */
 	data_type = dspi->bytes_per_word[spi->chip_select];
@@ -567,112 +565,83 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 		spidat1 |= tx_data & 0xFFFF;
 		iowrite32(spidat1, dspi->base + SPIDAT1);
 	} else {
-		struct davinci_spi_dma *dma;
-		unsigned long tx_reg, rx_reg;
-		struct edmacc_param param;
-		void *rx_buf;
-		int b, c;
-
-		dma = &dspi->dma;
-
-		tx_reg = (unsigned long)dspi->pbase + SPIDAT1;
-		rx_reg = (unsigned long)dspi->pbase + SPIBUF;
-
-		/*
-		 * Transmit DMA setup
-		 *
-		 * If there is transmit data, map the transmit buffer, set it
-		 * as the source of data and set the source B index to data
-		 * size. If there is no transmit data, set the transmit register
-		 * as the source of data, and set the source B index to zero.
-		 *
-		 * The destination is always the transmit register itself. And
-		 * the destination never increments.
-		 */
-
-		if (t->tx_buf) {
-			t->tx_dma = dma_map_single(&spi->dev, (void *)t->tx_buf,
-						t->len, DMA_TO_DEVICE);
-			if (dma_mapping_error(&spi->dev, t->tx_dma)) {
-				dev_dbg(sdev, "Unable to DMA map %d bytes"
-						"TX buffer\n", t->len);
-				return -ENOMEM;
-			}
-		}
-
-		/*
-		 * If number of words is greater than 65535, then we need
-		 * to configure a 3 dimension transfer.  Use the BCNTRLD
-		 * feature to allow for transfers that aren't even multiples
-		 * of 65535 (or any other possible b size) by first transferring
-		 * the remainder amount then grabbing the next N blocks of
-		 * 65535 words.
-		 */
-
-		c = dspi->wcount / (SZ_64K - 1);	/* N 65535 Blocks */
-		b = dspi->wcount - c * (SZ_64K - 1);	/* Remainder */
-		if (b)
-			c++;
+		struct dma_slave_config dma_rx_conf = {
+			.direction = DMA_DEV_TO_MEM,
+			.src_addr = (unsigned long)dspi->pbase + SPIBUF,
+			.src_addr_width = data_type,
+			.src_maxburst = 1,
+		};
+		struct dma_slave_config dma_tx_conf = {
+			.direction = DMA_MEM_TO_DEV,
+			.dst_addr = (unsigned long)dspi->pbase + SPIDAT1,
+			.dst_addr_width = data_type,
+			.dst_maxburst = 1,
+		};
+		struct dma_async_tx_descriptor *rxdesc;
+		struct dma_async_tx_descriptor *txdesc;
+		void *buf;
+
+		dummy_buf = kzalloc(t->len, GFP_KERNEL);
+		if (!dummy_buf)
+			goto err_alloc_dummy_buf;
+
+		dmaengine_slave_config(dspi->dma_rx, &dma_rx_conf);
+		dmaengine_slave_config(dspi->dma_tx, &dma_tx_conf);
+
+		sg_init_table(&sg_rx, 1);
+		if (!t->rx_buf)
+			buf = dummy_buf;
 		else
-			b = SZ_64K - 1;
-
-		param.opt = TCINTEN | EDMA_TCC(dma->tx_channel);
-		param.src = t->tx_buf ? t->tx_dma : tx_reg;
-		param.a_b_cnt = b << 16 | data_type;
-		param.dst = tx_reg;
-		param.src_dst_bidx = t->tx_buf ? data_type : 0;
-		param.link_bcntrld = 0xffffffff;
-		param.src_dst_cidx = t->tx_buf ? data_type : 0;
-		param.ccnt = c;
-		edma_write_slot(dma->tx_channel, &param);
-		edma_link(dma->tx_channel, dma->dummy_param_slot);
-
-		/*
-		 * Receive DMA setup
-		 *
-		 * If there is receive buffer, use it to receive data. If there
-		 * is none provided, use a temporary receive buffer. Set the
-		 * destination B index to 0 so effectively only one byte is used
-		 * in the temporary buffer (address does not increment).
-		 *
-		 * The source of receive data is the receive data register. The
-		 * source address never increments.
-		 */
-
-		if (t->rx_buf) {
-			rx_buf = t->rx_buf;
-			rx_buf_count = t->len;
-		} else {
-			rx_buf = dspi->rx_tmp_buf;
-			rx_buf_count = sizeof(dspi->rx_tmp_buf);
+			buf = t->rx_buf;
+		t->rx_dma = dma_map_single(&spi->dev, buf,
+				t->len, DMA_FROM_DEVICE);
+		if (!t->rx_dma) {
+			ret = -EFAULT;
+			goto err_rx_map;
 		}
+		sg_dma_address(&sg_rx) = t->rx_dma;
+		sg_dma_len(&sg_rx) = t->len;
 
-		t->rx_dma = dma_map_single(&spi->dev, rx_buf, rx_buf_count,
-							DMA_FROM_DEVICE);
-		if (dma_mapping_error(&spi->dev, t->rx_dma)) {
-			dev_dbg(sdev, "Couldn't DMA map a %d bytes RX buffer\n",
-								rx_buf_count);
-			if (t->tx_buf)
-				dma_unmap_single(&spi->dev, t->tx_dma, t->len,
-								DMA_TO_DEVICE);
-			return -ENOMEM;
+		sg_init_table(&sg_tx, 1);
+		if (!t->tx_buf)
+			buf = dummy_buf;
+		else
+			buf = (void *)t->tx_buf;
+		t->tx_dma = dma_map_single(&spi->dev, buf,
+				t->len, DMA_FROM_DEVICE);
+		if (!t->tx_dma) {
+			ret = -EFAULT;
+			goto err_tx_map;
 		}
-
-		param.opt = TCINTEN | EDMA_TCC(dma->rx_channel);
-		param.src = rx_reg;
-		param.a_b_cnt = b << 16 | data_type;
-		param.dst = t->rx_dma;
-		param.src_dst_bidx = (t->rx_buf ? data_type : 0) << 16;
-		param.link_bcntrld = 0xffffffff;
-		param.src_dst_cidx = (t->rx_buf ? data_type : 0) << 16;
-		param.ccnt = c;
-		edma_write_slot(dma->rx_channel, &param);
+		sg_dma_address(&sg_tx) = t->tx_dma;
+		sg_dma_len(&sg_tx) = t->len;
+
+		rxdesc = dmaengine_prep_slave_sg(dspi->dma_rx,
+				&sg_rx, 1, DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!rxdesc)
+			goto err_desc;
+
+		txdesc = dmaengine_prep_slave_sg(dspi->dma_tx,
+				&sg_tx, 1, DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!txdesc)
+			goto err_desc;
+
+		rxdesc->callback = davinci_spi_dma_rx_callback;
+		rxdesc->callback_param = (void *)dspi;
+		txdesc->callback = davinci_spi_dma_tx_callback;
+		txdesc->callback_param = (void *)dspi;
 
 		if (pdata->cshold_bug)
 			iowrite16(spidat1 >> 16, dspi->base + SPIDAT1 + 2);
 
-		edma_start(dma->rx_channel);
-		edma_start(dma->tx_channel);
+		dmaengine_submit(rxdesc);
+		dmaengine_submit(txdesc);
+
+		dma_async_issue_pending(dspi->dma_rx);
+		dma_async_issue_pending(dspi->dma_tx);
+
 		set_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
 	}
 
@@ -690,15 +659,13 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 
 	clear_io_bits(dspi->base + SPIINT, SPIINT_MASKALL);
 	if (spicfg->io_type == SPI_IO_TYPE_DMA) {
-
-		if (t->tx_buf)
-			dma_unmap_single(&spi->dev, t->tx_dma, t->len,
-								DMA_TO_DEVICE);
-
-		dma_unmap_single(&spi->dev, t->rx_dma, rx_buf_count,
-							DMA_FROM_DEVICE);
-
 		clear_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
+
+		dma_unmap_single(&spi->dev, t->rx_dma,
+				t->len, DMA_FROM_DEVICE);
+		dma_unmap_single(&spi->dev, t->tx_dma,
+				t->len, DMA_TO_DEVICE);
+		kfree(dummy_buf);
 	}
 
 	clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK);
@@ -716,11 +683,20 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	}
 
 	if (dspi->rcount != 0 || dspi->wcount != 0) {
-		dev_err(sdev, "SPI data transfer error\n");
+		dev_err(&spi->dev, "SPI data transfer error\n");
 		return -EIO;
 	}
 
 	return t->len;
+
+err_desc:
+	dma_unmap_single(&spi->dev, t->tx_dma, t->len, DMA_TO_DEVICE);
+err_tx_map:
+	dma_unmap_single(&spi->dev, t->rx_dma, t->len, DMA_FROM_DEVICE);
+err_rx_map:
+	kfree(dummy_buf);
+err_alloc_dummy_buf:
+	return ret;
 }
 
 /**
@@ -751,39 +727,33 @@ static irqreturn_t davinci_spi_irq(s32 irq, void *data)
 
 static int davinci_spi_request_dma(struct davinci_spi *dspi)
 {
+	dma_cap_mask_t mask;
+	struct device *sdev = dspi->bitbang.master->dev.parent;
 	int r;
-	struct davinci_spi_dma *dma = &dspi->dma;
 
-	r = edma_alloc_channel(dma->rx_channel, davinci_spi_dma_callback, dspi,
-								dma->eventq);
-	if (r < 0) {
-		pr_err("Unable to request DMA channel for SPI RX\n");
-		r = -EAGAIN;
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	dspi->dma_rx = dma_request_channel(mask, edma_filter_fn,
+					   &dspi->dma_rx_chnum);
+	if (!dspi->dma_rx) {
+		dev_err(sdev, "request RX DMA channel failed\n");
+		r = -ENODEV;
 		goto rx_dma_failed;
 	}
 
-	r = edma_alloc_channel(dma->tx_channel, davinci_spi_dma_callback, dspi,
-								dma->eventq);
-	if (r < 0) {
-		pr_err("Unable to request DMA channel for SPI TX\n");
-		r = -EAGAIN;
+	dspi->dma_tx = dma_request_channel(mask, edma_filter_fn,
+					   &dspi->dma_tx_chnum);
+	if (!dspi->dma_tx) {
+		dev_err(sdev, "request TX DMA channel failed\n");
+		r = -ENODEV;
 		goto tx_dma_failed;
 	}
 
-	r = edma_alloc_slot(EDMA_CTLR(dma->tx_channel), EDMA_SLOT_ANY);
-	if (r < 0) {
-		pr_err("Unable to request SPI TX DMA param slot\n");
-		r = -EAGAIN;
-		goto param_failed;
-	}
-	dma->dummy_param_slot = r;
-	edma_link(dma->dummy_param_slot, dma->dummy_param_slot);
-
 	return 0;
-param_failed:
-	edma_free_channel(dma->tx_channel);
+
 tx_dma_failed:
-	edma_free_channel(dma->rx_channel);
+	dma_release_channel(dspi->dma_rx);
 rx_dma_failed:
 	return r;
 }
@@ -898,9 +868,8 @@ static int __devinit davinci_spi_probe(struct platform_device *pdev)
 	dspi->bitbang.txrx_bufs = davinci_spi_bufs;
 	if (dma_rx_chan != SPI_NO_RESOURCE &&
 	    dma_tx_chan != SPI_NO_RESOURCE) {
-		dspi->dma.rx_channel = dma_rx_chan;
-		dspi->dma.tx_channel = dma_tx_chan;
-		dspi->dma.eventq = pdata->dma_event_q;
+		dspi->dma_rx_chnum = dma_rx_chan;
+		dspi->dma_tx_chnum = dma_tx_chan;
 
 		ret = davinci_spi_request_dma(dspi);
 		if (ret)
@@ -955,9 +924,8 @@ static int __devinit davinci_spi_probe(struct platform_device *pdev)
 	return ret;
 
 free_dma:
-	edma_free_channel(dspi->dma.tx_channel);
-	edma_free_channel(dspi->dma.rx_channel);
-	edma_free_slot(dspi->dma.dummy_param_slot);
+	dma_release_channel(dspi->dma_rx);
+	dma_release_channel(dspi->dma_tx);
 free_clk:
 	clk_disable(dspi->clk);
 	clk_put(dspi->clk);
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 2412e02d7c0f78796202315002474c09c7c7558a..e1c8c9e919ac74401b566ccc08f9eeee67ca0a45 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -19,6 +19,10 @@
  * @nr_channels: Number of channels supported by hardware (max 8)
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
+ * @block_size: Maximum block size supported by the controller
+ * @nr_masters: Number of AHB masters supported by the controller
+ * @data_width: Maximum data width supported by hardware per AHB master
+ *		(0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
@@ -29,6 +33,9 @@ struct dw_dma_platform_data {
 #define CHAN_PRIORITY_ASCENDING		0	/* chan0 highest */
 #define CHAN_PRIORITY_DESCENDING	1	/* chan7 highest */
 	unsigned char	chan_priority;
+	unsigned short	block_size;
+	unsigned char	nr_masters;
+	unsigned char	data_width[4];
 };
 
 /* bursts size */
diff --git a/include/linux/edma.h b/include/linux/edma.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1307e7827e822aa9df2613dba78cc53e1137758
--- /dev/null
+++ b/include/linux/edma.h
@@ -0,0 +1,29 @@
+/*
+ * TI EDMA DMA engine driver
+ *
+ * Copyright 2012 Texas Instruments
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef __LINUX_EDMA_H
+#define __LINUX_EDMA_H
+
+struct dma_chan;
+
+#if defined(CONFIG_TI_EDMA) || defined(CONFIG_TI_EDMA_MODULE)
+bool edma_filter_fn(struct dma_chan *, void *);
+#else
+static inline bool edma_filter_fn(struct dma_chan *chan, void *param)
+{
+	return false;
+}
+#endif
+
+#endif
diff --git a/include/linux/platform_data/mmp_dma.h b/include/linux/platform_data/mmp_dma.h
new file mode 100644
index 0000000000000000000000000000000000000000..2a330ec9e2af0dfb4ad1cde525e74f61c106c4c5
--- /dev/null
+++ b/include/linux/platform_data/mmp_dma.h
@@ -0,0 +1,19 @@
+/*
+ *  MMP Platform DMA Management
+ *
+ *  Copyright (c) 2011 Marvell Semiconductors Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+
+#ifndef MMP_DMA_H
+#define MMP_DMA_H
+
+struct mmp_dma_platdata {
+	int dma_channels;
+};
+
+#endif /* MMP_DMA_H */