diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt
index b0d541042ac61c7d5cf580d4fc37a486ea96521e..d9be7a97dff35e7b1521e709e8a29c278d3fb434 100644
--- a/Documentation/acpi/enumeration.txt
+++ b/Documentation/acpi/enumeration.txt
@@ -66,6 +66,83 @@ the ACPI device explicitly to acpi_platform_device_ids list defined in
 drivers/acpi/acpi_platform.c. This limitation is only for the platform
 devices, SPI and I2C devices are created automatically as described below.
 
+DMA support
+~~~~~~~~~~~
+DMA controllers enumerated via ACPI should be registered in the system to
+provide generic access to their resources. For example, a driver that wants
+to be accessible to slave devices via the generic API call
+dma_request_slave_channel() must register itself at the end of its probe
+function like this:
+
+	err = devm_acpi_dma_controller_register(dev, xlate_func, dw);
+	/* Handle the error if it's not a case of !CONFIG_ACPI */
+
+and implement a custom xlate function if needed (usually acpi_dma_simple_xlate()
+is enough) which converts the FixedDMA resource provided by struct
+acpi_dma_spec into the corresponding DMA channel. A piece of code for that case
+could look like:
+
+	#ifdef CONFIG_ACPI
+	struct filter_args {
+		/* Provide necessary information for the filter_func */
+		...
+	};
+
+	static bool filter_func(struct dma_chan *chan, void *param)
+	{
+		/* Choose the proper channel */
+		...
+	}
+
+	static struct dma_chan *xlate_func(struct acpi_dma_spec *dma_spec,
+			struct acpi_dma *adma)
+	{
+		dma_cap_mask_t cap;
+		struct filter_args args;
+
+		/* Prepare arguments for filter_func */
+		...
+		return dma_request_channel(cap, filter_func, &args);
+	}
+	#else
+	static struct dma_chan *xlate_func(struct acpi_dma_spec *dma_spec,
+			struct acpi_dma *adma)
+	{
+		return NULL;
+	}
+	#endif
+
+dma_request_slave_channel() will call xlate_func() for each registered DMA
+controller. In the xlate function the proper channel must be chosen based on
+information in struct acpi_dma_spec and the properties of the controller
+provided by struct acpi_dma.
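+
+For instance, when acpi_dma_simple_xlate() is used, the struct acpi_dma_spec
+itself is passed to the filter function, so a minimal filter might look like
+this (a sketch; matching by channel number is just one possible policy):
+
+	static bool filter_fn(struct dma_chan *chan, void *param)
+	{
+		struct acpi_dma_spec *dma_spec = param;
+
+		return chan->chan_id == dma_spec->chan_id;
+	}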
+
+Clients must call dma_request_slave_channel() with the string parameter that
+corresponds to a specific FixedDMA resource. By default "tx" means the first
+entry of the FixedDMA resource array, "rx" means the second entry. The ASL
+excerpt below shows such a layout:
+
+	Device (I2C0)
+	{
+		...
+		Method (_CRS, 0, NotSerialized)
+		{
+			Name (DBUF, ResourceTemplate ()
+			{
+				FixedDMA (0x0018, 0x0004, Width32bit, _Y48)
+				FixedDMA (0x0019, 0x0005, Width32bit, )
+			})
+		...
+		}
+	}
+
+So in this example, the FixedDMA descriptor with request line 0x0018 is "tx"
+and the next one is "rx".
+
+In more complex cases the client unfortunately needs to call
+acpi_dma_request_slave_chan_by_index() directly and thus choose the
+specific FixedDMA resource by its index.
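+
+For example (a minimal sketch; the returned channel pointers must be checked
+for NULL):
+
+	/* Request the first ("tx") and second ("rx") FixedDMA entries */
+	txchan = dma_request_slave_channel(dev, "tx");
+	rxchan = dma_request_slave_channel(dev, "rx");
+
+	/* Or pick a specific FixedDMA resource by its index directly */
+	chan = acpi_dma_request_slave_chan_by_index(dev, 0);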
+
 SPI serial bus support
 ~~~~~~~~~~~~~~~~~~~~~~
 Slave devices behind SPI bus have SpiSerialBus resource attached to them.
diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt
index 3c046ee6e8b5dd54e14f7fcc6fb8699792aa4b7d..c80e8a3402f0f5e744d323c0611dec9727f4930e 100644
--- a/Documentation/devicetree/bindings/dma/atmel-dma.txt
+++ b/Documentation/devicetree/bindings/dma/atmel-dma.txt
@@ -1,14 +1,39 @@
 * Atmel Direct Memory Access Controller (DMA)
 
 Required properties:
-- compatible: Should be "atmel,<chip>-dma"
-- reg: Should contain DMA registers location and length
-- interrupts: Should contain DMA interrupt
+- compatible: Should be "atmel,<chip>-dma".
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain DMA interrupt.
+- #dma-cells: Must be <2>, used to represent the number of integer cells in
+the dmas property of client devices.
 
-Examples:
+Example:
 
-dma@ffffec00 {
+dma0: dma@ffffec00 {
 	compatible = "atmel,at91sam9g45-dma";
 	reg = <0xffffec00 0x200>;
 	interrupts = <21>;
+	#dma-cells = <2>;
+};
+
+DMA clients connected to the Atmel DMA controller must use the format
+described in the dma.txt file, using a three-cell specifier for each channel:
+a phandle plus two integer cells.
+The three cells in order are:
+
+1. A phandle pointing to the DMA controller.
+2. The memory interface (16 most significant bits), the peripheral interface
+(16 least significant bits).
+3. The peripheral identifier for the hardware handshaking interface. The
+identifier can be different for tx and rx.
+
+Example:
+
+i2c0: i2c@f8010000 {
+	compatible = "atmel,at91sam9x5-i2c";
+	reg = <0xf8010000 0x100>;
+	interrupts = <9 4 6>;
+	dmas = <&dma0 1 7>,
+	       <&dma0 1 8>;
+	dma-names = "tx", "rx";
 };
diff --git a/Documentation/dmatest.txt b/Documentation/dmatest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..279ac0a8c5b11dd12305e8ce8f0a15ce8a3aaaef
--- /dev/null
+++ b/Documentation/dmatest.txt
@@ -0,0 +1,81 @@
+				DMA Test Guide
+				==============
+
+		Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+This small document introduces how to test DMA drivers using the dmatest
+module.
+
+	Part 1 - How to build the test module
+
+The menuconfig contains an option that can be found under the following path:
+	Device Drivers -> DMA Engine support -> DMA Test client
+
+In the configuration file the option is called CONFIG_DMATEST. dmatest can be
+built as a module or into the kernel. Let's consider both cases.
+
+	Part 2 - When dmatest is built as a module...
+
+After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest
+folder with its nodes will be created. The nodes mirror the module parameters,
+with the addition of the 'run' node that controls the run and stop phases of
+the test.
+
+Note that in this case the test will not run automatically on load.
+
+Example of usage:
+	% echo dma0chan0 > /sys/kernel/debug/dmatest/channel
+	% echo 2000 > /sys/kernel/debug/dmatest/timeout
+	% echo 1 > /sys/kernel/debug/dmatest/iterations
+	% echo 1 > /sys/kernel/debug/dmatest/run
+
+Hint: the list of available channels can be obtained by running the following
+command:
+	% ls -1 /sys/class/dma/
+
+After a while you will start to get messages about the current status or
+errors, in the same format as the original code produced.
+
+Note that running a new test will stop any test in progress.
+
+The following command returns the actual state of the test:
+	% cat /sys/kernel/debug/dmatest/run
+
+To wait until the test is done, the user may poll the state in a busy loop:
+
+	% while [ $(cat /sys/kernel/debug/dmatest/run) = "Y" ]
+	> do
+	> 	echo -n "."
+	> 	sleep 1
+	> done
+	> echo
+
+	Part 3 - When built into the kernel...
+
+The module parameters supplied on the kernel command line will be used for the
+first test performed. After the user gets control, the test can be interrupted
+or re-run with the same or different parameters. For details see the section
+above, "Part 2 - When dmatest is built as a module..."
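+
+For example (a sketch; the parameter values are purely illustrative):
+	dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1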
+
+In both cases the module parameters are used as initial values for the test
+case. You can always check them at run time by running
+	% grep -H . /sys/module/dmatest/parameters/*
+
+	Part 4 - Gathering the test results
+
+The module stores the test results in memory. The gathered data can be
+inspected after the test is done.
+
+The special file 'results' in debugfs contains the data gathered by the test
+in progress. The collected messages are printed to the kernel log as well.
+
+Example of output:
+	% cat /sys/kernel/debug/dmatest/results
+	dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
+
+The message format is unified across the different types of errors. A number
+in parentheses represents additional information, e.g. an error code, an error
+counter, or a status.
+
+The result of the buffer comparison is stored in a dedicated structure.
+
+Note that the verify result is now accessible only via the 'results' file in
+debugfs.
diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c
index dab9fc014b971cb7834dd518558dbb58de0996c5..49fd0d501c9bc5edcbb83bbb234891e0c381753c 100644
--- a/arch/arm/mach-omap2/dma.c
+++ b/arch/arm/mach-omap2/dma.c
@@ -28,6 +28,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/of.h>
 #include <linux/omap-dma.h>
 
 #include "soc.h"
@@ -304,6 +305,9 @@ static int __init omap2_system_dma_init(void)
 	if (res)
 		return res;
 
+	if (of_have_populated_dt())
+		return res;
+
 	pdev = platform_device_register_full(&omap_dma_dev_info);
 	if (IS_ERR(pdev))
 		return PTR_ERR(pdev);
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index aeaea32bcfdacd5fed2eb7e53536169348791cc8..e9924898043adf2a437b8f2eb3b8f15329ac755f 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -63,8 +63,6 @@ config INTEL_IOATDMA
 	depends on PCI && X86
 	select DMA_ENGINE
 	select DCA
-	select ASYNC_TX_DISABLE_PQ_VAL_DMA
-	select ASYNC_TX_DISABLE_XOR_VAL_DMA
 	help
 	  Enable support for the Intel(R) I/OAT DMA engine present
 	  in recent Intel Xeon chipsets.
@@ -174,15 +172,7 @@ config TEGRA20_APB_DMA
 	  This DMA controller transfers data from memory to peripheral fifo
 	  or vice versa. It does not support memory to memory data transfer.
 
-
-
-config SH_DMAE
-	tristate "Renesas SuperH DMAC support"
-	depends on (SUPERH && SH_DMA) || (ARM && ARCH_SHMOBILE)
-	depends on !SH_DMA_API
-	select DMA_ENGINE
-	help
-	  Enable support for the Renesas SuperH DMA controllers.
+source "drivers/dma/sh/Kconfig"
 
 config COH901318
 	bool "ST-Ericsson COH901318 DMA support"
@@ -328,6 +318,10 @@ config DMA_ENGINE
 config DMA_VIRTUAL_CHANNELS
 	tristate
 
+config DMA_ACPI
+	def_bool y
+	depends on ACPI
+
 config DMA_OF
 	def_bool y
 	depends on OF
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 488e3ff85b522840ce1ba506916c534ff3116455..a2b0df591f958654549c6a15c60a08efb4691678 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -3,6 +3,7 @@ ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG
 
 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o
+obj-$(CONFIG_DMA_ACPI) += acpi-dma.o
 obj-$(CONFIG_DMA_OF) += of-dma.o
 
 obj-$(CONFIG_NET_DMA) += iovlock.o
@@ -18,7 +19,7 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
-obj-$(CONFIG_SH_DMAE) += sh/
+obj-$(CONFIG_SH_DMAE_BASE) += sh/
 obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
new file mode 100644
index 0000000000000000000000000000000000000000..ba6fc62e965163bca45cd55d148188812e08374a
--- /dev/null
+++ b/drivers/dma/acpi-dma.c
@@ -0,0 +1,279 @@
+/*
+ * ACPI helpers for DMA request / controller
+ *
+ * Based on of-dma.c
+ *
+ * Copyright (C) 2013, Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
+
+static LIST_HEAD(acpi_dma_list);
+static DEFINE_MUTEX(acpi_dma_lock);
+
+/**
+ * acpi_dma_controller_register - Register a DMA controller to ACPI DMA helpers
+ * @dev:		struct device of DMA controller
+ * @acpi_dma_xlate:	translation function which converts a dma specifier
+ *			into a dma_chan structure
+ * @data:		pointer to controller specific data to be used by
+ *			translation function
+ *
+ * Returns 0 on success or appropriate errno value on error.
+ *
+ * Allocated memory should be freed with an appropriate
+ * acpi_dma_controller_free() call.
+ */
+int acpi_dma_controller_register(struct device *dev,
+		struct dma_chan *(*acpi_dma_xlate)
+		(struct acpi_dma_spec *, struct acpi_dma *),
+		void *data)
+{
+	struct acpi_device *adev;
+	struct acpi_dma	*adma;
+
+	if (!dev || !acpi_dma_xlate)
+		return -EINVAL;
+
+	/* Check if the device was enumerated by ACPI */
+	if (!ACPI_HANDLE(dev))
+		return -EINVAL;
+
+	if (acpi_bus_get_device(ACPI_HANDLE(dev), &adev))
+		return -EINVAL;
+
+	adma = kzalloc(sizeof(*adma), GFP_KERNEL);
+	if (!adma)
+		return -ENOMEM;
+
+	adma->dev = dev;
+	adma->acpi_dma_xlate = acpi_dma_xlate;
+	adma->data = data;
+
+	/* Now queue acpi_dma controller structure in list */
+	mutex_lock(&acpi_dma_lock);
+	list_add_tail(&adma->dma_controllers, &acpi_dma_list);
+	mutex_unlock(&acpi_dma_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_dma_controller_register);
+
+/**
+ * acpi_dma_controller_free - Remove a DMA controller from ACPI DMA helpers list
+ * @dev:	struct device of DMA controller
+ *
+ * Memory allocated by acpi_dma_controller_register() is freed here.
+ */
+int acpi_dma_controller_free(struct device *dev)
+{
+	struct acpi_dma *adma;
+
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&acpi_dma_lock);
+
+	list_for_each_entry(adma, &acpi_dma_list, dma_controllers)
+		if (adma->dev == dev) {
+			list_del(&adma->dma_controllers);
+			mutex_unlock(&acpi_dma_lock);
+			kfree(adma);
+			return 0;
+		}
+
+	mutex_unlock(&acpi_dma_lock);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(acpi_dma_controller_free);
+
+static void devm_acpi_dma_release(struct device *dev, void *res)
+{
+	acpi_dma_controller_free(dev);
+}
+
+/**
+ * devm_acpi_dma_controller_register - resource managed acpi_dma_controller_register()
+ * @dev:		device that is registering this DMA controller
+ * @acpi_dma_xlate:	translation function
+ * @data:		pointer to controller specific data
+ *
+ * Managed acpi_dma_controller_register(). A DMA controller registered by
+ * this function is automatically freed on driver detach. See
+ * acpi_dma_controller_register() for more information.
+ */
+int devm_acpi_dma_controller_register(struct device *dev,
+		struct dma_chan *(*acpi_dma_xlate)
+		(struct acpi_dma_spec *, struct acpi_dma *),
+		void *data)
+{
+	void *res;
+	int ret;
+
+	res = devres_alloc(devm_acpi_dma_release, 0, GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	ret = acpi_dma_controller_register(dev, acpi_dma_xlate, data);
+	if (ret) {
+		devres_free(res);
+		return ret;
+	}
+	devres_add(dev, res);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_register);
+
+/**
+ * devm_acpi_dma_controller_free - resource managed acpi_dma_controller_free()
+ * @dev:	device that is unregistering this DMA controller
+ *
+ * Unregister a DMA controller registered with
+ * devm_acpi_dma_controller_register(). Normally this function will not need to
+ * be called and the resource management code will ensure that the resource is
+ * freed.
+ */
+void devm_acpi_dma_controller_free(struct device *dev)
+{
+	WARN_ON(devres_destroy(dev, devm_acpi_dma_release, NULL, NULL));
+}
+EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_free);
+
+struct acpi_dma_parser_data {
+	struct acpi_dma_spec dma_spec;
+	size_t index;
+	size_t n;
+};
+
+/**
+ * acpi_dma_parse_fixed_dma - Parse FixedDMA ACPI resources to a DMA specifier
+ * @res:	struct acpi_resource to get FixedDMA resources from
+ * @data:	pointer to a helper struct acpi_dma_parser_data
+ */
+static int acpi_dma_parse_fixed_dma(struct acpi_resource *res, void *data)
+{
+	struct acpi_dma_parser_data *pdata = data;
+
+	if (res->type == ACPI_RESOURCE_TYPE_FIXED_DMA) {
+		struct acpi_resource_fixed_dma *dma = &res->data.fixed_dma;
+
+		if (pdata->n++ == pdata->index) {
+			pdata->dma_spec.chan_id = dma->channels;
+			pdata->dma_spec.slave_id = dma->request_lines;
+		}
+	}
+
+	/* Tell the ACPI core to skip this resource */
+	return 1;
+}
+
+/**
+ * acpi_dma_request_slave_chan_by_index - Get the DMA slave channel
+ * @dev:	struct device to get DMA request from
+ * @index:	index of FixedDMA descriptor for @dev
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev,
+		size_t index)
+{
+	struct acpi_dma_parser_data pdata;
+	struct acpi_dma_spec *dma_spec = &pdata.dma_spec;
+	struct list_head resource_list;
+	struct acpi_device *adev;
+	struct acpi_dma *adma;
+	struct dma_chan *chan = NULL;
+
+	/* Check if the device was enumerated by ACPI */
+	if (!dev || !ACPI_HANDLE(dev))
+		return NULL;
+
+	if (acpi_bus_get_device(ACPI_HANDLE(dev), &adev))
+		return NULL;
+
+	memset(&pdata, 0, sizeof(pdata));
+	pdata.index = index;
+
+	/* Initial values for the request line and channel */
+	dma_spec->chan_id = -1;
+	dma_spec->slave_id = -1;
+
+	INIT_LIST_HEAD(&resource_list);
+	acpi_dev_get_resources(adev, &resource_list,
+			acpi_dma_parse_fixed_dma, &pdata);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (dma_spec->slave_id < 0 || dma_spec->chan_id < 0)
+		return NULL;
+
+	mutex_lock(&acpi_dma_lock);
+
+	list_for_each_entry(adma, &acpi_dma_list, dma_controllers) {
+		dma_spec->dev = adma->dev;
+		chan = adma->acpi_dma_xlate(dma_spec, adma);
+		if (chan)
+			break;
+	}
+
+	mutex_unlock(&acpi_dma_lock);
+	return chan;
+}
+EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_index);
+
+/**
+ * acpi_dma_request_slave_chan_by_name - Get the DMA slave channel
+ * @dev:	struct device to get DMA request from
+ * @name:	represents corresponding FixedDMA descriptor for @dev
+ *
+ * In order to support both Device Tree and ACPI in a single driver we
+ * translate the names "tx" and "rx" here based on the most common case where
+ * the first FixedDMA descriptor is TX and second is RX.
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev,
+		const char *name)
+{
+	size_t index;
+
+	if (!strcmp(name, "tx"))
+		index = 0;
+	else if (!strcmp(name, "rx"))
+		index = 1;
+	else
+		return NULL;
+
+	return acpi_dma_request_slave_chan_by_index(dev, index);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_name);
+
+/**
+ * acpi_dma_simple_xlate - Simple ACPI DMA engine translation helper
+ * @dma_spec: pointer to ACPI DMA specifier
+ * @adma: pointer to ACPI DMA controller data
+ *
+ * A simple translation function for ACPI based devices. Passes &struct
+ * dma_spec to the DMA controller driver provided filter function. Returns
+ * pointer to the channel if found or %NULL otherwise.
+ */
+struct dma_chan *acpi_dma_simple_xlate(struct acpi_dma_spec *dma_spec,
+		struct acpi_dma *adma)
+{
+	struct acpi_dma_filter_info *info = adma->data;
+
+	if (!info || !info->filter_fn)
+		return NULL;
+
+	return dma_request_channel(info->dma_cap, info->filter_fn, dma_spec);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_simple_xlate);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 88cfc61329d20fb137a46ff852569bf8b567c60a..e923cda930f98a09c90fa73b868cfeb3b619b30d 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/of_dma.h>
 
 #include "at_hdmac_regs.h"
 #include "dmaengine.h"
@@ -677,7 +678,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
 			| ATC_SRC_ADDR_MODE_INCR
 			| ATC_FC_MEM2PER
-			| ATC_SIF(AT_DMA_MEM_IF) | ATC_DIF(AT_DMA_PER_IF);
+			| ATC_SIF(atchan->mem_if) | ATC_DIF(atchan->per_if);
 		reg = sconfig->dst_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct at_desc	*desc;
@@ -716,7 +717,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		ctrlb |=  ATC_DST_ADDR_MODE_INCR
 			| ATC_SRC_ADDR_MODE_FIXED
 			| ATC_FC_PER2MEM
-			| ATC_SIF(AT_DMA_PER_IF) | ATC_DIF(AT_DMA_MEM_IF);
+			| ATC_SIF(atchan->per_if) | ATC_DIF(atchan->mem_if);
 
 		reg = sconfig->src_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
@@ -822,8 +823,8 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
 		desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED
 				| ATC_SRC_ADDR_MODE_INCR
 				| ATC_FC_MEM2PER
-				| ATC_SIF(AT_DMA_MEM_IF)
-				| ATC_DIF(AT_DMA_PER_IF);
+				| ATC_SIF(atchan->mem_if)
+				| ATC_DIF(atchan->per_if);
 		break;
 
 	case DMA_DEV_TO_MEM:
@@ -833,8 +834,8 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
 		desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR
 				| ATC_SRC_ADDR_MODE_FIXED
 				| ATC_FC_PER2MEM
-				| ATC_SIF(AT_DMA_PER_IF)
-				| ATC_DIF(AT_DMA_MEM_IF);
+				| ATC_SIF(atchan->per_if)
+				| ATC_DIF(atchan->mem_if);
 		break;
 
 	default:
@@ -1188,6 +1189,67 @@ static void atc_free_chan_resources(struct dma_chan *chan)
 	dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
 }
 
+#ifdef CONFIG_OF
+static bool at_dma_filter(struct dma_chan *chan, void *slave)
+{
+	struct at_dma_slave *atslave = slave;
+
+	if (atslave->dma_dev == chan->device->dev) {
+		chan->private = atslave;
+		return true;
+	} else {
+		return false;
+	}
+}
+
+static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
+				     struct of_dma *of_dma)
+{
+	struct dma_chan *chan;
+	struct at_dma_chan *atchan;
+	struct at_dma_slave *atslave;
+	dma_cap_mask_t mask;
+	unsigned int per_id;
+	struct platform_device *dmac_pdev;
+
+	if (dma_spec->args_count != 2)
+		return NULL;
+
+	dmac_pdev = of_find_device_by_node(dma_spec->np);
+	if (!dmac_pdev)
+		return NULL;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL);
+	if (!atslave)
+		return NULL;
+	/*
+	 * We can fill both SRC_PER and DST_PER, one of these fields will be
+	 * ignored depending on DMA transfer direction.
+	 */
+	per_id = dma_spec->args[1];
+	atslave->cfg = ATC_FIFOCFG_HALFFIFO | ATC_DST_H2SEL_HW
+		      | ATC_SRC_H2SEL_HW | ATC_DST_PER(per_id)
+		      | ATC_SRC_PER(per_id);
+	atslave->dma_dev = &dmac_pdev->dev;
+
+	chan = dma_request_channel(mask, at_dma_filter, atslave);
+	if (!chan)
+		return NULL;
+
+	atchan = to_at_dma_chan(chan);
+	atchan->per_if = dma_spec->args[0] & 0xff;
+	atchan->mem_if = (dma_spec->args[0] >> 16) & 0xff;
+
+	return chan;
+}
+#else
+static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
+				     struct of_dma *of_dma)
+{
+	return NULL;
+}
+#endif
 
 /*--  Module Management  -----------------------------------------------*/
 
@@ -1342,6 +1404,8 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	for (i = 0; i < plat_dat->nr_channels; i++) {
 		struct at_dma_chan	*atchan = &atdma->chan[i];
 
+		atchan->mem_if = AT_DMA_MEM_IF;
+		atchan->per_if = AT_DMA_PER_IF;
 		atchan->chan_common.device = &atdma->dma_common;
 		dma_cookie_init(&atchan->chan_common);
 		list_add_tail(&atchan->chan_common.device_node,
@@ -1388,8 +1452,25 @@ static int __init at_dma_probe(struct platform_device *pdev)
 
 	dma_async_device_register(&atdma->dma_common);
 
+	/*
+	 * Do not return an error if the dmac node is not present in order to
+	 * not break the existing way of requesting channel with
+	 * dma_request_channel().
+	 */
+	if (pdev->dev.of_node) {
+		err = of_dma_controller_register(pdev->dev.of_node,
+						 at_dma_xlate, atdma);
+		if (err) {
+			dev_err(&pdev->dev, "could not register of_dma_controller\n");
+			goto err_of_dma_controller_register;
+		}
+	}
+
 	return 0;
 
+err_of_dma_controller_register:
+	dma_async_device_unregister(&atdma->dma_common);
+	dma_pool_destroy(atdma->dma_desc_pool);
 err_pool_create:
 	platform_set_drvdata(pdev, NULL);
 	free_irq(platform_get_irq(pdev, 0), atdma);
@@ -1406,7 +1487,7 @@ err_kfree:
 	return err;
 }
 
-static int __exit at_dma_remove(struct platform_device *pdev)
+static int at_dma_remove(struct platform_device *pdev)
 {
 	struct at_dma		*atdma = platform_get_drvdata(pdev);
 	struct dma_chan		*chan, *_chan;
@@ -1564,7 +1645,7 @@ static const struct dev_pm_ops at_dma_dev_pm_ops = {
 };
 
 static struct platform_driver at_dma_driver = {
-	.remove		= __exit_p(at_dma_remove),
+	.remove		= at_dma_remove,
 	.shutdown	= at_dma_shutdown,
 	.id_table	= atdma_devtypes,
 	.driver = {
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index 0eb3c1388667dc4de3a6d2e5284ba493b3d388bd..c604d26fd4d38cf48b437157a47450c49a076719 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -220,6 +220,8 @@ enum atc_status {
  * @device: parent device
  * @ch_regs: memory mapped register base
  * @mask: channel index in a mask
+ * @per_if: peripheral interface
+ * @mem_if: memory interface
  * @status: transmit status information from irq/prep* functions
  *                to tasklet (use atomic operations)
  * @tasklet: bottom half to finish transaction work
@@ -238,6 +240,8 @@ struct at_dma_chan {
 	struct at_dma		*device;
 	void __iomem		*ch_regs;
 	u8			mask;
+	u8			per_if;
+	u8			mem_if;
 	unsigned long		status;
 	struct tasklet_struct	tasklet;
 	u32			save_cfg;
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index 797940e532ff71dbf9a53000f4bfb65017b79433..3b23061cdb41bd0e963a5160bb4fbd7edf62de2d 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -2748,7 +2748,7 @@ static int __init coh901318_probe(struct platform_device *pdev)
 	return err;
 }
 
-static int __exit coh901318_remove(struct platform_device *pdev)
+static int coh901318_remove(struct platform_device *pdev)
 {
 	struct coh901318_base *base = platform_get_drvdata(pdev);
 
@@ -2760,7 +2760,7 @@ static int __exit coh901318_remove(struct platform_device *pdev)
 
 
 static struct platform_driver coh901318_driver = {
-	.remove = __exit_p(coh901318_remove),
+	.remove = coh901318_remove,
 	.driver = {
 		.name	= "coh901318",
 	},
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index b2728d6ba2fdea97774d904bcd04f58d12187772..93f7992bee5c1c933e49b7f93ee41c81265698c8 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -62,6 +62,8 @@
 #include <linux/rculist.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
 #include <linux/of_dma.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
@@ -174,7 +176,8 @@ static struct class dma_devclass = {
 #define dma_device_satisfies_mask(device, mask) \
 	__dma_device_satisfies_mask((device), &(mask))
 static int
-__dma_device_satisfies_mask(struct dma_device *device, dma_cap_mask_t *want)
+__dma_device_satisfies_mask(struct dma_device *device,
+			    const dma_cap_mask_t *want)
 {
 	dma_cap_mask_t has;
 
@@ -463,7 +466,8 @@ static void dma_channel_rebalance(void)
 		}
 }
 
-static struct dma_chan *private_candidate(dma_cap_mask_t *mask, struct dma_device *dev,
+static struct dma_chan *private_candidate(const dma_cap_mask_t *mask,
+					  struct dma_device *dev,
 					  dma_filter_fn fn, void *fn_param)
 {
 	struct dma_chan *chan;
@@ -505,7 +509,8 @@ static struct dma_chan *private_candidate(dma_cap_mask_t *mask, struct dma_devic
  * @fn: optional callback to disposition available channels
  * @fn_param: opaque parameter to pass to dma_filter_fn
  */
-struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param)
+struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
+				       dma_filter_fn fn, void *fn_param)
 {
 	struct dma_device *device, *_d;
 	struct dma_chan *chan = NULL;
@@ -555,12 +560,16 @@ EXPORT_SYMBOL_GPL(__dma_request_channel);
  * @dev:	pointer to client device structure
  * @name:	slave channel name
  */
-struct dma_chan *dma_request_slave_channel(struct device *dev, char *name)
+struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name)
 {
 	/* If device-tree is present get slave info from here */
 	if (dev->of_node)
 		return of_dma_request_slave_channel(dev->of_node, name);
 
+	/* If device was enumerated by ACPI get slave info from here */
+	if (ACPI_HANDLE(dev))
+		return acpi_dma_request_slave_chan_by_name(dev, name);
+
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(dma_request_slave_channel);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index a2c8904b63ea44fd9154631170c391bd601054d2..d8ce4ecfef18e079336654b8a53ea89fd613cb44 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -2,6 +2,7 @@
  * DMA Engine test module
  *
  * Copyright (C) 2007 Atmel Corporation
+ * Copyright (C) 2013 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -18,6 +19,10 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/seq_file.h>
 
 static unsigned int test_buf_size = 16384;
 module_param(test_buf_size, uint, S_IRUGO);
@@ -61,6 +66,9 @@ module_param(timeout, uint, S_IRUGO);
 MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
 		 "Pass -1 for infinite timeout");
 
+/* Maximum number of mismatched bytes in the buffer to print */
+#define MAX_ERROR_COUNT		32
+
 /*
  * Initialization patterns. All bytes in the source buffer has bit 7
  * set, all bytes in the destination buffer has bit 7 cleared.
@@ -78,13 +86,65 @@ MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
 #define PATTERN_OVERWRITE	0x20
 #define PATTERN_COUNT_MASK	0x1f
 
+enum dmatest_error_type {
+	DMATEST_ET_OK,
+	DMATEST_ET_MAP_SRC,
+	DMATEST_ET_MAP_DST,
+	DMATEST_ET_PREP,
+	DMATEST_ET_SUBMIT,
+	DMATEST_ET_TIMEOUT,
+	DMATEST_ET_DMA_ERROR,
+	DMATEST_ET_DMA_IN_PROGRESS,
+	DMATEST_ET_VERIFY,
+	DMATEST_ET_VERIFY_BUF,
+};
+
+struct dmatest_verify_buffer {
+	unsigned int	index;
+	u8		expected;
+	u8		actual;
+};
+
+struct dmatest_verify_result {
+	unsigned int			error_count;
+	struct dmatest_verify_buffer	data[MAX_ERROR_COUNT];
+	u8				pattern;
+	bool				is_srcbuf;
+};
+
+struct dmatest_thread_result {
+	struct list_head	node;
+	unsigned int		n;
+	unsigned int		src_off;
+	unsigned int		dst_off;
+	unsigned int		len;
+	enum dmatest_error_type	type;
+	union {
+		unsigned long			data;
+		dma_cookie_t			cookie;
+		enum dma_status			status;
+		int				error;
+		struct dmatest_verify_result	*vr;
+	};
+};
+
+struct dmatest_result {
+	struct list_head	node;
+	char			*name;
+	struct list_head	results;
+};
+
+struct dmatest_info;
+
 struct dmatest_thread {
 	struct list_head	node;
+	struct dmatest_info	*info;
 	struct task_struct	*task;
 	struct dma_chan		*chan;
 	u8			**srcs;
 	u8			**dsts;
 	enum dma_transaction_type type;
+	bool			done;
 };
 
 struct dmatest_chan {
@@ -93,25 +153,69 @@ struct dmatest_chan {
 	struct list_head	threads;
 };
 
-/*
- * These are protected by dma_list_mutex since they're only used by
- * the DMA filter function callback
+/**
+ * struct dmatest_params - test parameters.
+ * @buf_size:		size of the memcpy test buffer
+ * @channel:		bus ID of the channel to test
+ * @device:		bus ID of the DMA Engine to test
+ * @threads_per_chan:	number of threads to start per channel
+ * @max_channels:	maximum number of channels to use
+ * @iterations:		iterations before stopping test
+ * @xor_sources:	number of xor source buffers
+ * @pq_sources:		number of p+q source buffers
+ * @timeout:		transfer timeout in msec, -1 for infinite timeout
  */
-static LIST_HEAD(dmatest_channels);
-static unsigned int nr_channels;
+struct dmatest_params {
+	unsigned int	buf_size;
+	char		channel[20];
+	char		device[20];
+	unsigned int	threads_per_chan;
+	unsigned int	max_channels;
+	unsigned int	iterations;
+	unsigned int	xor_sources;
+	unsigned int	pq_sources;
+	int		timeout;
+};
 
-static bool dmatest_match_channel(struct dma_chan *chan)
+/**
+ * struct dmatest_info - test information.
+ * @params:		test parameters
+ * @lock:		access protection to the fields of this structure
+ */
+struct dmatest_info {
+	/* Test parameters */
+	struct dmatest_params	params;
+
+	/* Internal state */
+	struct list_head	channels;
+	unsigned int		nr_channels;
+	struct mutex		lock;
+
+	/* debugfs related stuff */
+	struct dentry		*root;
+	struct dmatest_params	dbgfs_params;
+
+	/* Test results */
+	struct list_head	results;
+	struct mutex		results_lock;
+};
+
+static struct dmatest_info test_info;
+
+static bool dmatest_match_channel(struct dmatest_params *params,
+		struct dma_chan *chan)
 {
-	if (test_channel[0] == '\0')
+	if (params->channel[0] == '\0')
 		return true;
-	return strcmp(dma_chan_name(chan), test_channel) == 0;
+	return strcmp(dma_chan_name(chan), params->channel) == 0;
 }
 
-static bool dmatest_match_device(struct dma_device *device)
+static bool dmatest_match_device(struct dmatest_params *params,
+		struct dma_device *device)
 {
-	if (test_device[0] == '\0')
+	if (params->device[0] == '\0')
 		return true;
-	return strcmp(dev_name(device->dev), test_device) == 0;
+	return strcmp(dev_name(device->dev), params->device) == 0;
 }
 
 static unsigned long dmatest_random(void)
@@ -122,7 +226,8 @@ static unsigned long dmatest_random(void)
 	return buf;
 }
 
-static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len)
+static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len,
+		unsigned int buf_size)
 {
 	unsigned int i;
 	u8 *buf;
@@ -133,13 +238,14 @@ static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len)
 		for ( ; i < start + len; i++)
 			buf[i] = PATTERN_SRC | PATTERN_COPY
 				| (~i & PATTERN_COUNT_MASK);
-		for ( ; i < test_buf_size; i++)
+		for ( ; i < buf_size; i++)
 			buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
 		buf++;
 	}
 }
 
-static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len)
+static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len,
+		unsigned int buf_size)
 {
 	unsigned int i;
 	u8 *buf;
@@ -150,40 +256,14 @@ static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len)
 		for ( ; i < start + len; i++)
 			buf[i] = PATTERN_DST | PATTERN_OVERWRITE
 				| (~i & PATTERN_COUNT_MASK);
-		for ( ; i < test_buf_size; i++)
+		for ( ; i < buf_size; i++)
 			buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
 	}
 }
 
-static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
-		unsigned int counter, bool is_srcbuf)
-{
-	u8		diff = actual ^ pattern;
-	u8		expected = pattern | (~counter & PATTERN_COUNT_MASK);
-	const char	*thread_name = current->comm;
-
-	if (is_srcbuf)
-		pr_warning("%s: srcbuf[0x%x] overwritten!"
-				" Expected %02x, got %02x\n",
-				thread_name, index, expected, actual);
-	else if ((pattern & PATTERN_COPY)
-			&& (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
-		pr_warning("%s: dstbuf[0x%x] not copied!"
-				" Expected %02x, got %02x\n",
-				thread_name, index, expected, actual);
-	else if (diff & PATTERN_SRC)
-		pr_warning("%s: dstbuf[0x%x] was copied!"
-				" Expected %02x, got %02x\n",
-				thread_name, index, expected, actual);
-	else
-		pr_warning("%s: dstbuf[0x%x] mismatch!"
-				" Expected %02x, got %02x\n",
-				thread_name, index, expected, actual);
-}
-
-static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
-		unsigned int end, unsigned int counter, u8 pattern,
-		bool is_srcbuf)
+static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
+		unsigned int start, unsigned int end, unsigned int counter,
+		u8 pattern, bool is_srcbuf)
 {
 	unsigned int i;
 	unsigned int error_count = 0;
@@ -191,6 +271,7 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
 	u8 expected;
 	u8 *buf;
 	unsigned int counter_orig = counter;
+	struct dmatest_verify_buffer *vb;
 
 	for (; (buf = *bufs); bufs++) {
 		counter = counter_orig;
@@ -198,18 +279,21 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
 			actual = buf[i];
 			expected = pattern | (~counter & PATTERN_COUNT_MASK);
 			if (actual != expected) {
-				if (error_count < 32)
-					dmatest_mismatch(actual, pattern, i,
-							 counter, is_srcbuf);
+				if (error_count < MAX_ERROR_COUNT && vr) {
+					vb = &vr->data[error_count];
+					vb->index = i;
+					vb->expected = expected;
+					vb->actual = actual;
+				}
 				error_count++;
 			}
 			counter++;
 		}
 	}
 
-	if (error_count > 32)
+	if (error_count > MAX_ERROR_COUNT)
 		pr_warning("%s: %u errors suppressed\n",
-			current->comm, error_count - 32);
+			current->comm, error_count - MAX_ERROR_COUNT);
 
 	return error_count;
 }
@@ -249,6 +333,170 @@ static unsigned int min_odd(unsigned int x, unsigned int y)
 	return val % 2 ? val : val - 1;
 }
 
+static char *verify_result_get_one(struct dmatest_verify_result *vr,
+		unsigned int i)
+{
+	struct dmatest_verify_buffer *vb = &vr->data[i];
+	u8 diff = vb->actual ^ vr->pattern;
+	static char buf[512];
+	char *msg;
+
+	if (vr->is_srcbuf)
+		msg = "srcbuf overwritten!";
+	else if ((vr->pattern & PATTERN_COPY)
+			&& (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+		msg = "dstbuf not copied!";
+	else if (diff & PATTERN_SRC)
+		msg = "dstbuf was copied!";
+	else
+		msg = "dstbuf mismatch!";
+
+	snprintf(buf, sizeof(buf) - 1, "%s [0x%x] Expected %02x, got %02x", msg,
+		 vb->index, vb->expected, vb->actual);
+
+	return buf;
+}
+
+static char *thread_result_get(const char *name,
+		struct dmatest_thread_result *tr)
+{
+	static const char * const messages[] = {
+		[DMATEST_ET_OK]			= "No errors",
+		[DMATEST_ET_MAP_SRC]		= "src mapping error",
+		[DMATEST_ET_MAP_DST]		= "dst mapping error",
+		[DMATEST_ET_PREP]		= "prep error",
+		[DMATEST_ET_SUBMIT]		= "submit error",
+		[DMATEST_ET_TIMEOUT]		= "test timed out",
+		[DMATEST_ET_DMA_ERROR]		=
+			"got completion callback (DMA_ERROR)",
+		[DMATEST_ET_DMA_IN_PROGRESS]	=
+			"got completion callback (DMA_IN_PROGRESS)",
+		[DMATEST_ET_VERIFY]		= "errors",
+		[DMATEST_ET_VERIFY_BUF]		= "verify errors",
+	};
+	static char buf[512];
+
+	snprintf(buf, sizeof(buf) - 1,
+		 "%s: #%u: %s with src_off=0x%x dst_off=0x%x len=0x%x (%lu)",
+		 name, tr->n, messages[tr->type], tr->src_off, tr->dst_off,
+		 tr->len, tr->data);
+
+	return buf;
+}
+
+static int thread_result_add(struct dmatest_info *info,
+		struct dmatest_result *r, enum dmatest_error_type type,
+		unsigned int n, unsigned int src_off, unsigned int dst_off,
+		unsigned int len, unsigned long data)
+{
+	struct dmatest_thread_result *tr;
+
+	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
+	if (!tr)
+		return -ENOMEM;
+
+	tr->type = type;
+	tr->n = n;
+	tr->src_off = src_off;
+	tr->dst_off = dst_off;
+	tr->len = len;
+	tr->data = data;
+
+	mutex_lock(&info->results_lock);
+	list_add_tail(&tr->node, &r->results);
+	mutex_unlock(&info->results_lock);
+
+	pr_warn("%s\n", thread_result_get(r->name, tr));
+	return 0;
+}
+
+static unsigned int verify_result_add(struct dmatest_info *info,
+		struct dmatest_result *r, unsigned int n,
+		unsigned int src_off, unsigned int dst_off, unsigned int len,
+		u8 **bufs, int whence, unsigned int counter, u8 pattern,
+		bool is_srcbuf)
+{
+	struct dmatest_verify_result *vr;
+	unsigned int error_count;
+	unsigned int buf_off = is_srcbuf ? src_off : dst_off;
+	unsigned int start, end;
+
+	if (whence < 0) {
+		start = 0;
+		end = buf_off;
+	} else if (whence > 0) {
+		start = buf_off + len;
+		end = info->params.buf_size;
+	} else {
+		start = buf_off;
+		end = buf_off + len;
+	}
+
+	vr = kmalloc(sizeof(*vr), GFP_KERNEL);
+	if (!vr) {
+		pr_warn("dmatest: No memory to store verify result\n");
+		return dmatest_verify(NULL, bufs, start, end, counter, pattern,
+				      is_srcbuf);
+	}
+
+	vr->pattern = pattern;
+	vr->is_srcbuf = is_srcbuf;
+
+	error_count = dmatest_verify(vr, bufs, start, end, counter, pattern,
+				     is_srcbuf);
+	if (error_count) {
+		vr->error_count = error_count;
+		thread_result_add(info, r, DMATEST_ET_VERIFY_BUF, n, src_off,
+				  dst_off, len, (unsigned long)vr);
+		return error_count;
+	}
+
+	kfree(vr);
+	return 0;
+}
+
+static void result_free(struct dmatest_info *info, const char *name)
+{
+	struct dmatest_result *r, *_r;
+
+	mutex_lock(&info->results_lock);
+	list_for_each_entry_safe(r, _r, &info->results, node) {
+		struct dmatest_thread_result *tr, *_tr;
+
+		if (name && strcmp(r->name, name))
+			continue;
+
+		list_for_each_entry_safe(tr, _tr, &r->results, node) {
+			if (tr->type == DMATEST_ET_VERIFY_BUF)
+				kfree(tr->vr);
+			list_del(&tr->node);
+			kfree(tr);
+		}
+
+		kfree(r->name);
+		list_del(&r->node);
+		kfree(r);
+	}
+
+	mutex_unlock(&info->results_lock);
+}
+
+static struct dmatest_result *result_init(struct dmatest_info *info,
+		const char *name)
+{
+	struct dmatest_result *r;
+
+	r = kzalloc(sizeof(*r), GFP_KERNEL);
+	if (r) {
+		r->name = kstrdup(name, GFP_KERNEL);
+		INIT_LIST_HEAD(&r->results);
+		mutex_lock(&info->results_lock);
+		list_add_tail(&r->node, &info->results);
+		mutex_unlock(&info->results_lock);
+	}
+	return r;
+}
+
 /*
  * This function repeatedly tests DMA transfers of various lengths and
  * offsets for a given operation type until it is told to exit by
@@ -268,6 +516,8 @@ static int dmatest_func(void *data)
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
 	struct dmatest_thread	*thread = data;
 	struct dmatest_done	done = { .wait = &done_wait };
+	struct dmatest_info	*info;
+	struct dmatest_params	*params;
 	struct dma_chan		*chan;
 	struct dma_device	*dev;
 	const char		*thread_name;
@@ -278,11 +528,12 @@ static int dmatest_func(void *data)
 	dma_cookie_t		cookie;
 	enum dma_status		status;
 	enum dma_ctrl_flags 	flags;
-	u8			pq_coefs[pq_sources + 1];
+	u8			*pq_coefs = NULL;
 	int			ret;
 	int			src_cnt;
 	int			dst_cnt;
 	int			i;
+	struct dmatest_result	*result;
 
 	thread_name = current->comm;
 	set_freezable();
@@ -290,28 +541,39 @@ static int dmatest_func(void *data)
 	ret = -ENOMEM;
 
 	smp_rmb();
+	info = thread->info;
+	params = &info->params;
 	chan = thread->chan;
 	dev = chan->device;
 	if (thread->type == DMA_MEMCPY)
 		src_cnt = dst_cnt = 1;
 	else if (thread->type == DMA_XOR) {
 		/* force odd to ensure dst = src */
-		src_cnt = min_odd(xor_sources | 1, dev->max_xor);
+		src_cnt = min_odd(params->xor_sources | 1, dev->max_xor);
 		dst_cnt = 1;
 	} else if (thread->type == DMA_PQ) {
 		/* force odd to ensure dst = src */
-		src_cnt = min_odd(pq_sources | 1, dma_maxpq(dev, 0));
+		src_cnt = min_odd(params->pq_sources | 1, dma_maxpq(dev, 0));
 		dst_cnt = 2;
+
+		pq_coefs = kmalloc(params->pq_sources+1, GFP_KERNEL);
+		if (!pq_coefs)
+			goto err_thread_type;
+
 		for (i = 0; i < src_cnt; i++)
 			pq_coefs[i] = 1;
 	} else
+		goto err_thread_type;
+
+	result = result_init(info, thread_name);
+	if (!result)
 		goto err_srcs;
 
 	thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
 	if (!thread->srcs)
 		goto err_srcs;
 	for (i = 0; i < src_cnt; i++) {
-		thread->srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
+		thread->srcs[i] = kmalloc(params->buf_size, GFP_KERNEL);
 		if (!thread->srcs[i])
 			goto err_srcbuf;
 	}
@@ -321,7 +583,7 @@ static int dmatest_func(void *data)
 	if (!thread->dsts)
 		goto err_dsts;
 	for (i = 0; i < dst_cnt; i++) {
-		thread->dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
+		thread->dsts[i] = kmalloc(params->buf_size, GFP_KERNEL);
 		if (!thread->dsts[i])
 			goto err_dstbuf;
 	}
@@ -337,7 +599,7 @@ static int dmatest_func(void *data)
 	      | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE;
 
 	while (!kthread_should_stop()
-	       && !(iterations && total_tests >= iterations)) {
+	       && !(params->iterations && total_tests >= params->iterations)) {
 		struct dma_async_tx_descriptor *tx = NULL;
 		dma_addr_t dma_srcs[src_cnt];
 		dma_addr_t dma_dsts[dst_cnt];
@@ -353,24 +615,24 @@ static int dmatest_func(void *data)
 		else if (thread->type == DMA_PQ)
 			align = dev->pq_align;
 
-		if (1 << align > test_buf_size) {
+		if (1 << align > params->buf_size) {
 			pr_err("%u-byte buffer too small for %d-byte alignment\n",
-			       test_buf_size, 1 << align);
+			       params->buf_size, 1 << align);
 			break;
 		}
 
-		len = dmatest_random() % test_buf_size + 1;
+		len = dmatest_random() % params->buf_size + 1;
 		len = (len >> align) << align;
 		if (!len)
 			len = 1 << align;
-		src_off = dmatest_random() % (test_buf_size - len + 1);
-		dst_off = dmatest_random() % (test_buf_size - len + 1);
+		src_off = dmatest_random() % (params->buf_size - len + 1);
+		dst_off = dmatest_random() % (params->buf_size - len + 1);
 
 		src_off = (src_off >> align) << align;
 		dst_off = (dst_off >> align) << align;
 
-		dmatest_init_srcs(thread->srcs, src_off, len);
-		dmatest_init_dsts(thread->dsts, dst_off, len);
+		dmatest_init_srcs(thread->srcs, src_off, len, params->buf_size);
+		dmatest_init_dsts(thread->dsts, dst_off, len, params->buf_size);
 
 		for (i = 0; i < src_cnt; i++) {
 			u8 *buf = thread->srcs[i] + src_off;
@@ -380,10 +642,10 @@ static int dmatest_func(void *data)
 			ret = dma_mapping_error(dev->dev, dma_srcs[i]);
 			if (ret) {
 				unmap_src(dev->dev, dma_srcs, len, i);
-				pr_warn("%s: #%u: mapping error %d with "
-					"src_off=0x%x len=0x%x\n",
-					thread_name, total_tests - 1, ret,
-					src_off, len);
+				thread_result_add(info, result,
+						  DMATEST_ET_MAP_SRC,
+						  total_tests, src_off, dst_off,
+						  len, ret);
 				failed_tests++;
 				continue;
 			}
@@ -391,16 +653,17 @@ static int dmatest_func(void *data)
 		/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
 		for (i = 0; i < dst_cnt; i++) {
 			dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
-						     test_buf_size,
+						     params->buf_size,
 						     DMA_BIDIRECTIONAL);
 			ret = dma_mapping_error(dev->dev, dma_dsts[i]);
 			if (ret) {
 				unmap_src(dev->dev, dma_srcs, len, src_cnt);
-				unmap_dst(dev->dev, dma_dsts, test_buf_size, i);
-				pr_warn("%s: #%u: mapping error %d with "
-					"dst_off=0x%x len=0x%x\n",
-					thread_name, total_tests - 1, ret,
-					dst_off, test_buf_size);
+				unmap_dst(dev->dev, dma_dsts, params->buf_size,
+					  i);
+				thread_result_add(info, result,
+						  DMATEST_ET_MAP_DST,
+						  total_tests, src_off, dst_off,
+						  len, ret);
 				failed_tests++;
 				continue;
 			}
@@ -428,11 +691,11 @@ static int dmatest_func(void *data)
 
 		if (!tx) {
 			unmap_src(dev->dev, dma_srcs, len, src_cnt);
-			unmap_dst(dev->dev, dma_dsts, test_buf_size, dst_cnt);
-			pr_warning("%s: #%u: prep error with src_off=0x%x "
-					"dst_off=0x%x len=0x%x\n",
-					thread_name, total_tests - 1,
-					src_off, dst_off, len);
+			unmap_dst(dev->dev, dma_dsts, params->buf_size,
+				  dst_cnt);
+			thread_result_add(info, result, DMATEST_ET_PREP,
+					  total_tests, src_off, dst_off,
+					  len, 0);
 			msleep(100);
 			failed_tests++;
 			continue;
@@ -444,18 +707,18 @@ static int dmatest_func(void *data)
 		cookie = tx->tx_submit(tx);
 
 		if (dma_submit_error(cookie)) {
-			pr_warning("%s: #%u: submit error %d with src_off=0x%x "
-					"dst_off=0x%x len=0x%x\n",
-					thread_name, total_tests - 1, cookie,
-					src_off, dst_off, len);
+			thread_result_add(info, result, DMATEST_ET_SUBMIT,
+					  total_tests, src_off, dst_off,
+					  len, cookie);
 			msleep(100);
 			failed_tests++;
 			continue;
 		}
 		dma_async_issue_pending(chan);
 
-		wait_event_freezable_timeout(done_wait, done.done,
-					     msecs_to_jiffies(timeout));
+		wait_event_freezable_timeout(done_wait,
+					     done.done || kthread_should_stop(),
+					     msecs_to_jiffies(params->timeout));
 
 		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
 
@@ -468,56 +731,57 @@ static int dmatest_func(void *data)
 			 * free it this time?" dancing.  For now, just
 			 * leave it dangling.
 			 */
-			pr_warning("%s: #%u: test timed out\n",
-				   thread_name, total_tests - 1);
+			thread_result_add(info, result, DMATEST_ET_TIMEOUT,
+					  total_tests, src_off, dst_off,
+					  len, 0);
 			failed_tests++;
 			continue;
 		} else if (status != DMA_SUCCESS) {
-			pr_warning("%s: #%u: got completion callback,"
-				   " but status is \'%s\'\n",
-				   thread_name, total_tests - 1,
-				   status == DMA_ERROR ? "error" : "in progress");
+			enum dmatest_error_type type = (status == DMA_ERROR) ?
+				DMATEST_ET_DMA_ERROR : DMATEST_ET_DMA_IN_PROGRESS;
+			thread_result_add(info, result, type,
+					  total_tests, src_off, dst_off,
+					  len, status);
 			failed_tests++;
 			continue;
 		}
 
 		/* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
-		unmap_dst(dev->dev, dma_dsts, test_buf_size, dst_cnt);
+		unmap_dst(dev->dev, dma_dsts, params->buf_size, dst_cnt);
 
 		error_count = 0;
 
 		pr_debug("%s: verifying source buffer...\n", thread_name);
-		error_count += dmatest_verify(thread->srcs, 0, src_off,
+		error_count += verify_result_add(info, result, total_tests,
+				src_off, dst_off, len, thread->srcs, -1,
 				0, PATTERN_SRC, true);
-		error_count += dmatest_verify(thread->srcs, src_off,
-				src_off + len, src_off,
-				PATTERN_SRC | PATTERN_COPY, true);
-		error_count += dmatest_verify(thread->srcs, src_off + len,
-				test_buf_size, src_off + len,
-				PATTERN_SRC, true);
-
-		pr_debug("%s: verifying dest buffer...\n",
-				thread->task->comm);
-		error_count += dmatest_verify(thread->dsts, 0, dst_off,
+		error_count += verify_result_add(info, result, total_tests,
+				src_off, dst_off, len, thread->srcs, 0,
+				src_off, PATTERN_SRC | PATTERN_COPY, true);
+		error_count += verify_result_add(info, result, total_tests,
+				src_off, dst_off, len, thread->srcs, 1,
+				src_off + len, PATTERN_SRC, true);
+
+		pr_debug("%s: verifying dest buffer...\n", thread_name);
+		error_count += verify_result_add(info, result, total_tests,
+				src_off, dst_off, len, thread->dsts, -1,
 				0, PATTERN_DST, false);
-		error_count += dmatest_verify(thread->dsts, dst_off,
-				dst_off + len, src_off,
-				PATTERN_SRC | PATTERN_COPY, false);
-		error_count += dmatest_verify(thread->dsts, dst_off + len,
-				test_buf_size, dst_off + len,
-				PATTERN_DST, false);
+		error_count += verify_result_add(info, result, total_tests,
+				src_off, dst_off, len, thread->dsts, 0,
+				src_off, PATTERN_SRC | PATTERN_COPY, false);
+		error_count += verify_result_add(info, result, total_tests,
+				src_off, dst_off, len, thread->dsts, 1,
+				dst_off + len, PATTERN_DST, false);
 
 		if (error_count) {
-			pr_warning("%s: #%u: %u errors with "
-				"src_off=0x%x dst_off=0x%x len=0x%x\n",
-				thread_name, total_tests - 1, error_count,
-				src_off, dst_off, len);
+			thread_result_add(info, result, DMATEST_ET_VERIFY,
+					  total_tests, src_off, dst_off,
+					  len, error_count);
 			failed_tests++;
 		} else {
-			pr_debug("%s: #%u: No errors with "
-				"src_off=0x%x dst_off=0x%x len=0x%x\n",
-				thread_name, total_tests - 1,
-				src_off, dst_off, len);
+			thread_result_add(info, result, DMATEST_ET_OK,
+					  total_tests, src_off, dst_off,
+					  len, 0);
 		}
 	}
 
@@ -532,6 +796,8 @@ err_dsts:
 err_srcbuf:
 	kfree(thread->srcs);
 err_srcs:
+	kfree(pq_coefs);
+err_thread_type:
 	pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
 			thread_name, total_tests, failed_tests, ret);
 
@@ -539,7 +805,9 @@ err_srcs:
 	if (ret)
 		dmaengine_terminate_all(chan);
 
-	if (iterations > 0)
+	thread->done = true;
+
+	if (params->iterations > 0)
 		while (!kthread_should_stop()) {
 			DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
 			interruptible_sleep_on(&wait_dmatest_exit);
@@ -568,8 +836,10 @@ static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
 	kfree(dtc);
 }
 
-static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_type type)
+static int dmatest_add_threads(struct dmatest_info *info,
+		struct dmatest_chan *dtc, enum dma_transaction_type type)
 {
+	struct dmatest_params *params = &info->params;
 	struct dmatest_thread *thread;
 	struct dma_chan *chan = dtc->chan;
 	char *op;
@@ -584,7 +854,7 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
 	else
 		return -EINVAL;
 
-	for (i = 0; i < threads_per_chan; i++) {
+	for (i = 0; i < params->threads_per_chan; i++) {
 		thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
 		if (!thread) {
 			pr_warning("dmatest: No memory for %s-%s%u\n",
@@ -592,6 +862,7 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
 
 			break;
 		}
+		thread->info = info;
 		thread->chan = dtc->chan;
 		thread->type = type;
 		smp_wmb();
@@ -612,7 +883,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
 	return i;
 }
 
-static int dmatest_add_channel(struct dma_chan *chan)
+static int dmatest_add_channel(struct dmatest_info *info,
+		struct dma_chan *chan)
 {
 	struct dmatest_chan	*dtc;
 	struct dma_device	*dma_dev = chan->device;
@@ -629,75 +901,418 @@ static int dmatest_add_channel(struct dma_chan *chan)
 	INIT_LIST_HEAD(&dtc->threads);
 
 	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
-		cnt = dmatest_add_threads(dtc, DMA_MEMCPY);
+		cnt = dmatest_add_threads(info, dtc, DMA_MEMCPY);
 		thread_count += cnt > 0 ? cnt : 0;
 	}
 	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
-		cnt = dmatest_add_threads(dtc, DMA_XOR);
+		cnt = dmatest_add_threads(info, dtc, DMA_XOR);
 		thread_count += cnt > 0 ? cnt : 0;
 	}
 	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
-		cnt = dmatest_add_threads(dtc, DMA_PQ);
+		cnt = dmatest_add_threads(info, dtc, DMA_PQ);
 		thread_count += cnt > 0 ? cnt : 0;
 	}
 
 	pr_info("dmatest: Started %u threads using %s\n",
 		thread_count, dma_chan_name(chan));
 
-	list_add_tail(&dtc->node, &dmatest_channels);
-	nr_channels++;
+	list_add_tail(&dtc->node, &info->channels);
+	info->nr_channels++;
 
 	return 0;
 }
 
 static bool filter(struct dma_chan *chan, void *param)
 {
-	if (!dmatest_match_channel(chan) || !dmatest_match_device(chan->device))
+	struct dmatest_params *params = param;
+
+	if (!dmatest_match_channel(params, chan) ||
+	    !dmatest_match_device(params, chan->device))
 		return false;
 	else
 		return true;
 }
 
-static int __init dmatest_init(void)
+static int __run_threaded_test(struct dmatest_info *info)
 {
 	dma_cap_mask_t mask;
 	struct dma_chan *chan;
+	struct dmatest_params *params = &info->params;
 	int err = 0;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_MEMCPY, mask);
 	for (;;) {
-		chan = dma_request_channel(mask, filter, NULL);
+		chan = dma_request_channel(mask, filter, params);
 		if (chan) {
-			err = dmatest_add_channel(chan);
+			err = dmatest_add_channel(info, chan);
 			if (err) {
 				dma_release_channel(chan);
 				break; /* add_channel failed, punt */
 			}
 		} else
 			break; /* no more channels available */
-		if (max_channels && nr_channels >= max_channels)
+		if (params->max_channels &&
+		    info->nr_channels >= params->max_channels)
 			break; /* we have all we need */
 	}
-
 	return err;
 }
-/* when compiled-in wait for drivers to load first */
-late_initcall(dmatest_init);
 
-static void __exit dmatest_exit(void)
+#ifndef MODULE
+static int run_threaded_test(struct dmatest_info *info)
+{
+	int ret;
+
+	mutex_lock(&info->lock);
+	ret = __run_threaded_test(info);
+	mutex_unlock(&info->lock);
+	return ret;
+}
+#endif
+
+static void __stop_threaded_test(struct dmatest_info *info)
 {
 	struct dmatest_chan *dtc, *_dtc;
 	struct dma_chan *chan;
 
-	list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
+	list_for_each_entry_safe(dtc, _dtc, &info->channels, node) {
 		list_del(&dtc->node);
 		chan = dtc->chan;
 		dmatest_cleanup_channel(dtc);
-		pr_debug("dmatest: dropped channel %s\n",
-			 dma_chan_name(chan));
+		pr_debug("dmatest: dropped channel %s\n", dma_chan_name(chan));
 		dma_release_channel(chan);
 	}
+
+	info->nr_channels = 0;
+}
+
+static void stop_threaded_test(struct dmatest_info *info)
+{
+	mutex_lock(&info->lock);
+	__stop_threaded_test(info);
+	mutex_unlock(&info->lock);
+}
+
+static int __restart_threaded_test(struct dmatest_info *info, bool run)
+{
+	struct dmatest_params *params = &info->params;
+	int ret;
+
+	/* Stop any running test first */
+	__stop_threaded_test(info);
+
+	if (run == false)
+		return 0;
+
+	/* Clear results from previous run */
+	result_free(info, NULL);
+
+	/* Copy test parameters */
+	memcpy(params, &info->dbgfs_params, sizeof(*params));
+
+	/* Run test with new parameters */
+	ret = __run_threaded_test(info);
+	if (ret) {
+		__stop_threaded_test(info);
+		pr_err("dmatest: Can't run test\n");
+	}
+
+	return ret;
+}
+
+static ssize_t dtf_write_string(void *to, size_t available, loff_t *ppos,
+		const void __user *from, size_t count)
+{
+	char tmp[20];
+	ssize_t len;
+
+	len = simple_write_to_buffer(tmp, sizeof(tmp) - 1, ppos, from, count);
+	if (len >= 0) {
+		tmp[len] = '\0';
+		strlcpy(to, strim(tmp), available);
+	}
+
+	return len;
+}
+
+static ssize_t dtf_read_channel(struct file *file, char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct dmatest_info *info = file->private_data;
+	return simple_read_from_buffer(buf, count, ppos,
+			info->dbgfs_params.channel,
+			strlen(info->dbgfs_params.channel));
+}
+
+static ssize_t dtf_write_channel(struct file *file, const char __user *buf,
+		size_t size, loff_t *ppos)
+{
+	struct dmatest_info *info = file->private_data;
+	return dtf_write_string(info->dbgfs_params.channel,
+				sizeof(info->dbgfs_params.channel),
+				ppos, buf, size);
+}
+
+static const struct file_operations dtf_channel_fops = {
+	.read	= dtf_read_channel,
+	.write	= dtf_write_channel,
+	.open	= simple_open,
+	.llseek	= default_llseek,
+};
+
+static ssize_t dtf_read_device(struct file *file, char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	struct dmatest_info *info = file->private_data;
+	return simple_read_from_buffer(buf, count, ppos,
+			info->dbgfs_params.device,
+			strlen(info->dbgfs_params.device));
+}
+
+static ssize_t dtf_write_device(struct file *file, const char __user *buf,
+		size_t size, loff_t *ppos)
+{
+	struct dmatest_info *info = file->private_data;
+	return dtf_write_string(info->dbgfs_params.device,
+				sizeof(info->dbgfs_params.device),
+				ppos, buf, size);
+}
+
+static const struct file_operations dtf_device_fops = {
+	.read	= dtf_read_device,
+	.write	= dtf_write_device,
+	.open	= simple_open,
+	.llseek	= default_llseek,
+};
+
+static ssize_t dtf_read_run(struct file *file, char __user *user_buf,
+		size_t count, loff_t *ppos)
+{
+	struct dmatest_info *info = file->private_data;
+	char buf[3];
+	struct dmatest_chan *dtc;
+	bool alive = false;
+
+	mutex_lock(&info->lock);
+	list_for_each_entry(dtc, &info->channels, node) {
+		struct dmatest_thread *thread;
+
+		list_for_each_entry(thread, &dtc->threads, node) {
+			if (!thread->done) {
+				alive = true;
+				break;
+			}
+		}
+	}
+
+	if (alive) {
+		buf[0] = 'Y';
+	} else {
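+		/* nothing left running: release the channels so a new run can start */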
+		__stop_threaded_test(info);
+		buf[0] = 'N';
+	}
+
+	mutex_unlock(&info->lock);
+	buf[1] = '\n';
+	buf[2] = 0x00;
+	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t dtf_write_run(struct file *file, const char __user *user_buf,
+		size_t count, loff_t *ppos)
+{
+	struct dmatest_info *info = file->private_data;
+	char buf[16];
+	bool bv;
+	int ret = 0;
+
+	if (copy_from_user(buf, user_buf, min(count, (sizeof(buf) - 1))))
+		return -EFAULT;
+
+	if (strtobool(buf, &bv) == 0) {
+		mutex_lock(&info->lock);
+		ret = __restart_threaded_test(info, bv);
+		mutex_unlock(&info->lock);
+	}
+
+	return ret ? ret : count;
+}
+
+static const struct file_operations dtf_run_fops = {
+	.read	= dtf_read_run,
+	.write	= dtf_write_run,
+	.open	= simple_open,
+	.llseek	= default_llseek,
+};
+
+static int dtf_results_show(struct seq_file *sf, void *data)
+{
+	struct dmatest_info *info = sf->private;
+	struct dmatest_result *result;
+	struct dmatest_thread_result *tr;
+	unsigned int i;
+
+	mutex_lock(&info->results_lock);
+	list_for_each_entry(result, &info->results, node) {
+		list_for_each_entry(tr, &result->results, node) {
+			seq_printf(sf, "%s\n",
+				thread_result_get(result->name, tr));
+			if (tr->type == DMATEST_ET_VERIFY_BUF) {
+				for (i = 0; i < tr->vr->error_count; i++) {
+					seq_printf(sf, "\t%s\n",
+						verify_result_get_one(tr->vr, i));
+				}
+			}
+		}
+	}
+
+	mutex_unlock(&info->results_lock);
+	return 0;
+}
+
+static int dtf_results_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dtf_results_show, inode->i_private);
+}
+
+static const struct file_operations dtf_results_fops = {
+	.open		= dtf_results_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int dmatest_register_dbgfs(struct dmatest_info *info)
+{
+	struct dentry *d;
+	struct dmatest_params *params = &info->dbgfs_params;
+	int ret = -ENOMEM;
+
+	d = debugfs_create_dir("dmatest", NULL);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+	if (!d)
+		goto err_root;
+
+	info->root = d;
+
+	/* Copy initial values */
+	memcpy(params, &info->params, sizeof(*params));
+
+	/* Test parameters */
+
+	d = debugfs_create_u32("test_buf_size", S_IWUSR | S_IRUGO, info->root,
+			       (u32 *)&params->buf_size);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	d = debugfs_create_file("channel", S_IRUGO | S_IWUSR, info->root,
+				info, &dtf_channel_fops);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	d = debugfs_create_file("device", S_IRUGO | S_IWUSR, info->root,
+				info, &dtf_device_fops);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	d = debugfs_create_u32("threads_per_chan", S_IWUSR | S_IRUGO, info->root,
+			       (u32 *)&params->threads_per_chan);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	d = debugfs_create_u32("max_channels", S_IWUSR | S_IRUGO, info->root,
+			       (u32 *)&params->max_channels);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	d = debugfs_create_u32("iterations", S_IWUSR | S_IRUGO, info->root,
+			       (u32 *)&params->iterations);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	d = debugfs_create_u32("xor_sources", S_IWUSR | S_IRUGO, info->root,
+			       (u32 *)&params->xor_sources);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	d = debugfs_create_u32("pq_sources", S_IWUSR | S_IRUGO, info->root,
+			       (u32 *)&params->pq_sources);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	d = debugfs_create_u32("timeout", S_IWUSR | S_IRUGO, info->root,
+			       (u32 *)&params->timeout);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	/* Run or stop threaded test */
+	d = debugfs_create_file("run", S_IWUSR | S_IRUGO, info->root,
+				info, &dtf_run_fops);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	/* Results of test in progress */
+	d = debugfs_create_file("results", S_IRUGO, info->root, info,
+				&dtf_results_fops);
+	if (IS_ERR_OR_NULL(d))
+		goto err_node;
+
+	return 0;
+
+err_node:
+	debugfs_remove_recursive(info->root);
+err_root:
+	pr_err("dmatest: Failed to initialize debugfs\n");
+	return ret;
+}
+
+static int __init dmatest_init(void)
+{
+	struct dmatest_info *info = &test_info;
+	struct dmatest_params *params = &info->params;
+	int ret;
+
+	memset(info, 0, sizeof(*info));
+
+	mutex_init(&info->lock);
+	INIT_LIST_HEAD(&info->channels);
+
+	mutex_init(&info->results_lock);
+	INIT_LIST_HEAD(&info->results);
+
+	/* Set default parameters */
+	params->buf_size = test_buf_size;
+	strlcpy(params->channel, test_channel, sizeof(params->channel));
+	strlcpy(params->device, test_device, sizeof(params->device));
+	params->threads_per_chan = threads_per_chan;
+	params->max_channels = max_channels;
+	params->iterations = iterations;
+	params->xor_sources = xor_sources;
+	params->pq_sources = pq_sources;
+	params->timeout = timeout;
+
+	ret = dmatest_register_dbgfs(info);
+	if (ret)
+		return ret;
+
+#ifdef MODULE
+	return 0;
+#else
+	return run_threaded_test(info);
+#endif
+}
+/* when compiled-in wait for drivers to load first */
+late_initcall(dmatest_init);
+
+static void __exit dmatest_exit(void)
+{
+	struct dmatest_info *info = &test_info;
+
+	debugfs_remove_recursive(info->root);
+	stop_threaded_test(info);
+	result_free(info, NULL);
 }
 module_exit(dmatest_exit);
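
For illustration only (not part of this patch): a minimal userspace sketch of
driving the new dmatest debugfs interface, assuming debugfs is mounted at
/sys/kernel/debug and the dmatest module is loaded:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd;

		/* start a run; dtf_write_run() feeds this to strtobool() */
		fd = open("/sys/kernel/debug/dmatest/run", O_WRONLY);
		if (fd < 0)
			return 1;
		write(fd, "1", 1);
		close(fd);

		sleep(1);

		/* dump the per-thread results collected so far */
		fd = open("/sys/kernel/debug/dmatest/results", O_RDONLY);
		if (fd < 0)
			return 1;
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);
		close(fd);
		return 0;
	}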
 
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 43a5329d44837c4042687d6f93436b3caf0627c1..2e5deaa82b60579d24bd80fc42ac5327ac5e273f 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -25,6 +25,8 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
 
 #include "dw_dmac_regs.h"
 #include "dmaengine.h"
@@ -49,29 +51,22 @@ static inline unsigned int dwc_get_sms(struct dw_dma_slave *slave)
 	return slave ? slave->src_master : 1;
 }
 
-#define SRC_MASTER	0
-#define DST_MASTER	1
-
-static inline unsigned int dwc_get_master(struct dma_chan *chan, int master)
+static inline void dwc_set_masters(struct dw_dma_chan *dwc)
 {
-	struct dw_dma *dw = to_dw_dma(chan->device);
-	struct dw_dma_slave *dws = chan->private;
-	unsigned int m;
-
-	if (master == SRC_MASTER)
-		m = dwc_get_sms(dws);
-	else
-		m = dwc_get_dms(dws);
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	struct dw_dma_slave *dws = dwc->chan.private;
+	unsigned char mmax = dw->nr_masters - 1;
 
-	return min_t(unsigned int, dw->nr_masters - 1, m);
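+	/* Autodetect masters only when no request line was set (~0 sentinel) */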
+	if (dwc->request_line == ~0) {
+		dwc->src_master = min_t(unsigned char, mmax, dwc_get_sms(dws));
+		dwc->dst_master = min_t(unsigned char, mmax, dwc_get_dms(dws));
+	}
 }
 
 #define DWC_DEFAULT_CTLLO(_chan) ({				\
 		struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan);	\
 		struct dma_slave_config	*_sconfig = &_dwc->dma_sconfig;	\
 		bool _is_slave = is_slave_direction(_dwc->direction);	\
-		int _dms = dwc_get_master(_chan, DST_MASTER);		\
-		int _sms = dwc_get_master(_chan, SRC_MASTER);		\
 		u8 _smsize = _is_slave ? _sconfig->src_maxburst :	\
 			DW_DMA_MSIZE_16;			\
 		u8 _dmsize = _is_slave ? _sconfig->dst_maxburst :	\
@@ -81,8 +76,8 @@ static inline unsigned int dwc_get_master(struct dma_chan *chan, int master)
 		 | DWC_CTLL_SRC_MSIZE(_smsize)			\
 		 | DWC_CTLL_LLP_D_EN				\
 		 | DWC_CTLL_LLP_S_EN				\
-		 | DWC_CTLL_DMS(_dms)				\
-		 | DWC_CTLL_SMS(_sms));				\
+		 | DWC_CTLL_DMS(_dwc->dst_master)		\
+		 | DWC_CTLL_SMS(_dwc->src_master));		\
 	})
 
 /*
@@ -92,13 +87,6 @@ static inline unsigned int dwc_get_master(struct dma_chan *chan, int master)
  */
 #define NR_DESCS_PER_CHANNEL	64
 
-static inline unsigned int dwc_get_data_width(struct dma_chan *chan, int master)
-{
-	struct dw_dma *dw = to_dw_dma(chan->device);
-
-	return dw->data_width[dwc_get_master(chan, master)];
-}
-
 /*----------------------------------------------------------------------*/
 
 static struct device *chan2dev(struct dma_chan *chan)
@@ -172,13 +160,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 	if (dwc->initialized == true)
 		return;
 
-	if (dws && dws->cfg_hi == ~0 && dws->cfg_lo == ~0) {
-		/* autoconfigure based on request line from DT */
-		if (dwc->direction == DMA_MEM_TO_DEV)
-			cfghi = DWC_CFGH_DST_PER(dwc->request_line);
-		else if (dwc->direction == DMA_DEV_TO_MEM)
-			cfghi = DWC_CFGH_SRC_PER(dwc->request_line);
-	} else if (dws) {
+	if (dws) {
 		/*
 		 * We need controller-specific data to set up slave
 		 * transfers.
@@ -189,9 +171,9 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 		cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK;
 	} else {
 		if (dwc->direction == DMA_MEM_TO_DEV)
-			cfghi = DWC_CFGH_DST_PER(dwc->dma_sconfig.slave_id);
+			cfghi = DWC_CFGH_DST_PER(dwc->request_line);
 		else if (dwc->direction == DMA_DEV_TO_MEM)
-			cfghi = DWC_CFGH_SRC_PER(dwc->dma_sconfig.slave_id);
+			cfghi = DWC_CFGH_SRC_PER(dwc->request_line);
 	}
 
 	channel_writel(dwc, CFG_LO, cfglo);
@@ -473,16 +455,16 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 			(unsigned long long)llp);
 
 	list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
-		/* initial residue value */
+		/* Initial residue value */
 		dwc->residue = desc->total_len;
 
-		/* check first descriptors addr */
+		/* Check first descriptors addr */
 		if (desc->txd.phys == llp) {
 			spin_unlock_irqrestore(&dwc->lock, flags);
 			return;
 		}
 
-		/* check first descriptors llp */
+		/* Check first descriptors llp */
 		if (desc->lli.llp == llp) {
 			/* This one is currently in progress */
 			dwc->residue -= dwc_get_sent(dwc);
@@ -588,7 +570,7 @@ inline dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan)
 }
 EXPORT_SYMBOL(dw_dma_get_dst_addr);
 
-/* called with dwc->lock held and all DMAC interrupts disabled */
+/* Called with dwc->lock held and all DMAC interrupts disabled */
 static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
 		u32 status_err, u32 status_xfer)
 {
@@ -626,7 +608,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
 
 		dwc_chan_disable(dw, dwc);
 
-		/* make sure DMA does not restart by loading a new list */
+		/* Make sure DMA does not restart by loading a new list */
 		channel_writel(dwc, LLP, 0);
 		channel_writel(dwc, CTL_LO, 0);
 		channel_writel(dwc, CTL_HI, 0);
@@ -745,6 +727,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		size_t len, unsigned long flags)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
 	struct dw_desc		*desc;
 	struct dw_desc		*first;
 	struct dw_desc		*prev;
@@ -767,8 +750,8 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 	dwc->direction = DMA_MEM_TO_MEM;
 
-	data_width = min_t(unsigned int, dwc_get_data_width(chan, SRC_MASTER),
-			   dwc_get_data_width(chan, DST_MASTER));
+	data_width = min_t(unsigned int, dw->data_width[dwc->src_master],
+			   dw->data_width[dwc->dst_master]);
 
 	src_width = dst_width = min_t(unsigned int, data_width,
 				      dwc_fast_fls(src | dest | len));
@@ -826,6 +809,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned long flags, void *context)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
 	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
 	struct dw_desc		*prev;
 	struct dw_desc		*first;
@@ -859,7 +843,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
 			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
 
-		data_width = dwc_get_data_width(chan, SRC_MASTER);
+		data_width = dw->data_width[dwc->src_master];
 
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
@@ -919,7 +903,7 @@ slave_sg_todev_fill_desc:
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
 			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
 
-		data_width = dwc_get_data_width(chan, DST_MASTER);
+		data_width = dw->data_width[dwc->dst_master];
 
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
@@ -1001,13 +985,6 @@ static inline void convert_burst(u32 *maxburst)
 		*maxburst = 0;
 }
 
-static inline void convert_slave_id(struct dw_dma_chan *dwc)
-{
-	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-
-	dwc->dma_sconfig.slave_id -= dw->request_line_base;
-}
-
 static int
 set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
 {
@@ -1020,9 +997,12 @@ set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
 	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
 	dwc->direction = sconfig->direction;
 
+	/* Take the request line from the slave_id member */
+	if (dwc->request_line == ~0)
+		dwc->request_line = sconfig->slave_id;
+
 	convert_burst(&dwc->dma_sconfig.src_maxburst);
 	convert_burst(&dwc->dma_sconfig.dst_maxburst);
-	convert_slave_id(dwc);
 
 	return 0;
 }
@@ -1030,10 +1010,11 @@ set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
 static inline void dwc_chan_pause(struct dw_dma_chan *dwc)
 {
 	u32 cfglo = channel_readl(dwc, CFG_LO);
+	unsigned int count = 20;	/* timeout iterations */
 
 	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
-	while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY))
-		cpu_relax();
+	while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
+		udelay(2);
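+	/* bounded wait (~40 us max); mark paused even if the FIFO never drained */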
 
 	dwc->paused = true;
 }
@@ -1169,6 +1150,8 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 	 * doesn't mean what you think it means), and status writeback.
 	 */
 
+	dwc_set_masters(dwc);
+
 	spin_lock_irqsave(&dwc->lock, flags);
 	i = dwc->descs_allocated;
 	while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
@@ -1226,6 +1209,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	list_splice_init(&dwc->free_list, &list);
 	dwc->descs_allocated = 0;
 	dwc->initialized = false;
+	dwc->request_line = ~0;
 
 	/* Disable interrupts */
 	channel_clear_bit(dw, MASK.XFER, dwc->mask);
@@ -1241,42 +1225,36 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
-struct dw_dma_filter_args {
+/*----------------------------------------------------------------------*/
+
+struct dw_dma_of_filter_args {
 	struct dw_dma *dw;
 	unsigned int req;
 	unsigned int src;
 	unsigned int dst;
 };
 
-static bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
+static bool dw_dma_of_filter(struct dma_chan *chan, void *param)
 {
 	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
-	struct dw_dma *dw = to_dw_dma(chan->device);
-	struct dw_dma_filter_args *fargs = param;
-	struct dw_dma_slave *dws = &dwc->slave;
+	struct dw_dma_of_filter_args *fargs = param;
 
-	/* ensure the device matches our channel */
+	/* Ensure the device matches our channel */
         if (chan->device != &fargs->dw->dma)
                 return false;
 
-	dws->dma_dev	= dw->dma.dev;
-	dws->cfg_hi	= ~0;
-	dws->cfg_lo	= ~0;
-	dws->src_master	= fargs->src;
-	dws->dst_master	= fargs->dst;
-
 	dwc->request_line = fargs->req;
-
-	chan->private = dws;
+	dwc->src_master	= fargs->src;
+	dwc->dst_master	= fargs->dst;
 
 	return true;
 }
 
-static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec,
-					 struct of_dma *ofdma)
+static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec,
+					struct of_dma *ofdma)
 {
 	struct dw_dma *dw = ofdma->of_dma_data;
-	struct dw_dma_filter_args fargs = {
+	struct dw_dma_of_filter_args fargs = {
 		.dw = dw,
 	};
 	dma_cap_mask_t cap;
@@ -1297,8 +1275,48 @@ static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec,
 	dma_cap_set(DMA_SLAVE, cap);
 
 	/* TODO: there should be a simpler way to do this */
-	return dma_request_channel(cap, dw_dma_generic_filter, &fargs);
+	return dma_request_channel(cap, dw_dma_of_filter, &fargs);
+}
+
+#ifdef CONFIG_ACPI
+static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+	struct acpi_dma_spec *dma_spec = param;
+
+	if (chan->device->dev != dma_spec->dev ||
+	    chan->chan_id != dma_spec->chan_id)
+		return false;
+
+	dwc->request_line = dma_spec->slave_id;
+	dwc->src_master = dwc_get_sms(NULL);
+	dwc->dst_master = dwc_get_dms(NULL);
+
+	return true;
+}
+
+static void dw_dma_acpi_controller_register(struct dw_dma *dw)
+{
+	struct device *dev = dw->dma.dev;
+	struct acpi_dma_filter_info *info;
+	int ret;
+
+	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return;
+
+	dma_cap_zero(info->dma_cap);
+	dma_cap_set(DMA_SLAVE, info->dma_cap);
+	info->filter_fn = dw_dma_acpi_filter;
+
+	ret = devm_acpi_dma_controller_register(dev, acpi_dma_simple_xlate,
+						info);
+	if (ret)
+		dev_err(dev, "could not register acpi_dma_controller\n");
 }
+#else /* !CONFIG_ACPI */
+static inline void dw_dma_acpi_controller_register(struct dw_dma *dw) {}
+#endif /* !CONFIG_ACPI */
 
 /* --------------------- Cyclic DMA API extensions -------------------- */
 
@@ -1322,7 +1340,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
 
 	spin_lock_irqsave(&dwc->lock, flags);
 
-	/* assert channel is idle */
+	/* Assert channel is idle */
 	if (dma_readl(dw, CH_EN) & dwc->mask) {
 		dev_err(chan2dev(&dwc->chan),
 			"BUG: Attempted to start non-idle channel\n");
@@ -1334,7 +1352,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
 	dma_writel(dw, CLEAR.ERROR, dwc->mask);
 	dma_writel(dw, CLEAR.XFER, dwc->mask);
 
-	/* setup DMAC channel registers */
+	/* Setup DMAC channel registers */
 	channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys);
 	channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
 	channel_writel(dwc, CTL_HI, 0);
@@ -1501,7 +1519,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		last = desc;
 	}
 
-	/* lets make a cyclic list */
+	/* Let's make a cyclic list */
 	last->lli.llp = cdesc->desc[0]->txd.phys;
 
 	dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf 0x%llx len %zu "
@@ -1636,7 +1654,6 @@ dw_dma_parse_dt(struct platform_device *pdev)
 
 static int dw_probe(struct platform_device *pdev)
 {
-	const struct platform_device_id *match;
 	struct dw_dma_platform_data *pdata;
 	struct resource		*io;
 	struct dw_dma		*dw;
@@ -1706,7 +1723,7 @@ static int dw_probe(struct platform_device *pdev)
 
 	dw->regs = regs;
 
-	/* get hardware configuration parameters */
+	/* Get hardware configuration parameters */
 	if (autocfg) {
 		max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
 
@@ -1720,18 +1737,13 @@ static int dw_probe(struct platform_device *pdev)
 		memcpy(dw->data_width, pdata->data_width, 4);
 	}
 
-	/* Get the base request line if set */
-	match = platform_get_device_id(pdev);
-	if (match)
-		dw->request_line_base = (unsigned int)match->driver_data;
-
 	/* Calculate all channel mask before DMA setup */
 	dw->all_chan_mask = (1 << nr_channels) - 1;
 
-	/* force dma off, just in case */
+	/* Force dma off, just in case */
 	dw_dma_off(dw);
 
-	/* disable BLOCK interrupts as well */
+	/* Disable BLOCK interrupts as well */
 	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 
 	err = devm_request_irq(&pdev->dev, irq, dw_dma_interrupt, 0,
@@ -1741,7 +1753,7 @@ static int dw_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, dw);
 
-	/* create a pool of consistent memory blocks for hardware descriptors */
+	/* Create a pool of consistent memory blocks for hardware descriptors */
 	dw->desc_pool = dmam_pool_create("dw_dmac_desc_pool", &pdev->dev,
 					 sizeof(struct dw_desc), 4, 0);
 	if (!dw->desc_pool) {
@@ -1781,8 +1793,9 @@ static int dw_probe(struct platform_device *pdev)
 		channel_clear_bit(dw, CH_EN, dwc->mask);
 
 		dwc->direction = DMA_TRANS_NONE;
+		dwc->request_line = ~0;
 
-		/* hardware configuration */
+		/* Hardware configuration */
 		if (autocfg) {
 			unsigned int dwc_params;
 
@@ -1842,12 +1855,15 @@ static int dw_probe(struct platform_device *pdev)
 
 	if (pdev->dev.of_node) {
 		err = of_dma_controller_register(pdev->dev.of_node,
-						 dw_dma_xlate, dw);
-		if (err && err != -ENODEV)
+						 dw_dma_of_xlate, dw);
+		if (err)
 			dev_err(&pdev->dev,
 				"could not register of_dma_controller\n");
 	}
 
+	if (ACPI_HANDLE(&pdev->dev))
+		dw_dma_acpi_controller_register(dw);
+
 	return 0;
 }
 
@@ -1912,18 +1928,19 @@ static const struct dev_pm_ops dw_dev_pm_ops = {
 };
 
 #ifdef CONFIG_OF
-static const struct of_device_id dw_dma_id_table[] = {
+static const struct of_device_id dw_dma_of_id_table[] = {
 	{ .compatible = "snps,dma-spear1340" },
 	{}
 };
-MODULE_DEVICE_TABLE(of, dw_dma_id_table);
+MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
 #endif
 
-static const struct platform_device_id dw_dma_ids[] = {
-	/* Name,	Request Line Base */
-	{ "INTL9C60",	(kernel_ulong_t)16 },
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id dw_dma_acpi_id_table[] = {
+	{ "INTL9C60", 0 },
 	{ }
 };
+#endif
 
 static struct platform_driver dw_driver = {
 	.probe		= dw_probe,
@@ -1932,9 +1949,9 @@ static struct platform_driver dw_driver = {
 	.driver = {
 		.name	= "dw_dmac",
 		.pm	= &dw_dev_pm_ops,
-		.of_match_table = of_match_ptr(dw_dma_id_table),
+		.of_match_table = of_match_ptr(dw_dma_of_id_table),
+		.acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table),
 	},
-	.id_table	= dw_dma_ids,
 };
 
 static int __init dw_init(void)
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 4d02c3669b75bcf023a7f03916d02d3749ec2e1e..9d417200bd57f714b5493a589da7703252458658 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -212,8 +212,11 @@ struct dw_dma_chan {
 	/* hardware configuration */
 	unsigned int		block_size;
 	bool			nollp;
+
+	/* custom slave configuration */
 	unsigned int		request_line;
-	struct dw_dma_slave	slave;
+	unsigned char		src_master;
+	unsigned char		dst_master;
 
 	/* configuration passed via DMA_SLAVE_CONFIG */
 	struct dma_slave_config dma_sconfig;
@@ -247,7 +250,6 @@ struct dw_dma {
 	/* hardware configuration */
 	unsigned char		nr_masters;
 	unsigned char		data_width[4];
-	unsigned int		request_line_base;
 
 	struct dw_dma_chan	chan[0];
 };
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 70b8975d107ee185e9b4b3bcf6e220ff9b4d555e..f28583370d00f7d102b47b39241d1b5d63885a80 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -859,8 +859,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 
 	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
 
-	if (imxdmac->sg_list)
-		kfree(imxdmac->sg_list);
+	kfree(imxdmac->sg_list);
 
 	imxdmac->sg_list = kcalloc(periods + 1,
 			sizeof(struct scatterlist), GFP_KERNEL);
@@ -1145,7 +1144,7 @@ err:
 	return ret;
 }
 
-static int __exit imxdma_remove(struct platform_device *pdev)
+static int imxdma_remove(struct platform_device *pdev)
 {
 	struct imxdma_engine *imxdma = platform_get_drvdata(pdev);
 
@@ -1162,7 +1161,7 @@ static struct platform_driver imxdma_driver = {
 		.name	= "imx-dma",
 	},
 	.id_table	= imx_dma_devtype,
-	.remove		= __exit_p(imxdma_remove),
+	.remove		= imxdma_remove,
 };
 
 static int __init imxdma_module_init(void)
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index f082aa3a918c9afa82fa459849c96d63f81242b6..092867bf795c0d939b57ef3a9e4622c833f463a1 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1462,7 +1462,7 @@ err_irq:
 	return ret;
 }
 
-static int __exit sdma_remove(struct platform_device *pdev)
+static int sdma_remove(struct platform_device *pdev)
 {
 	return -EBUSY;
 }
@@ -1473,7 +1473,7 @@ static struct platform_driver sdma_driver = {
 		.of_match_table = sdma_dt_ids,
 	},
 	.id_table	= sdma_devtypes,
-	.remove		= __exit_p(sdma_remove),
+	.remove		= sdma_remove,
 };
 
 static int __init sdma_module_init(void)
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 1879a5942bfc73ffb74e498cdcb7229ee0a52aaf..17a2393b3e25048fb495b9049941fe1be8f4cf36 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -892,7 +892,7 @@ MODULE_PARM_DESC(ioat_interrupt_style,
  * ioat_dma_setup_interrupts - setup interrupt handler
  * @device: ioat device
  */
-static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+int ioat_dma_setup_interrupts(struct ioatdma_device *device)
 {
 	struct ioat_chan_common *chan;
 	struct pci_dev *pdev = device->pdev;
@@ -941,6 +941,7 @@ msix:
 		}
 	}
 	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+	device->irq_mode = IOAT_MSIX;
 	goto done;
 
 msix_single_vector:
@@ -956,6 +957,7 @@ msix_single_vector:
 		pci_disable_msix(pdev);
 		goto msi;
 	}
+	device->irq_mode = IOAT_MSIX_SINGLE;
 	goto done;
 
 msi:
@@ -969,6 +971,7 @@ msi:
 		pci_disable_msi(pdev);
 		goto intx;
 	}
+	device->irq_mode = IOAT_MSI;
 	goto done;
 
 intx:
@@ -977,6 +980,7 @@ intx:
 	if (err)
 		goto err_no_irq;
 
+	device->irq_mode = IOAT_INTX;
 done:
 	if (device->intr_quirk)
 		device->intr_quirk(device);
@@ -987,9 +991,11 @@ done:
 err_no_irq:
 	/* Disable all interrupt generation */
 	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	device->irq_mode = IOAT_NOIRQ;
 	dev_err(dev, "no usable interrupts\n");
 	return err;
 }
+EXPORT_SYMBOL(ioat_dma_setup_interrupts);
 
 static void ioat_disable_interrupts(struct ioatdma_device *device)
 {
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 53a4cbb78f4790f24be88100f6661805942671f5..54fb7b9ff9aaa4afb88c823b3a129a22440a9320 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -39,6 +39,7 @@
 #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
 #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
 #define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
+#define to_pdev(ioat_chan) ((ioat_chan)->device->pdev)
 
 #define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
 
@@ -48,6 +49,14 @@
  */
 #define NULL_DESC_BUFFER_SIZE 1
 
+enum ioat_irq_mode {
+	IOAT_NOIRQ = 0,
+	IOAT_MSIX,
+	IOAT_MSIX_SINGLE,
+	IOAT_MSI,
+	IOAT_INTX
+};
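+/* irq_mode records how interrupts were wired so a reset path can undo them */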
+
 /**
  * struct ioatdma_device - internal representation of a IOAT device
  * @pdev: PCI-Express device
@@ -72,11 +81,16 @@ struct ioatdma_device {
 	void __iomem *reg_base;
 	struct pci_pool *dma_pool;
 	struct pci_pool *completion_pool;
+#define MAX_SED_POOLS	5
+	struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
+	struct kmem_cache *sed_pool;
 	struct dma_device common;
 	u8 version;
 	struct msix_entry msix_entries[4];
 	struct ioat_chan_common *idx[4];
 	struct dca_provider *dca;
+	enum ioat_irq_mode irq_mode;
+	u32 cap;
 	void (*intr_quirk)(struct ioatdma_device *device);
 	int (*enumerate_channels)(struct ioatdma_device *device);
 	int (*reset_hw)(struct ioat_chan_common *chan);
@@ -131,6 +145,20 @@ struct ioat_dma_chan {
 	u16 active;
 };
 
+/**
+ * struct ioat_sed_ent - wrapper around super extended hardware descriptor
+ * @hw: hardware SED
+ * @dma: dma address for the SED
+ * @parent: pointer to the parent dma descriptor
+ * @hw_pool: index of the hardware SED pool the descriptor came from
+ */
+struct ioat_sed_ent {
+	struct ioat_sed_raw_descriptor *hw;
+	dma_addr_t dma;
+	struct ioat_ring_ent *parent;
+	unsigned int hw_pool;
+};
+
 static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
 {
 	return container_of(c, struct ioat_chan_common, common);
@@ -179,7 +207,7 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
 	struct device *dev = to_dev(chan);
 
 	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
-		" ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
+		" ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id,
 		(unsigned long long) tx->phys,
 		(unsigned long long) hw->next, tx->cookie, tx->flags,
 		hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
@@ -201,7 +229,7 @@ ioat_chan_by_index(struct ioatdma_device *device, int index)
 	return device->idx[index];
 }
 
-static inline u64 ioat_chansts(struct ioat_chan_common *chan)
+static inline u64 ioat_chansts_32(struct ioat_chan_common *chan)
 {
 	u8 ver = chan->device->version;
 	u64 status;
@@ -218,6 +246,26 @@ static inline u64 ioat_chansts(struct ioat_chan_common *chan)
 	return status;
 }
 
+#if BITS_PER_LONG == 64
+
+static inline u64 ioat_chansts(struct ioat_chan_common *chan)
+{
+	u8 ver = chan->device->version;
+	u64 status;
+
+	/* With IOAT v3.3 the status register is 64 bit. */
+	if (ver >= IOAT_VER_3_3)
+		status = readq(chan->reg_base + IOAT_CHANSTS_OFFSET(ver));
+	else
+		status = ioat_chansts_32(chan);
+
+	return status;
+}
+
+#else
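+/* readq() is not available on all 32-bit builds; use the split 32-bit read */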
+#define ioat_chansts ioat_chansts_32
+#endif
+
 static inline void ioat_start(struct ioat_chan_common *chan)
 {
 	u8 ver = chan->device->version;
@@ -321,6 +369,7 @@ bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
 			   dma_addr_t *phys_complete);
 void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
 void ioat_kobject_del(struct ioatdma_device *device);
+int ioat_dma_setup_interrupts(struct ioatdma_device *device);
 extern const struct sysfs_ops ioat_sysfs_ops;
 extern struct ioat_sysfs_entry ioat_version_attr;
 extern struct ioat_sysfs_entry ioat_cap_attr;
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index e100f644e3446449d09cc09dbc639d096d75d305..29bf9448035d321305e9c8791265e6c744c5a677 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -137,6 +137,7 @@ struct ioat_ring_ent {
 	#ifdef DEBUG
 	int id;
 	#endif
+	struct ioat_sed_ent *sed;
 };
 
 static inline struct ioat_ring_ent *
@@ -157,6 +158,7 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
 
 int ioat2_dma_probe(struct ioatdma_device *dev, int dca);
 int ioat3_dma_probe(struct ioatdma_device *dev, int dca);
+void ioat3_dma_remove(struct ioatdma_device *dev);
 struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index e8336cce360b8f03a504890dc5ee426a2179f3ff..ca6ea9b3551b3f0307b1440a55dcd92a64e2f466 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -55,7 +55,7 @@
 /*
  * Support routines for v3+ hardware
  */
-
+#include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/dmaengine.h>
@@ -70,6 +70,10 @@
 /* ioat hardware assumes at least two sources for raid operations */
 #define src_cnt_to_sw(x) ((x) + 2)
 #define src_cnt_to_hw(x) ((x) - 2)
+#define ndest_to_sw(x) ((x) + 1)
+#define ndest_to_hw(x) ((x) - 1)
+#define src16_cnt_to_sw(x) ((x) + 9)
+#define src16_cnt_to_hw(x) ((x) - 9)
 
 /* provide a lookup table for setting the source address in the base or
  * extended descriptor of an xor or pq descriptor
@@ -77,7 +81,20 @@
 static const u8 xor_idx_to_desc = 0xe0;
 static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
 static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
+				       2, 2, 2, 2, 2, 2, 2 };
 static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
+					0, 1, 2, 3, 4, 5, 6 };
+
+/*
+ * technically sources 1 and 2 do not require SED, but the op will have
+ * at least 9 descriptors so that's irrelevant.
+ */
+static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
+				      1, 1, 1, 1, 1, 1, 1 };
+
+static void ioat3_eh(struct ioat2_dma_chan *ioat);
 
 static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
 {
@@ -101,6 +118,13 @@ static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
 	return raw->field[pq_idx_to_field[idx]];
 }
 
+static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
+{
+	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+	return raw->field[pq16_idx_to_field[idx]];
+}
+
 static void pq_set_src(struct ioat_raw_descriptor *descs[2],
 		       dma_addr_t addr, u32 offset, u8 coef, int idx)
 {
@@ -111,6 +135,167 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2],
 	pq->coef[idx] = coef;
 }
 
+static int sed_get_pq16_pool_idx(int src_cnt)
+{
+	return pq16_idx_to_sed[src_cnt];
+}
+
+static bool is_jf_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_snb_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_ivb_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB0:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB1:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB2:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB3:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB4:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB5:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB6:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB7:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB8:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_hsw_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW0:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW1:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW2:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW3:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW4:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW5:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW6:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW7:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW8:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_xeon_cb32(struct pci_dev *pdev)
+{
+	return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) ||
+		is_hsw_ioat(pdev);
+}
+
+static bool is_bwd_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD0:
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD1:
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_bwd_noraid(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
+			dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
+	struct ioat_pq16a_descriptor *pq16 =
+		(struct ioat_pq16a_descriptor *)desc[1];
+	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+	raw->field[pq16_idx_to_field[idx]] = addr + offset;
+
+	if (idx < 8)
+		pq->coef[idx] = coef;
+	else
+		pq16->coef[idx - 8] = coef;
+}
+
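+/*
+ * Two-level allocation: the wrapper comes from a slab cache, the hardware
+ * SED from a DMA-coherent pool so the engine can reach it.
+ */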
+static struct ioat_sed_ent *
+ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
+{
+	struct ioat_sed_ent *sed;
+	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
+
+	sed = kmem_cache_alloc(device->sed_pool, flags);
+	if (!sed)
+		return NULL;
+
+	sed->hw_pool = hw_pool;
+	sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
+				 flags, &sed->dma);
+	if (!sed->hw) {
+		kmem_cache_free(device->sed_pool, sed);
+		return NULL;
+	}
+
+	return sed;
+}
+
+static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed)
+{
+	if (!sed)
+		return;
+
+	dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
+	kmem_cache_free(device->sed_pool, sed);
+}
+
 static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 			    struct ioat_ring_ent *desc, int idx)
 {
@@ -223,6 +408,54 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 		}
 		break;
 	}
+	case IOAT_OP_PQ_16S:
+	case IOAT_OP_PQ_VAL_16S: {
+		struct ioat_pq_descriptor *pq = desc->pq;
+		int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
+		struct ioat_raw_descriptor *descs[4];
+		int i;
+
+		/* in the 'continue' case don't unmap the dests as sources */
+		if (dmaf_p_disabled_continue(flags))
+			src_cnt--;
+		else if (dmaf_continue(flags))
+			src_cnt -= 3;
+
+		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			descs[0] = (struct ioat_raw_descriptor *)pq;
+			descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw);
+			descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]);
+			for (i = 0; i < src_cnt; i++) {
+				dma_addr_t src = pq16_get_src(descs, i);
+
+				ioat_unmap(pdev, src - offset, len,
+					   PCI_DMA_TODEVICE, flags, 0);
+			}
+
+			/* the dests are sources in pq validate operations */
+			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
+				if (!(flags & DMA_PREP_PQ_DISABLE_P))
+					ioat_unmap(pdev, pq->p_addr - offset,
+						   len, PCI_DMA_TODEVICE,
+						   flags, 0);
+				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+					ioat_unmap(pdev, pq->q_addr - offset,
+						   len, PCI_DMA_TODEVICE,
+						   flags, 0);
+				break;
+			}
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (!(flags & DMA_PREP_PQ_DISABLE_P))
+				ioat_unmap(pdev, pq->p_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				ioat_unmap(pdev, pq->q_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+		}
+		break;
+	}
 	default:
 		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
 			__func__, desc->hw->ctl_f.op);
@@ -250,6 +483,63 @@ static bool desc_has_ext(struct ioat_ring_ent *desc)
 	return false;
 }
 
+static u64 ioat3_get_current_completion(struct ioat_chan_common *chan)
+{
+	u64 phys_complete;
+	u64 completion;
+
+	completion = *chan->completion;
+	phys_complete = ioat_chansts_to_addr(completion);
+
+	dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
+		(unsigned long long) phys_complete);
+
+	return phys_complete;
+}
+
+static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan,
+				   u64 *phys_complete)
+{
+	*phys_complete = ioat3_get_current_completion(chan);
+	if (*phys_complete == chan->last_completion)
+		return false;
+
+	clear_bit(IOAT_COMPLETION_ACK, &chan->state);
+	mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	return true;
+}
+
+static void
+desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc)
+{
+	struct ioat_dma_descriptor *hw = desc->hw;
+
+	switch (hw->ctl_f.op) {
+	case IOAT_OP_PQ_VAL:
+	case IOAT_OP_PQ_VAL_16S:
+	{
+		struct ioat_pq_descriptor *pq = desc->pq;
+
+		/* check if an error was written back */
+		if (!pq->dwbes_f.wbes)
+			return;
+
+		/* need to record chanerr so it can be checked and cleared later */
+
+		if (pq->dwbes_f.p_val_err)
+			*desc->result |= SUM_CHECK_P_RESULT;
+
+		if (pq->dwbes_f.q_val_err)
+			*desc->result |= SUM_CHECK_Q_RESULT;
+
+		return;
+	}
+	default:
+		return;
+	}
+}
+
 /**
  * __cleanup - reclaim used descriptors
  * @ioat: channel (ring) to clean
@@ -260,6 +550,7 @@ static bool desc_has_ext(struct ioat_ring_ent *desc)
 static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 {
 	struct ioat_chan_common *chan = &ioat->base;
+	struct ioatdma_device *device = chan->device;
 	struct ioat_ring_ent *desc;
 	bool seen_current = false;
 	int idx = ioat->tail, i;
@@ -268,6 +559,16 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
 		__func__, ioat->head, ioat->tail, ioat->issued);
 
+	/*
+	 * At restart of the channel, the completion address and the
+	 * channel status will be 0 due to starting a new chain. Since
+	 * it's a new chain and the first descriptor "fails", there is
+	 * nothing to clean up. We do not want to reap the entire submitted
+	 * chain due to this 0 address value and then BUG.
+	 */
+	if (!phys_complete)
+		return;
+
 	active = ioat2_ring_active(ioat);
 	for (i = 0; i < active && !seen_current; i++) {
 		struct dma_async_tx_descriptor *tx;
@@ -276,6 +577,11 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
 		desc = ioat2_get_ring_ent(ioat, idx + i);
 		dump_desc_dbg(ioat, desc);
+
+		/* set err stat if we are using dwbes */
+		if (device->cap & IOAT_CAP_DWBES)
+			desc_get_errstat(ioat, desc);
+
 		tx = &desc->txd;
 		if (tx->cookie) {
 			dma_cookie_complete(tx);
@@ -294,6 +600,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 			BUG_ON(i + 1 >= active);
 			i++;
 		}
+
+		/* cleanup super extended descriptors */
+		if (desc->sed) {
+			ioat3_free_sed(device, desc->sed);
+			desc->sed = NULL;
+		}
 	}
 	smp_mb(); /* finish all descriptor reads before incrementing tail */
 	ioat->tail = idx + i;
@@ -314,11 +626,22 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
 static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
 {
 	struct ioat_chan_common *chan = &ioat->base;
-	dma_addr_t phys_complete;
+	u64 phys_complete;
 
 	spin_lock_bh(&chan->cleanup_lock);
-	if (ioat_cleanup_preamble(chan, &phys_complete))
+
+	if (ioat3_cleanup_preamble(chan, &phys_complete))
 		__cleanup(ioat, phys_complete);
+
+	if (is_ioat_halted(*chan->completion)) {
+		u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+
+		if (chanerr & IOAT_CHANERR_HANDLE_MASK) {
+			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+			ioat3_eh(ioat);
+		}
+	}
+
 	spin_unlock_bh(&chan->cleanup_lock);
 }
 
@@ -333,15 +656,78 @@ static void ioat3_cleanup_event(unsigned long data)
 static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
 {
 	struct ioat_chan_common *chan = &ioat->base;
-	dma_addr_t phys_complete;
+	u64 phys_complete;
 
 	ioat2_quiesce(chan, 0);
-	if (ioat_cleanup_preamble(chan, &phys_complete))
+	if (ioat3_cleanup_preamble(chan, &phys_complete))
 		__cleanup(ioat, phys_complete);
 
 	__ioat2_restart_chan(ioat);
 }
 
+static void ioat3_eh(struct ioat2_dma_chan *ioat)
+{
+	struct ioat_chan_common *chan = &ioat->base;
+	struct pci_dev *pdev = to_pdev(chan);
+	struct ioat_dma_descriptor *hw;
+	u64 phys_complete;
+	struct ioat_ring_ent *desc;
+	u32 err_handled = 0;
+	u32 chanerr_int;
+	u32 chanerr;
+
+	/* cleanup so tail points to descriptor that caused the error */
+	if (ioat3_cleanup_preamble(chan, &phys_complete))
+		__cleanup(ioat, phys_complete);
+
+	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+	pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int);
+
+	dev_dbg(to_dev(chan), "%s: error = %x:%x\n",
+		__func__, chanerr, chanerr_int);
+
+	desc = ioat2_get_ring_ent(ioat, ioat->tail);
+	hw = desc->hw;
+	dump_desc_dbg(ioat, desc);
+
+	switch (hw->ctl_f.op) {
+	case IOAT_OP_XOR_VAL:
+		if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
+			*desc->result |= SUM_CHECK_P_RESULT;
+			err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
+		}
+		break;
+	case IOAT_OP_PQ_VAL:
+	case IOAT_OP_PQ_VAL_16S:
+		if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
+			*desc->result |= SUM_CHECK_P_RESULT;
+			err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
+		}
+		if (chanerr & IOAT_CHANERR_XOR_Q_ERR) {
+			*desc->result |= SUM_CHECK_Q_RESULT;
+			err_handled |= IOAT_CHANERR_XOR_Q_ERR;
+		}
+		break;
+	}
+
+	/* fault on unhandled error or spurious halt */
+	if (chanerr ^ err_handled || chanerr == 0) {
+		dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n",
+			__func__, chanerr, err_handled);
+		BUG();
+	}
+
+	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
+
+	/* mark faulting descriptor as complete */
+	*chan->completion = desc->txd.phys;
+
+	spin_lock_bh(&ioat->prep_lock);
+	ioat3_restart_channel(ioat);
+	spin_unlock_bh(&ioat->prep_lock);
+}
+
 static void check_active(struct ioat2_dma_chan *ioat)
 {
 	struct ioat_chan_common *chan = &ioat->base;
@@ -605,7 +991,8 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct
 	int i;
 
 	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
-		" sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
+		" src_cnt: %d)\n",
 		desc_id(desc), (unsigned long long) desc->txd.phys,
 		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
 		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
@@ -617,6 +1004,42 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct
 			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
 	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
 	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
+}
+
+static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat,
+			       struct ioat_ring_ent *desc)
+{
+	struct device *dev = to_dev(&ioat->base);
+	struct ioat_pq_descriptor *pq = desc->pq;
+	struct ioat_raw_descriptor *descs[] = { (void *)pq,
+						(void *)pq,
+						(void *)pq };
+	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
+	int i;
+
+	if (desc->sed) {
+		descs[1] = (void *)desc->sed->hw;
+		descs[2] = (void *)desc->sed->hw + 64;
+	}
+
+	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
+		" src_cnt: %d)\n",
+		desc_id(desc), (unsigned long long) desc->txd.phys,
+		(unsigned long long) pq->next,
+		desc->txd.flags, pq->size, pq->ctl,
+		pq->ctl_f.op, pq->ctl_f.int_en,
+		pq->ctl_f.compl_write,
+		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+		pq->ctl_f.src_cnt);
+	for (i = 0; i < src_cnt; i++) {
+		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+			(unsigned long long) pq16_get_src(descs, i),
+			pq->coef[i]);
+	}
+	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
 }
 
 static struct dma_async_tx_descriptor *
@@ -627,6 +1050,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
 {
 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 	struct ioat_chan_common *chan = &ioat->base;
+	struct ioatdma_device *device = chan->device;
 	struct ioat_ring_ent *compl_desc;
 	struct ioat_ring_ent *desc;
 	struct ioat_ring_ent *ext;
@@ -637,6 +1061,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
 	u32 offset = 0;
 	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
 	int i, s, idx, with_ext, num_descs;
+	int cb32 = (device->version < IOAT_VER_3_3) ? 1 : 0;
 
 	dev_dbg(to_dev(chan), "%s\n", __func__);
 	/* the engine requires at least two sources (we provide
@@ -662,7 +1087,7 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
 	 * order.
 	 */
 	if (likely(num_descs) &&
-	    ioat2_check_space_lock(ioat, num_descs+1) == 0)
+	    ioat2_check_space_lock(ioat, num_descs + cb32) == 0)
 		idx = ioat->head;
 	else
 		return NULL;
@@ -700,6 +1125,9 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
 		pq->q_addr = dst[1] + offset;
 		pq->ctl = 0;
 		pq->ctl_f.op = op;
+		/* we turn on descriptor write back error status */
+		if (device->cap & IOAT_CAP_DWBES)
+			pq->ctl_f.wb_en = result ? 1 : 0;
 		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
 		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
 		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
@@ -716,26 +1144,140 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
 	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
 	dump_pq_desc_dbg(ioat, desc, ext);
 
-	/* completion descriptor carries interrupt bit */
-	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
-	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
-	hw = compl_desc->hw;
-	hw->ctl = 0;
-	hw->ctl_f.null = 1;
-	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
-	hw->ctl_f.compl_write = 1;
-	hw->size = NULL_DESC_BUFFER_SIZE;
-	dump_desc_dbg(ioat, compl_desc);
+	if (!cb32) {
+		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+		pq->ctl_f.compl_write = 1;
+		compl_desc = desc;
+	} else {
+		/* completion descriptor carries interrupt bit */
+		compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+		hw = compl_desc->hw;
+		hw->ctl = 0;
+		hw->ctl_f.null = 1;
+		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+		hw->ctl_f.compl_write = 1;
+		hw->size = NULL_DESC_BUFFER_SIZE;
+		dump_desc_dbg(ioat, compl_desc);
+	}
 
 	/* we leave the channel locked to ensure in order submission */
 	return &compl_desc->txd;
 }
 
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
+		       const dma_addr_t *dst, const dma_addr_t *src,
+		       unsigned int src_cnt, const unsigned char *scf,
+		       size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioatdma_device *device = chan->device;
+	struct ioat_ring_ent *desc;
+	size_t total_len = len;
+	struct ioat_pq_descriptor *pq;
+	u32 offset = 0;
+	u8 op;
+	int i, s, idx, num_descs;
+
+	/* this function only handles src_cnt 9 - 16 */
+	BUG_ON(src_cnt < 9);
+
+	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
+
+	dev_dbg(to_dev(chan), "%s\n", __func__);
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+
+	/*
+	 * 16 source pq is only available on cb3.3 and has no completion
+	 * write hw bug.
+	 */
+	if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0)
+		idx = ioat->head;
+	else
+		return NULL;
+
+	i = 0;
+
+	do {
+		struct ioat_raw_descriptor *descs[4];
+		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		pq = desc->pq;
+
+		descs[0] = (struct ioat_raw_descriptor *) pq;
+
+		desc->sed = ioat3_alloc_sed(device,
+					    sed_get_pq16_pool_idx(src_cnt));
+		if (!desc->sed) {
+			dev_err(to_dev(chan),
+				"%s: no free sed entries\n", __func__);
+			return NULL;
+		}
+
+		pq->sed_addr = desc->sed->dma;
+		desc->sed->parent = desc;
+
+		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
+		descs[2] = (void *)descs[1] + 64;
+
+		for (s = 0; s < src_cnt; s++)
+			pq16_set_src(descs, src[s], offset, scf[s], s);
+
+		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
+		if (dmaf_p_disabled_continue(flags))
+			pq16_set_src(descs, dst[1], offset, 1, s++);
+		else if (dmaf_continue(flags)) {
+			pq16_set_src(descs, dst[0], offset, 0, s++);
+			pq16_set_src(descs, dst[1], offset, 1, s++);
+			pq16_set_src(descs, dst[1], offset, 0, s++);
+		}
+
+		pq->size = xfer_size;
+		pq->p_addr = dst[0] + offset;
+		pq->q_addr = dst[1] + offset;
+		pq->ctl = 0;
+		pq->ctl_f.op = op;
+		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
+		/* we turn on descriptor write back error status */
+		if (device->cap & IOAT_CAP_DWBES)
+			pq->ctl_f.wb_en = result ? 1 : 0;
+		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+		len -= xfer_size;
+		offset += xfer_size;
+	} while (++i < num_descs);
+
+	/* last pq descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+	/* with cb3.3 we should be able to do completion w/o a null desc */
+	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	pq->ctl_f.compl_write = 1;
+
+	dump_pq16_desc_dbg(ioat, desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &desc->txd;
+}
+
 static struct dma_async_tx_descriptor *
 ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 	      unsigned int src_cnt, const unsigned char *scf, size_t len,
 	      unsigned long flags)
 {
+	struct dma_device *dma = chan->device;
+
 	/* specify valid address for disabled result */
 	if (flags & DMA_PREP_PQ_DISABLE_P)
 		dst[0] = dst[1];
@@ -755,11 +1297,20 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 		single_source_coef[0] = scf[0];
 		single_source_coef[1] = 0;
 
-		return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
-					    single_source_coef, len, flags);
-	} else
-		return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
-					    len, flags);
+		return (src_cnt > 8) && (dma->max_pq > 8) ?
+			__ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
+					       2, single_source_coef, len,
+					       flags) :
+			__ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+					     single_source_coef, len, flags);
+
+	} else {
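+		/* take the 16-source path only when hw advertises >8 PQ sources */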
+		return (src_cnt > 8) && (dma->max_pq > 8) ?
+			__ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
+					       scf, len, flags) :
+			__ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
+					     scf, len, flags);
+	}
 }
 
 struct dma_async_tx_descriptor *
@@ -767,6 +1318,8 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 		  unsigned int src_cnt, const unsigned char *scf, size_t len,
 		  enum sum_check_flags *pqres, unsigned long flags)
 {
+	struct dma_device *dma = chan->device;
+
 	/* specify valid address for disabled result */
 	if (flags & DMA_PREP_PQ_DISABLE_P)
 		pq[0] = pq[1];
@@ -778,14 +1331,18 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
 	 */
 	*pqres = 0;
 
-	return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
-				    flags);
+	return (src_cnt > 8) && (dma->max_pq > 8) ?
+		__ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
+				       flags) :
+		__ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+				     flags);
 }
 
 static struct dma_async_tx_descriptor *
 ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 		 unsigned int src_cnt, size_t len, unsigned long flags)
 {
+	struct dma_device *dma = chan->device;
 	unsigned char scf[src_cnt];
 	dma_addr_t pq[2];
 
@@ -794,8 +1351,11 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 	flags |= DMA_PREP_PQ_DISABLE_Q;
 	pq[1] = dst; /* specify valid address for disabled result */
 
-	return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
-				    flags);
+	return (src_cnt > 8) && (dma->max_pq > 8) ?
+		__ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
+				       flags) :
+		__ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+				     flags);
 }
 
 struct dma_async_tx_descriptor *
@@ -803,6 +1363,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
 		     unsigned int src_cnt, size_t len,
 		     enum sum_check_flags *result, unsigned long flags)
 {
+	struct dma_device *dma = chan->device;
 	unsigned char scf[src_cnt];
 	dma_addr_t pq[2];
 
@@ -816,8 +1377,12 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
 	flags |= DMA_PREP_PQ_DISABLE_Q;
 	pq[1] = pq[0]; /* specify valid address for disabled result */
 
-	return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
-				    len, flags);
+	return (src_cnt > 8) && (dma->max_pq > 8) ?
+		__ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
+				       scf, len, flags) :
+		__ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
+				     scf, len, flags);
 }
 
 static struct dma_async_tx_descriptor *
@@ -1167,6 +1732,56 @@ static int ioat3_dma_self_test(struct ioatdma_device *device)
 	return 0;
 }
 
+static int ioat3_irq_reinit(struct ioatdma_device *device)
+{
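+	/*
+	 * Free the interrupt resources installed for the current mode and
+	 * re-run the common interrupt setup so working vectors are in
+	 * place after a device reset.
+	 */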
+	int msixcnt = device->common.chancnt;
+	struct pci_dev *pdev = device->pdev;
+	int i;
+	struct msix_entry *msix;
+	struct ioat_chan_common *chan;
+	int err = 0;
+
+	switch (device->irq_mode) {
+	case IOAT_MSIX:
+
+		for (i = 0; i < msixcnt; i++) {
+			msix = &device->msix_entries[i];
+			chan = ioat_chan_by_index(device, i);
+			devm_free_irq(&pdev->dev, msix->vector, chan);
+		}
+
+		pci_disable_msix(pdev);
+		break;
+
+	case IOAT_MSIX_SINGLE:
+		msix = &device->msix_entries[0];
+		chan = ioat_chan_by_index(device, 0);
+		devm_free_irq(&pdev->dev, msix->vector, chan);
+		pci_disable_msix(pdev);
+		break;
+
+	case IOAT_MSI:
+		chan = ioat_chan_by_index(device, 0);
+		devm_free_irq(&pdev->dev, pdev->irq, chan);
+		pci_disable_msi(pdev);
+		break;
+
+	case IOAT_INTX:
+		chan = ioat_chan_by_index(device, 0);
+		devm_free_irq(&pdev->dev, pdev->irq, chan);
+		break;
+
+	default:
+		return 0;
+	}
+
+	device->irq_mode = IOAT_NOIRQ;
+
+	err = ioat_dma_setup_interrupts(device);
+
+	return err;
+}
+
 static int ioat3_reset_hw(struct ioat_chan_common *chan)
 {
 	/* throw away whatever the channel was doing and get it
@@ -1183,80 +1798,65 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan)
 	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
 	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
 
-	/* clear any pending errors */
-	err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
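+	/* the config-space error clearing below only applies to pre-v3.3 HW */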
+	if (device->version < IOAT_VER_3_3) {
+		/* clear any pending errors */
+		err = pci_read_config_dword(pdev,
+				IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
+		if (err) {
+			dev_err(&pdev->dev,
+				"channel error register unreachable\n");
+			return err;
+		}
+		pci_write_config_dword(pdev,
+				IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
+
+		/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+		 * (workaround for spurious config parity error after restart)
+		 */
+		pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+		if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
+			pci_write_config_dword(pdev,
+					       IOAT_PCI_DMAUNCERRSTS_OFFSET,
+					       0x10);
+		}
+	}
+
+	err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
 	if (err) {
-		dev_err(&pdev->dev, "channel error register unreachable\n");
+		dev_err(&pdev->dev, "Failed to reset!\n");
 		return err;
 	}
-	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
 
-	/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
-	 * (workaround for spurious config parity error after restart)
-	 */
-	pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
-	if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
-		pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
+	if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev))
+		err = ioat3_irq_reinit(device);
 
-	return ioat2_reset_sync(chan, msecs_to_jiffies(200));
+	return err;
 }
 
-static bool is_jf_ioat(struct pci_dev *pdev)
+static void ioat3_intr_quirk(struct ioatdma_device *device)
 {
-	switch (pdev->device) {
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
-	case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
-		return true;
-	default:
-		return false;
-	}
-}
+	struct dma_device *dma;
+	struct dma_chan *c;
+	struct ioat_chan_common *chan;
+	u32 errmask;
 
-static bool is_snb_ioat(struct pci_dev *pdev)
-{
-	switch (pdev->device) {
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
-	case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
-		return true;
-	default:
-		return false;
-	}
-}
+	dma = &device->common;
 
-static bool is_ivb_ioat(struct pci_dev *pdev)
-{
-	switch (pdev->device) {
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB0:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB1:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB2:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB3:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB4:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB5:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB6:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB7:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB8:
-	case PCI_DEVICE_ID_INTEL_IOAT_IVB9:
-		return true;
-	default:
-		return false;
+	/*
+	 * if we have descriptor write back error status, we mask the
+	 * error interrupts
+	 */
+	if (device->cap & IOAT_CAP_DWBES) {
+		list_for_each_entry(c, &dma->channels, device_node) {
+			chan = to_chan_common(c);
+			errmask = readl(chan->reg_base +
+					IOAT_CHANERR_MASK_OFFSET);
+			errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR |
+				   IOAT_CHANERR_XOR_Q_ERR;
+			writel(errmask, chan->reg_base +
+					IOAT_CHANERR_MASK_OFFSET);
+		}
 	}
-
 }
 
 int ioat3_dma_probe(struct ioatdma_device *device, int dca)
@@ -1268,30 +1868,33 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
 	struct ioat_chan_common *chan;
 	bool is_raid_device = false;
 	int err;
-	u32 cap;
 
 	device->enumerate_channels = ioat2_enumerate_channels;
 	device->reset_hw = ioat3_reset_hw;
 	device->self_test = ioat3_dma_self_test;
+	device->intr_quirk = ioat3_intr_quirk;
 	dma = &device->common;
 	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
 	dma->device_issue_pending = ioat2_issue_pending;
 	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
 	dma->device_free_chan_resources = ioat2_free_chan_resources;
 
-	if (is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev))
+	if (is_xeon_cb32(pdev))
 		dma->copy_align = 6;
 
 	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
 	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
 
-	cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
+	device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
+
+	if (is_bwd_noraid(pdev))
+		device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS);
 
 	/* dca is incompatible with raid operations */
-	if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
-		cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
+	if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
+		device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
 
-	if (cap & IOAT_CAP_XOR) {
+	if (device->cap & IOAT_CAP_XOR) {
 		is_raid_device = true;
 		dma->max_xor = 8;
 		dma->xor_align = 6;
@@ -1302,53 +1905,86 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
 		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
 		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
 	}
-	if (cap & IOAT_CAP_PQ) {
+
+	if (device->cap & IOAT_CAP_PQ) {
 		is_raid_device = true;
-		dma_set_maxpq(dma, 8, 0);
-		dma->pq_align = 6;
 
-		dma_cap_set(DMA_PQ, dma->cap_mask);
 		dma->device_prep_dma_pq = ioat3_prep_pq;
-
-		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
 		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+		dma_cap_set(DMA_PQ, dma->cap_mask);
+		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
 
-		if (!(cap & IOAT_CAP_XOR)) {
-			dma->max_xor = 8;
-			dma->xor_align = 6;
+		if (device->cap & IOAT_CAP_RAID16SS) {
+			dma_set_maxpq(dma, 16, 0);
+			dma->pq_align = 0;
+		} else {
+			dma_set_maxpq(dma, 8, 0);
+			if (is_xeon_cb32(pdev))
+				dma->pq_align = 6;
+			else
+				dma->pq_align = 0;
+		}
 
-			dma_cap_set(DMA_XOR, dma->cap_mask);
+		if (!(device->cap & IOAT_CAP_XOR)) {
 			dma->device_prep_dma_xor = ioat3_prep_pqxor;
-
-			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
 			dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
+			dma_cap_set(DMA_XOR, dma->cap_mask);
+			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+
+			if (device->cap & IOAT_CAP_RAID16SS) {
+				dma->max_xor = 16;
+				dma->xor_align = 0;
+			} else {
+				dma->max_xor = 8;
+				if (is_xeon_cb32(pdev))
+					dma->xor_align = 6;
+				else
+					dma->xor_align = 0;
+			}
 		}
 	}
-	if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
+
+	if (is_raid_device && (device->cap & IOAT_CAP_FILL_BLOCK)) {
 		dma_cap_set(DMA_MEMSET, dma->cap_mask);
 		dma->device_prep_dma_memset = ioat3_prep_memset_lock;
 	}
 
 
-	if (is_raid_device) {
-		dma->device_tx_status = ioat3_tx_status;
-		device->cleanup_fn = ioat3_cleanup_event;
-		device->timer_fn = ioat3_timer_event;
-	} else {
-		dma->device_tx_status = ioat_dma_tx_status;
-		device->cleanup_fn = ioat2_cleanup_event;
-		device->timer_fn = ioat2_timer_event;
+	dma->device_tx_status = ioat3_tx_status;
+	device->cleanup_fn = ioat3_cleanup_event;
+	device->timer_fn = ioat3_timer_event;
+
+	if (is_xeon_cb32(pdev)) {
+		dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
+		dma->device_prep_dma_xor_val = NULL;
+
+		dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
+		dma->device_prep_dma_pq_val = NULL;
 	}
 
-	#ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
-	dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
-	dma->device_prep_dma_pq_val = NULL;
-	#endif
+	/* starting with CB3.3 super extended descriptors are supported */
+	if (device->cap & IOAT_CAP_RAID16SS) {
+		char pool_name[14];
+		int i;
+
+		/* allocate sw descriptor pool for SED */
+		device->sed_pool = kmem_cache_create("ioat_sed",
+				sizeof(struct ioat_sed_ent), 0, 0, NULL);
+		if (!device->sed_pool)
+			return -ENOMEM;
+
+		for (i = 0; i < MAX_SED_POOLS; i++) {
+			snprintf(pool_name, sizeof(pool_name),
+				 "ioat_hw%d_sed", i);
 
-	#ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
-	dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
-	dma->device_prep_dma_xor_val = NULL;
-	#endif
+			/* allocate SED DMA pool */
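+			/* pool i holds descriptors of (i + 1) * SED_SIZE bytes */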
+			device->sed_hw_pool[i] = dma_pool_create(pool_name,
+					&pdev->dev,
+					SED_SIZE * (i + 1), 64, 0);
+			if (!device->sed_hw_pool[i])
+				goto sed_pool_cleanup;
+
+		}
+	}
 
 	err = ioat_probe(device);
 	if (err)
@@ -1371,4 +2007,28 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
 		device->dca = ioat3_dca_init(pdev, device->reg_base);
 
 	return 0;
+
+sed_pool_cleanup:
+	if (device->sed_pool) {
+		int i;
+		kmem_cache_destroy(device->sed_pool);
+
+		for (i = 0; i < MAX_SED_POOLS; i++)
+			if (device->sed_hw_pool[i])
+				dma_pool_destroy(device->sed_hw_pool[i]);
+	}
+
+	return -ENOMEM;
+}
+
+void ioat3_dma_remove(struct ioatdma_device *device)
+{
+	if (device->sed_pool) {
+		int i;
+		kmem_cache_destroy(device->sed_pool);
+
+		for (i = 0; i < MAX_SED_POOLS; i++)
+			if (device->sed_hw_pool[i])
+				dma_pool_destroy(device->sed_hw_pool[i]);
+	}
 }
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index 7cb74c62c7192f5bcae7d5fe6999eb17cb21dfd3..5ee57d402a6ef0361ef9317a3daeb225d95f75f7 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -30,11 +30,6 @@
 #define IOAT_PCI_DID_SCNB       0x65FF
 #define IOAT_PCI_DID_SNB        0x402F
 
-#define IOAT_VER_1_2            0x12    /* Version 1.2 */
-#define IOAT_VER_2_0            0x20    /* Version 2.0 */
-#define IOAT_VER_3_0            0x30    /* Version 3.0 */
-#define IOAT_VER_3_2            0x32    /* Version 3.2 */
-
 #define PCI_DEVICE_ID_INTEL_IOAT_IVB0	0x0e20
 #define PCI_DEVICE_ID_INTEL_IOAT_IVB1	0x0e21
 #define PCI_DEVICE_ID_INTEL_IOAT_IVB2	0x0e22
@@ -46,6 +41,29 @@
 #define PCI_DEVICE_ID_INTEL_IOAT_IVB8	0x0e2e
 #define PCI_DEVICE_ID_INTEL_IOAT_IVB9	0x0e2f
 
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW0	0x2f20
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW1	0x2f21
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW2	0x2f22
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW3	0x2f23
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW4	0x2f24
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW5	0x2f25
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW6	0x2f26
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW7	0x2f27
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW8	0x2f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW9	0x2f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD0	0x0C50
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD1	0x0C51
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD2	0x0C52
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD3	0x0C53
+
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+#define IOAT_VER_3_2            0x32    /* Version 3.2 */
+#define IOAT_VER_3_3            0x33    /* Version 3.3 */
+
 int system_has_dca_enabled(struct pci_dev *pdev);
 
 struct ioat_dma_descriptor {
@@ -147,7 +165,17 @@ struct ioat_xor_ext_descriptor {
 };
 
 struct ioat_pq_descriptor {
-	uint32_t	size;
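+	/* size doubles as descriptor write back error status (DWBES) */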
+	union {
+		uint32_t	size;
+		uint32_t	dwbes;
+		struct {
+			unsigned int rsvd:25;
+			unsigned int p_val_err:1;
+			unsigned int q_val_err:1;
+			unsigned int rsvd1:4;
+			unsigned int wbes:1;
+		} dwbes_f;
+	};
 	union {
 		uint32_t ctl;
 		struct {
@@ -162,9 +190,14 @@ struct ioat_pq_descriptor {
 			unsigned int hint:1;
 			unsigned int p_disable:1;
 			unsigned int q_disable:1;
-			unsigned int rsvd:11;
+			unsigned int rsvd2:2;
+			unsigned int wb_en:1;
+			unsigned int prl_en:1;
+			unsigned int rsvd3:7;
 			#define IOAT_OP_PQ 0x89
 			#define IOAT_OP_PQ_VAL 0x8a
+			#define IOAT_OP_PQ_16S 0xa0
+			#define IOAT_OP_PQ_VAL_16S 0xa1
 			unsigned int op:8;
 		} ctl_f;
 	};
@@ -172,7 +205,10 @@ struct ioat_pq_descriptor {
 	uint64_t	p_addr;
 	uint64_t	next;
 	uint64_t	src_addr2;
-	uint64_t	src_addr3;
+	union {
+		uint64_t	src_addr3;
+		uint64_t	sed_addr;
+	};
 	uint8_t		coef[8];
 	uint64_t	q_addr;
 };
@@ -221,4 +257,40 @@ struct ioat_pq_update_descriptor {
 struct ioat_raw_descriptor {
 	uint64_t	field[8];
 };
+
+struct ioat_pq16a_descriptor {
+	uint8_t coef[8];
+	uint64_t src_addr3;
+	uint64_t src_addr4;
+	uint64_t src_addr5;
+	uint64_t src_addr6;
+	uint64_t src_addr7;
+	uint64_t src_addr8;
+	uint64_t src_addr9;
+};
+
+struct ioat_pq16b_descriptor {
+	uint64_t src_addr10;
+	uint64_t src_addr11;
+	uint64_t src_addr12;
+	uint64_t src_addr13;
+	uint64_t src_addr14;
+	uint64_t src_addr15;
+	uint64_t src_addr16;
+	uint64_t rsvd;
+};
+
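+/* a SED (super extended descriptor) carries sources 3..16 of a 16-src op */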
+union ioat_sed_pq_descriptor {
+	struct ioat_pq16a_descriptor a;
+	struct ioat_pq16b_descriptor b;
+};
+
+#define SED_SIZE	64
+
+struct ioat_sed_raw_descriptor {
+	uint64_t	a[8];
+	uint64_t	b[8];
+	uint64_t	c[8];
+};
+
 #endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 71c7ecd80fac8d3b9b33dd909ba2533da17e2e83..2c8d560e6334123097627ab59ac166a47cb1f0d6 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -94,6 +94,23 @@ static struct pci_device_id ioat_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) },
 
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) },
+
+	/* I/OAT v3.3 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) },
+
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
@@ -190,6 +207,9 @@ static void ioat_remove(struct pci_dev *pdev)
 	if (!device)
 		return;
 
+	if (device->version >= IOAT_VER_3_0)
+		ioat3_dma_remove(device);
+
 	dev_err(&pdev->dev, "Removing dma and dca services\n");
 	if (device->dca) {
 		unregister_dca_provider(device->dca, &pdev->dev);
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index 1391798542b66756b9ac0e811e64a683a65b77e8..2f1cfa0f1f475bfa7992c5daf944d093e7970232 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -79,6 +79,8 @@
 #define IOAT_CAP_APIC				0x00000080
 #define IOAT_CAP_XOR				0x00000100
 #define IOAT_CAP_PQ				0x00000200
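+/* DWBES: descriptor write back error status; RAID16SS: 16-source RAID ops */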
+#define IOAT_CAP_DWBES				0x00002000
+#define IOAT_CAP_RAID16SS			0x00020000
 
 #define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */
 
@@ -93,6 +95,8 @@
 #define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
 #define IOAT_CHANCTRL_INT_REARM			0x0001
 #define IOAT_CHANCTRL_RUN			(IOAT_CHANCTRL_INT_REARM |\
+						 IOAT_CHANCTRL_ERR_INT_EN |\
+						 IOAT_CHANCTRL_ERR_COMPLETION_EN |\
 						 IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
 
 #define IOAT_DMA_COMP_OFFSET			0x02	/* 16-bit DMA channel compatibility */
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 8c61d17a86bf182b750d14df3b57f51708a2b326..d39c2cd0795d71437935d22ce90ca636c8fae0aa 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1642,7 +1642,7 @@ static int __init ipu_idmac_init(struct ipu *ipu)
 	return dma_async_device_register(&idmac->dma);
 }
 
-static void __exit ipu_idmac_exit(struct ipu *ipu)
+static void ipu_idmac_exit(struct ipu *ipu)
 {
 	int i;
 	struct idmac *idmac = &ipu->idmac;
@@ -1756,7 +1756,7 @@ err_noirq:
 	return ret;
 }
 
-static int __exit ipu_remove(struct platform_device *pdev)
+static int ipu_remove(struct platform_device *pdev)
 {
 	struct ipu *ipu = platform_get_drvdata(pdev);
 
@@ -1781,7 +1781,7 @@ static struct platform_driver ipu_platform_driver = {
 		.name	= "ipu-core",
 		.owner	= THIS_MODULE,
 	},
-	.remove		= __exit_p(ipu_remove),
+	.remove		= ipu_remove,
 };
 
 static int __init ipu_init(void)
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index 69d04d28b1efbe78cb871b22af17bb851071e606..7aa0864cd487a4a084dcf3b326f9ef6ba91d3d86 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -13,43 +13,31 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/module.h>
-#include <linux/rculist.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_dma.h>
 
 static LIST_HEAD(of_dma_list);
-static DEFINE_SPINLOCK(of_dma_lock);
+static DEFINE_MUTEX(of_dma_lock);
 
 /**
- * of_dma_get_controller - Get a DMA controller in DT DMA helpers list
+ * of_dma_find_controller - Get a DMA controller in DT DMA helpers list
  * @dma_spec:	pointer to DMA specifier as found in the device tree
  *
  * Finds a DMA controller with matching device node and number for dma cells
- * in a list of registered DMA controllers. If a match is found the use_count
- * variable is increased and a valid pointer to the DMA data stored is retuned.
- * A NULL pointer is returned if no match is found.
+ * in a list of registered DMA controllers. If a match is found, a valid
+ * pointer to the stored DMA data is returned. A NULL pointer is returned
+ * if no match is found.
  */
-static struct of_dma *of_dma_get_controller(struct of_phandle_args *dma_spec)
+static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec)
 {
 	struct of_dma *ofdma;
 
-	spin_lock(&of_dma_lock);
-
-	if (list_empty(&of_dma_list)) {
-		spin_unlock(&of_dma_lock);
-		return NULL;
-	}
-
 	list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
 		if ((ofdma->of_node == dma_spec->np) &&
-		    (ofdma->of_dma_nbcells == dma_spec->args_count)) {
-			ofdma->use_count++;
-			spin_unlock(&of_dma_lock);
+		    (ofdma->of_dma_nbcells == dma_spec->args_count))
 			return ofdma;
-		}
-
-	spin_unlock(&of_dma_lock);
 
 	pr_debug("%s: can't find DMA controller %s\n", __func__,
 		 dma_spec->np->full_name);
@@ -57,22 +45,6 @@ static struct of_dma *of_dma_get_controller(struct of_phandle_args *dma_spec)
 	return NULL;
 }
 
-/**
- * of_dma_put_controller - Decrement use count for a registered DMA controller
- * @of_dma:	pointer to DMA controller data
- *
- * Decrements the use_count variable in the DMA data structure. This function
- * should be called only when a valid pointer is returned from
- * of_dma_get_controller() and no further accesses to data referenced by that
- * pointer are needed.
- */
-static void of_dma_put_controller(struct of_dma *ofdma)
-{
-	spin_lock(&of_dma_lock);
-	ofdma->use_count--;
-	spin_unlock(&of_dma_lock);
-}
-
 /**
  * of_dma_controller_register - Register a DMA controller to DT DMA helpers
  * @np:			device node of DMA controller
@@ -93,6 +65,7 @@ int of_dma_controller_register(struct device_node *np,
 {
 	struct of_dma	*ofdma;
 	int		nbcells;
+	const __be32	*prop;
 
 	if (!np || !of_dma_xlate) {
 		pr_err("%s: not enough information provided\n", __func__);
@@ -103,8 +76,11 @@ int of_dma_controller_register(struct device_node *np,
 	if (!ofdma)
 		return -ENOMEM;
 
-	nbcells = be32_to_cpup(of_get_property(np, "#dma-cells", NULL));
-	if (!nbcells) {
+	prop = of_get_property(np, "#dma-cells", NULL);
+	if (prop)
+		nbcells = be32_to_cpup(prop);
+
+	if (!prop || !nbcells) {
 		pr_err("%s: #dma-cells property is missing or invalid\n",
 		       __func__);
 		kfree(ofdma);
@@ -115,12 +91,11 @@ int of_dma_controller_register(struct device_node *np,
 	ofdma->of_dma_nbcells = nbcells;
 	ofdma->of_dma_xlate = of_dma_xlate;
 	ofdma->of_dma_data = data;
-	ofdma->use_count = 0;
 
 	/* Now queue of_dma controller structure in list */
-	spin_lock(&of_dma_lock);
+	mutex_lock(&of_dma_lock);
 	list_add_tail(&ofdma->of_dma_controllers, &of_dma_list);
-	spin_unlock(&of_dma_lock);
+	mutex_unlock(&of_dma_lock);
 
 	return 0;
 }
@@ -132,32 +107,20 @@ EXPORT_SYMBOL_GPL(of_dma_controller_register);
  *
  * Memory allocated by of_dma_controller_register() is freed here.
  */
-int of_dma_controller_free(struct device_node *np)
+void of_dma_controller_free(struct device_node *np)
 {
 	struct of_dma *ofdma;
 
-	spin_lock(&of_dma_lock);
-
-	if (list_empty(&of_dma_list)) {
-		spin_unlock(&of_dma_lock);
-		return -ENODEV;
-	}
+	mutex_lock(&of_dma_lock);
 
 	list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
 		if (ofdma->of_node == np) {
-			if (ofdma->use_count) {
-				spin_unlock(&of_dma_lock);
-				return -EBUSY;
-			}
-
 			list_del(&ofdma->of_dma_controllers);
-			spin_unlock(&of_dma_lock);
 			kfree(ofdma);
-			return 0;
+			break;
 		}
 
-	spin_unlock(&of_dma_lock);
-	return -ENODEV;
+	mutex_unlock(&of_dma_lock);
 }
 EXPORT_SYMBOL_GPL(of_dma_controller_free);
 
@@ -172,8 +135,8 @@ EXPORT_SYMBOL_GPL(of_dma_controller_free);
  * specifiers, matches the name provided. Returns 0 if the name matches and
  * a valid pointer to the DMA specifier is found. Otherwise returns -ENODEV.
  */
-static int of_dma_match_channel(struct device_node *np, char *name, int index,
-				struct of_phandle_args *dma_spec)
+static int of_dma_match_channel(struct device_node *np, const char *name,
+				int index, struct of_phandle_args *dma_spec)
 {
 	const char *s;
 
@@ -198,7 +161,7 @@ static int of_dma_match_channel(struct device_node *np, char *name, int index,
  * Returns pointer to appropriate dma channel on success or NULL on error.
  */
 struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
-					      char *name)
+					      const char *name)
 {
 	struct of_phandle_args	dma_spec;
 	struct of_dma		*ofdma;
@@ -220,14 +183,15 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 		if (of_dma_match_channel(np, name, i, &dma_spec))
 			continue;
 
-		ofdma = of_dma_get_controller(&dma_spec);
-
-		if (!ofdma)
-			continue;
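+		/*
+		 * Hold the lock across the xlate call so the controller
+		 * cannot be unregistered while it is being used.
+		 */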
+		mutex_lock(&of_dma_lock);
+		ofdma = of_dma_find_controller(&dma_spec);
 
-		chan = ofdma->of_dma_xlate(&dma_spec, ofdma);
+		if (ofdma)
+			chan = ofdma->of_dma_xlate(&dma_spec, ofdma);
+		else
+			chan = NULL;
 
-		of_dma_put_controller(ofdma);
+		mutex_unlock(&of_dma_lock);
 
 		of_node_put(dma_spec.np);
 
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index 08b43bf3715816951d23cc71dd49ca9b810b55f1..ec3fc4fd9160e8aeddf16054cd405f35b43bfef7 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -16,6 +16,8 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
 
 #include "virt-dma.h"
 
@@ -67,6 +69,10 @@ static const unsigned es_bytes[] = {
 	[OMAP_DMA_DATA_TYPE_S32] = 4,
 };
 
+static struct of_dma_filter_info omap_dma_info = {
+	.filter_fn = omap_dma_filter_fn,
+};
+
 static inline struct omap_dmadev *to_omap_dma_dev(struct dma_device *d)
 {
 	return container_of(d, struct omap_dmadev, ddev);
@@ -629,8 +635,22 @@ static int omap_dma_probe(struct platform_device *pdev)
 		pr_warn("OMAP-DMA: failed to register slave DMA engine device: %d\n",
 			rc);
 		omap_dma_free(od);
-	} else {
-		platform_set_drvdata(pdev, od);
+		return rc;
+	}
+
+	platform_set_drvdata(pdev, od);
+
+	if (pdev->dev.of_node) {
+		omap_dma_info.dma_cap = od->ddev.cap_mask;
+
+		/* Device-tree DMA controller registration */
+		rc = of_dma_controller_register(pdev->dev.of_node,
+				of_dma_simple_xlate, &omap_dma_info);
+		if (rc) {
+			pr_warn("OMAP-DMA: failed to register DMA controller\n");
+			dma_async_device_unregister(&od->ddev);
+			omap_dma_free(od);
+		}
 	}
 
 	dev_info(&pdev->dev, "OMAP DMA engine driver\n");
@@ -642,18 +662,32 @@ static int omap_dma_remove(struct platform_device *pdev)
 {
 	struct omap_dmadev *od = platform_get_drvdata(pdev);
 
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
+
 	dma_async_device_unregister(&od->ddev);
 	omap_dma_free(od);
 
 	return 0;
 }
 
+static const struct of_device_id omap_dma_match[] = {
+	{ .compatible = "ti,omap2420-sdma", },
+	{ .compatible = "ti,omap2430-sdma", },
+	{ .compatible = "ti,omap3430-sdma", },
+	{ .compatible = "ti,omap3630-sdma", },
+	{ .compatible = "ti,omap4430-sdma", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, omap_dma_match);
+
 static struct platform_driver omap_dma_driver = {
 	.probe	= omap_dma_probe,
 	.remove	= omap_dma_remove,
 	.driver = {
 		.name = "omap-dma-engine",
 		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(omap_dma_match),
 	},
 };
 
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index d01faeb0f27c15ee9a55e78d257caf58136ff8d9..ce3dc3e9688c86ea30e3be7757094b78749a6ec9 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -476,7 +476,7 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan)
 	dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i);
 
 	if (!ret) {
-		ret = pdc_alloc_desc(&pd_chan->chan, GFP_NOIO);
+		ret = pdc_alloc_desc(&pd_chan->chan, GFP_ATOMIC);
 		if (ret) {
 			spin_lock(&pd_chan->lock);
 			pd_chan->descs_allocated++;
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 5dbc5946c4c3d9931585ac2440f64274f477fe02..a17553f7c02809325b06830d6eb1e36a4c95e435 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -26,6 +26,7 @@
 #include <linux/scatterlist.h>
 #include <linux/of.h>
 #include <linux/of_dma.h>
+#include <linux/err.h>
 
 #include "dmaengine.h"
 #define PL330_MAX_CHAN		8
@@ -2288,13 +2289,12 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
 
 		/* If already submitted */
 		if (desc->status == BUSY)
-			break;
+			continue;
 
 		ret = pl330_submit_req(pch->pl330_chid,
 						&desc->req);
 		if (!ret) {
 			desc->status = BUSY;
-			break;
 		} else if (ret == -EAGAIN) {
 			/* QFull or DMAC Dying */
 			break;
@@ -2904,9 +2904,9 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
 
 	res = &adev->res;
-	pi->base = devm_request_and_ioremap(&adev->dev, res);
-	if (!pi->base)
-		return -ENXIO;
+	pi->base = devm_ioremap_resource(&adev->dev, res);
+	if (IS_ERR(pi->base))
+		return PTR_ERR(pi->base);
 
 	amba_set_drvdata(adev, pdmac);
 
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
new file mode 100644
index 0000000000000000000000000000000000000000..5c1dee20c13ed5021ae24715ad08a84c23879506
--- /dev/null
+++ b/drivers/dma/sh/Kconfig
@@ -0,0 +1,24 @@
+#
+# DMA engine configuration for sh
+#
+
+config SH_DMAE_BASE
+	bool "Renesas SuperH DMA Engine support"
+	depends on (SUPERH && SH_DMA) || (ARM && ARCH_SHMOBILE)
+	depends on !SH_DMA_API
+	default y
+	select DMA_ENGINE
+	help
+	  Enable base support shared by the Renesas SuperH DMA drivers.
+
+config SH_DMAE
+	tristate "Renesas SuperH DMAC support"
+	depends on SH_DMAE_BASE
+	help
+	  Enable support for the Renesas SuperH DMA controllers.
+
+config SUDMAC
+	tristate "Renesas SUDMAC support"
+	depends on SH_DMAE_BASE
+	help
+	  Enable support for the Renesas SUDMAC controllers.
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
index 54ae9572b0ac1f2735d7a8fe3e47adb4c87a6373..c07ca4612e460ef4b20c9869ed78f87edc8f6926 100644
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -1,2 +1,3 @@
-obj-$(CONFIG_SH_DMAE) += shdma-base.o
+obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o
 obj-$(CONFIG_SH_DMAE) += shdma.o
+obj-$(CONFIG_SUDMAC) += sudmac.o
diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c
new file mode 100644
index 0000000000000000000000000000000000000000..e7c94bbddb536b8e44822e684e7301487bb9ee94
--- /dev/null
+++ b/drivers/dma/sh/sudmac.c
@@ -0,0 +1,428 @@
+/*
+ * Renesas SUDMAC support
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ *
+ * based on drivers/dma/sh/shdma.c:
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/sudmac.h>
+
+struct sudmac_chan {
+	struct shdma_chan shdma_chan;
+	void __iomem *base;
+	char dev_id[16];	/* unique name of the channel within the DMAC */
+
+	u32 offset;		/* for CFG, BA, BBC, CA, CBC, DEN */
+	u32 cfg;
+	u32 dint_end_bit;
+};
+
+struct sudmac_device {
+	struct shdma_dev shdma_dev;
+	struct sudmac_pdata *pdata;
+	void __iomem *chan_reg;
+};
+
+struct sudmac_regs {
+	u32 base_addr;
+	u32 base_byte_count;
+};
+
+struct sudmac_desc {
+	struct sudmac_regs hw;
+	struct shdma_desc shdma_desc;
+};
+
+#define to_chan(schan) container_of(schan, struct sudmac_chan, shdma_chan)
+#define to_desc(sdesc) container_of(sdesc, struct sudmac_desc, shdma_desc)
+#define to_sdev(sc) container_of(sc->shdma_chan.dma_chan.device, \
+				 struct sudmac_device, shdma_dev.dma_dev)
+
+/* SUDMAC register */
+#define SUDMAC_CH0CFG		0x00
+#define SUDMAC_CH0BA		0x10
+#define SUDMAC_CH0BBC		0x18
+#define SUDMAC_CH0CA		0x20
+#define SUDMAC_CH0CBC		0x28
+#define SUDMAC_CH0DEN		0x30
+#define SUDMAC_DSTSCLR		0x38
+#define SUDMAC_DBUFCTRL		0x3C
+#define SUDMAC_DINTCTRL		0x40
+#define SUDMAC_DINTSTS		0x44
+#define SUDMAC_DINTSTSCLR	0x48
+#define SUDMAC_CH0SHCTRL	0x50
+
+/* Definitions for the sudmac_channel.config */
+#define SUDMAC_SENDBUFM	0x1000 /* b12: Transmit Buffer Mode */
+#define SUDMAC_RCVENDM	0x0100 /* b8: Receive Data Transfer End Mode */
+#define SUDMAC_LBA_WAIT	0x0030 /* b5-4: Local Bus Access Wait */
+
+/* Definitions for the sudmac_channel.dint_end_bit */
+#define SUDMAC_CH1ENDE	0x0002 /* b1: Ch1 DMA Transfer End Int Enable */
+#define SUDMAC_CH0ENDE	0x0001 /* b0: Ch0 DMA Transfer End Int Enable */
+
+#define SUDMAC_DRV_NAME "sudmac"
+
+static void sudmac_writel(struct sudmac_chan *sc, u32 data, u32 reg)
+{
+	iowrite32(data, sc->base + reg);
+}
+
+static u32 sudmac_readl(struct sudmac_chan *sc, u32 reg)
+{
+	return ioread32(sc->base + reg);
+}
+
+static bool sudmac_is_busy(struct sudmac_chan *sc)
+{
+	u32 den = sudmac_readl(sc, SUDMAC_CH0DEN + sc->offset);
+
+	if (den)
+		return true; /* working */
+
+	return false; /* waiting */
+}
+
+static void sudmac_set_reg(struct sudmac_chan *sc, struct sudmac_regs *hw,
+			   struct shdma_desc *sdesc)
+{
+	sudmac_writel(sc, sc->cfg, SUDMAC_CH0CFG + sc->offset);
+	sudmac_writel(sc, hw->base_addr, SUDMAC_CH0BA + sc->offset);
+	sudmac_writel(sc, hw->base_byte_count, SUDMAC_CH0BBC + sc->offset);
+}
+
+static void sudmac_start(struct sudmac_chan *sc)
+{
+	u32 dintctrl = sudmac_readl(sc, SUDMAC_DINTCTRL);
+
+	sudmac_writel(sc, dintctrl | sc->dint_end_bit, SUDMAC_DINTCTRL);
+	sudmac_writel(sc, 1, SUDMAC_CH0DEN + sc->offset);
+}
+
+static void sudmac_start_xfer(struct shdma_chan *schan,
+			      struct shdma_desc *sdesc)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	struct sudmac_desc *sd = to_desc(sdesc);
+
+	sudmac_set_reg(sc, &sd->hw, sdesc);
+	sudmac_start(sc);
+}
+
+static bool sudmac_channel_busy(struct shdma_chan *schan)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+
+	return sudmac_is_busy(sc);
+}
+
+static void sudmac_setup_xfer(struct shdma_chan *schan, int slave_id)
+{
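+	/* SUDMAC has no per-transfer setup to perform */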
+}
+
+static const struct sudmac_slave_config *sudmac_find_slave(
+	struct sudmac_chan *sc, int slave_id)
+{
+	struct sudmac_device *sdev = to_sdev(sc);
+	struct sudmac_pdata *pdata = sdev->pdata;
+	const struct sudmac_slave_config *cfg;
+	int i;
+
+	for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
+		if (cfg->slave_id == slave_id)
+			return cfg;
+
+	return NULL;
+}
+
+static int sudmac_set_slave(struct shdma_chan *schan, int slave_id, bool try)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	const struct sudmac_slave_config *cfg = sudmac_find_slave(sc, slave_id);
+
+	if (!cfg)
+		return -ENODEV;
+
+	return 0;
+}
+
+static inline void sudmac_dma_halt(struct sudmac_chan *sc)
+{
+	u32 dintctrl = sudmac_readl(sc, SUDMAC_DINTCTRL);
+
+	sudmac_writel(sc, 0, SUDMAC_CH0DEN + sc->offset);
+	sudmac_writel(sc, dintctrl & ~sc->dint_end_bit, SUDMAC_DINTCTRL);
+	sudmac_writel(sc, sc->dint_end_bit, SUDMAC_DINTSTSCLR);
+}
+
+static int sudmac_desc_setup(struct shdma_chan *schan,
+			     struct shdma_desc *sdesc,
+			     dma_addr_t src, dma_addr_t dst, size_t *len)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	struct sudmac_desc *sd = to_desc(sdesc);
+
+	dev_dbg(sc->shdma_chan.dev, "%s: src=%llx, dst=%llx, len=%zu\n",
+		__func__, (unsigned long long)src, (unsigned long long)dst,
+		*len);
+
+	if (*len > schan->max_xfer_len)
+		*len = schan->max_xfer_len;
+
+	if (dst)
+		sd->hw.base_addr = dst;
+	else if (src)
+		sd->hw.base_addr = src;
+	sd->hw.base_byte_count = *len;
+
+	return 0;
+}
+
+static void sudmac_halt(struct shdma_chan *schan)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+
+	sudmac_dma_halt(sc);
+}
+
+static bool sudmac_chan_irq(struct shdma_chan *schan, int irq)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	u32 dintsts = sudmac_readl(sc, SUDMAC_DINTSTS);
+
+	if (!(dintsts & sc->dint_end_bit))
+		return false;
+
+	/* DMA stop */
+	sudmac_dma_halt(sc);
+
+	return true;
+}
+
+static size_t sudmac_get_partial(struct shdma_chan *schan,
+				 struct shdma_desc *sdesc)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	struct sudmac_desc *sd = to_desc(sdesc);
+	u32 current_byte_count = sudmac_readl(sc, SUDMAC_CH0CBC + sc->offset);
+
+	return sd->hw.base_byte_count - current_byte_count;
+}
+
+static bool sudmac_desc_completed(struct shdma_chan *schan,
+				  struct shdma_desc *sdesc)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	struct sudmac_desc *sd = to_desc(sdesc);
+	u32 current_addr = sudmac_readl(sc, SUDMAC_CH0CA + sc->offset);
+
+	return sd->hw.base_addr + sd->hw.base_byte_count == current_addr;
+}
+
+static int sudmac_chan_probe(struct sudmac_device *su_dev, int id, int irq,
+			     unsigned long flags)
+{
+	struct shdma_dev *sdev = &su_dev->shdma_dev;
+	struct platform_device *pdev = to_platform_device(sdev->dma_dev.dev);
+	struct sudmac_chan *sc;
+	struct shdma_chan *schan;
+	int err;
+
+	sc = devm_kzalloc(&pdev->dev, sizeof(struct sudmac_chan), GFP_KERNEL);
+	if (!sc) {
+		dev_err(sdev->dma_dev.dev,
+			"No free memory for allocating dma channels!\n");
+		return -ENOMEM;
+	}
+
+	schan = &sc->shdma_chan;
+	schan->max_xfer_len = 64 * 1024 * 1024 - 1;
+
+	shdma_chan_probe(sdev, schan, id);
+
+	sc->base = su_dev->chan_reg;
+
+	/* get platform_data */
+	sc->offset = su_dev->pdata->channel->offset;
+	if (su_dev->pdata->channel->config & SUDMAC_TX_BUFFER_MODE)
+		sc->cfg |= SUDMAC_SENDBUFM;
+	if (su_dev->pdata->channel->config & SUDMAC_RX_END_MODE)
+		sc->cfg |= SUDMAC_RCVENDM;
+	sc->cfg |= (su_dev->pdata->channel->wait << 4) & SUDMAC_LBA_WAIT;
+
+	if (su_dev->pdata->channel->dint_end_bit & SUDMAC_DMA_BIT_CH0)
+		sc->dint_end_bit |= SUDMAC_CH0ENDE;
+	if (su_dev->pdata->channel->dint_end_bit & SUDMAC_DMA_BIT_CH1)
+		sc->dint_end_bit |= SUDMAC_CH1ENDE;
+
+	/* set up channel irq */
+	if (pdev->id >= 0)
+		snprintf(sc->dev_id, sizeof(sc->dev_id), "sudmac%d.%d",
+			 pdev->id, id);
+	else
+		snprintf(sc->dev_id, sizeof(sc->dev_id), "sudmac%d", id);
+
+	err = shdma_request_irq(schan, irq, flags, sc->dev_id);
+	if (err) {
+		dev_err(sdev->dma_dev.dev,
+			"DMA channel %d request_irq failed %d\n", id, err);
+		goto err_no_irq;
+	}
+
+	return 0;
+
+err_no_irq:
+	/* remove from dmaengine device node */
+	shdma_chan_remove(schan);
+	return err;
+}
+
+static void sudmac_chan_remove(struct sudmac_device *su_dev)
+{
+	struct dma_device *dma_dev = &su_dev->shdma_dev.dma_dev;
+	struct shdma_chan *schan;
+	int i;
+
+	shdma_for_each_chan(schan, &su_dev->shdma_dev, i) {
+		struct sudmac_chan *sc = to_chan(schan);
+
+		BUG_ON(!schan);
+
+		shdma_free_irq(&sc->shdma_chan);
+		shdma_chan_remove(schan);
+	}
+	dma_dev->chancnt = 0;
+}
+
+static dma_addr_t sudmac_slave_addr(struct shdma_chan *schan)
+{
+	/* SUDMAC doesn't need the address */
+	return 0;
+}
+
+static struct shdma_desc *sudmac_embedded_desc(void *buf, int i)
+{
+	return &((struct sudmac_desc *)buf)[i].shdma_desc;
+}
+
+static const struct shdma_ops sudmac_shdma_ops = {
+	.desc_completed = sudmac_desc_completed,
+	.halt_channel = sudmac_halt,
+	.channel_busy = sudmac_channel_busy,
+	.slave_addr = sudmac_slave_addr,
+	.desc_setup = sudmac_desc_setup,
+	.set_slave = sudmac_set_slave,
+	.setup_xfer = sudmac_setup_xfer,
+	.start_xfer = sudmac_start_xfer,
+	.embedded_desc = sudmac_embedded_desc,
+	.chan_irq = sudmac_chan_irq,
+	.get_partial = sudmac_get_partial,
+};
+
+static int sudmac_probe(struct platform_device *pdev)
+{
+	struct sudmac_pdata *pdata = pdev->dev.platform_data;
+	int err, i;
+	struct sudmac_device *su_dev;
+	struct dma_device *dma_dev;
+	struct resource *chan, *irq_res;
+
+	/* get platform data */
+	if (!pdata)
+		return -ENODEV;
+
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!chan || !irq_res)
+		return -ENODEV;
+
+	err = -ENOMEM;
+	su_dev = devm_kzalloc(&pdev->dev, sizeof(struct sudmac_device),
+			      GFP_KERNEL);
+	if (!su_dev) {
+		dev_err(&pdev->dev, "Not enough memory\n");
+		return err;
+	}
+
+	dma_dev = &su_dev->shdma_dev.dma_dev;
+
+	su_dev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+	if (IS_ERR(su_dev->chan_reg))
+		return PTR_ERR(su_dev->chan_reg);
+
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+	su_dev->shdma_dev.ops = &sudmac_shdma_ops;
+	su_dev->shdma_dev.desc_size = sizeof(struct sudmac_desc);
+	err = shdma_init(&pdev->dev, &su_dev->shdma_dev, pdata->channel_num);
+	if (err < 0)
+		return err;
+
+	/* platform data */
+	su_dev->pdata = pdev->dev.platform_data;
+
+	platform_set_drvdata(pdev, su_dev);
+
+	/* Create DMA Channel */
+	for (i = 0; i < pdata->channel_num; i++) {
+		err = sudmac_chan_probe(su_dev, i, irq_res->start, IRQF_SHARED);
+		if (err)
+			goto chan_probe_err;
+	}
+
+	err = dma_async_device_register(&su_dev->shdma_dev.dma_dev);
+	if (err < 0)
+		goto chan_probe_err;
+
+	return err;
+
+chan_probe_err:
+	sudmac_chan_remove(su_dev);
+
+	platform_set_drvdata(pdev, NULL);
+	shdma_cleanup(&su_dev->shdma_dev);
+
+	return err;
+}
+
+static int sudmac_remove(struct platform_device *pdev)
+{
+	struct sudmac_device *su_dev = platform_get_drvdata(pdev);
+	struct dma_device *dma_dev = &su_dev->shdma_dev.dma_dev;
+
+	dma_async_device_unregister(dma_dev);
+	sudmac_chan_remove(su_dev);
+	shdma_cleanup(&su_dev->shdma_dev);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver sudmac_driver = {
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= SUDMAC_DRV_NAME,
+	},
+	.probe		= sudmac_probe,
+	.remove		= sudmac_remove,
+};
+module_platform_driver(sudmac_driver);
+
+MODULE_AUTHOR("Yoshihiro Shimoda");
+MODULE_DESCRIPTION("Renesas SUDMAC driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" SUDMAC_DRV_NAME);
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 1d627e2391f495ef2b8b9fd2e6b5359da940df2d..1765a0a2736d0d729041dc5dc7dbb110f1d1c5e4 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -16,6 +16,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
+#include <linux/clk.h>
 #include <linux/sirfsoc_dma.h>
 
 #include "dmaengine.h"
@@ -78,6 +79,7 @@ struct sirfsoc_dma {
 	struct sirfsoc_dma_chan		channels[SIRFSOC_DMA_CHANNELS];
 	void __iomem			*base;
 	int				irq;
+	struct clk			*clk;
 	bool				is_marco;
 };
 
@@ -639,6 +641,12 @@ static int sirfsoc_dma_probe(struct platform_device *op)
 		return -EINVAL;
 	}
 
+	sdma->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(sdma->clk)) {
+		dev_err(dev, "failed to get a clock.\n");
+		return PTR_ERR(sdma->clk);
+	}
+
 	ret = of_address_to_resource(dn, 0, &res);
 	if (ret) {
 		dev_err(dev, "Error parsing memory region!\n");
@@ -698,6 +706,8 @@ static int sirfsoc_dma_probe(struct platform_device *op)
 
 	tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
 
+	clk_prepare_enable(sdma->clk);
+
 	/* Register DMA engine */
 	dev_set_drvdata(dev, sdma);
 	ret = dma_async_device_register(dma);
@@ -720,6 +730,7 @@ static int sirfsoc_dma_remove(struct platform_device *op)
 	struct device *dev = &op->dev;
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 
+	clk_disable_unprepare(sdma->clk);
 	dma_async_device_unregister(&sdma->dma);
 	free_irq(sdma->irq, sdma);
 	irq_dispose_mapping(sdma->irq);
@@ -742,7 +753,18 @@ static struct platform_driver sirfsoc_dma_driver = {
 	},
 };
 
-module_platform_driver(sirfsoc_dma_driver);
+static int __init sirfsoc_dma_init(void)
+{
+	return platform_driver_register(&sirfsoc_dma_driver);
+}
+
+static void __exit sirfsoc_dma_exit(void)
+{
+	platform_driver_unregister(&sirfsoc_dma_driver);
+}
+
+subsys_initcall(sirfsoc_dma_init);
+module_exit(sirfsoc_dma_exit);
 
 MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, "
 	"Barry Song <baohua.song@csr.com>");
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index fcee27eae1f6d35d9635a009e3fe991fc87a0eeb..ce193409ebd32345e997dad798625c41565b438e 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -30,6 +30,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/clk/tegra.h>
@@ -199,6 +200,7 @@ struct tegra_dma_channel {
 
 	/* Channel-slave specific configuration */
 	struct dma_slave_config dma_sconfig;
+	struct tegra_dma_channel_regs	channel_reg;
 };
 
 /* tegra_dma: Tegra DMA specific information */
@@ -1213,7 +1215,6 @@ static const struct tegra_dma_chip_data tegra20_dma_chip_data = {
 	.support_channel_pause	= false,
 };
 
-#if defined(CONFIG_OF)
 /* Tegra30 specific DMA controller information */
 static const struct tegra_dma_chip_data tegra30_dma_chip_data = {
 	.nr_channels		= 32,
@@ -1243,7 +1244,6 @@ static const struct of_device_id tegra_dma_of_match[] = {
 	},
 };
 MODULE_DEVICE_TABLE(of, tegra_dma_of_match);
-#endif
 
 static int tegra_dma_probe(struct platform_device *pdev)
 {
@@ -1252,20 +1252,14 @@ static int tegra_dma_probe(struct platform_device *pdev)
 	int ret;
 	int i;
 	const struct tegra_dma_chip_data *cdata = NULL;
+	const struct of_device_id *match;
 
-	if (pdev->dev.of_node) {
-		const struct of_device_id *match;
-		match = of_match_device(of_match_ptr(tegra_dma_of_match),
-					&pdev->dev);
-		if (!match) {
-			dev_err(&pdev->dev, "Error: No device match found\n");
-			return -ENODEV;
-		}
-		cdata = match->data;
-	} else {
-		/* If no device tree then fallback to tegra20 */
-		cdata = &tegra20_dma_chip_data;
+	match = of_match_device(tegra_dma_of_match, &pdev->dev);
+	if (!match) {
+		dev_err(&pdev->dev, "Error: No device match found\n");
+		return -ENODEV;
 	}
+	cdata = match->data;
 
 	tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels *
 			sizeof(struct tegra_dma_channel), GFP_KERNEL);
@@ -1448,11 +1442,74 @@ static int tegra_dma_runtime_resume(struct device *dev)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int tegra_dma_pm_suspend(struct device *dev)
+{
+	struct tegra_dma *tdma = dev_get_drvdata(dev);
+	int i;
+	int ret;
+
+	/* Enable clock before accessing register */
+	ret = tegra_dma_runtime_resume(dev);
+	if (ret < 0)
+		return ret;
+
+	tdma->reg_gen = tdma_read(tdma, TEGRA_APBDMA_GENERAL);
+	for (i = 0; i < tdma->chip_data->nr_channels; i++) {
+		struct tegra_dma_channel *tdc = &tdma->channels[i];
+		struct tegra_dma_channel_regs *ch_reg = &tdc->channel_reg;
+
+		ch_reg->csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR);
+		ch_reg->ahb_ptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBPTR);
+		ch_reg->apb_ptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_APBPTR);
+		ch_reg->ahb_seq = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBSEQ);
+		ch_reg->apb_seq = tdc_read(tdc, TEGRA_APBDMA_CHAN_APBSEQ);
+	}
+
+	/* Disable clock */
+	tegra_dma_runtime_suspend(dev);
+	return 0;
+}
+
+static int tegra_dma_pm_resume(struct device *dev)
+{
+	struct tegra_dma *tdma = dev_get_drvdata(dev);
+	int i;
+	int ret;
+
+	/* Enable clock before accessing register */
+	ret = tegra_dma_runtime_resume(dev);
+	if (ret < 0)
+		return ret;
+
+	tdma_write(tdma, TEGRA_APBDMA_GENERAL, tdma->reg_gen);
+	tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
+	tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFFul);
+
+	for (i = 0; i < tdma->chip_data->nr_channels; i++) {
+		struct tegra_dma_channel *tdc = &tdma->channels[i];
+		struct tegra_dma_channel_regs *ch_reg = &tdc->channel_reg;
+
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_APBSEQ, ch_reg->apb_seq);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, ch_reg->apb_ptr);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBSEQ, ch_reg->ahb_seq);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, ch_reg->ahb_ptr);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
+			(ch_reg->csr & ~TEGRA_APBDMA_CSR_ENB));
+	}
+
+	/* Disable clock */
+	tegra_dma_runtime_suspend(dev);
+	return 0;
+}
+#endif
+
 static const struct dev_pm_ops tegra_dma_dev_pm_ops = {
 #ifdef CONFIG_PM_RUNTIME
 	.runtime_suspend = tegra_dma_runtime_suspend,
 	.runtime_resume = tegra_dma_runtime_resume,
 #endif
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_pm_suspend, tegra_dma_pm_resume)
 };
 
 static struct platform_driver tegra_dmac_driver = {
@@ -1460,7 +1517,7 @@ static struct platform_driver tegra_dmac_driver = {
 		.name	= "tegra-apbdma",
 		.owner = THIS_MODULE,
 		.pm	= &tegra_dma_dev_pm_ops,
-		.of_match_table = of_match_ptr(tegra_dma_of_match),
+		.of_match_table = tegra_dma_of_match,
 	},
 	.probe		= tegra_dma_probe,
 	.remove		= tegra_dma_remove,
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 952f823901a6cb5d168ce0b90f2429383ed5bfc3..26107ba6edb33a2fb7d9165aa4ef874bf02f8fbe 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -823,7 +823,7 @@ static struct platform_driver td_driver = {
 		.owner  = THIS_MODULE,
 	},
 	.probe	= td_probe,
-	.remove	= __exit_p(td_remove),
+	.remove	= td_remove,
 };
 
 module_platform_driver(td_driver);
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 913f55c76c9915bfb08bde041afdc2d9a7d7590f..a59fb4841d4c18283eae911c076c43dc042f0748 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -1190,7 +1190,7 @@ static int __init txx9dmac_chan_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int __exit txx9dmac_chan_remove(struct platform_device *pdev)
+static int txx9dmac_chan_remove(struct platform_device *pdev)
 {
 	struct txx9dmac_chan *dc = platform_get_drvdata(pdev);
 
@@ -1252,7 +1252,7 @@ static int __init txx9dmac_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int __exit txx9dmac_remove(struct platform_device *pdev)
+static int txx9dmac_remove(struct platform_device *pdev)
 {
 	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
 
@@ -1299,14 +1299,14 @@ static const struct dev_pm_ops txx9dmac_dev_pm_ops = {
 };
 
 static struct platform_driver txx9dmac_chan_driver = {
-	.remove		= __exit_p(txx9dmac_chan_remove),
+	.remove		= txx9dmac_chan_remove,
 	.driver = {
 		.name	= "txx9dmac-chan",
 	},
 };
 
 static struct platform_driver txx9dmac_driver = {
-	.remove		= __exit_p(txx9dmac_remove),
+	.remove		= txx9dmac_remove,
 	.shutdown	= txx9dmac_shutdown,
 	.driver = {
 		.name	= "txx9dmac",
diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h
new file mode 100644
index 0000000000000000000000000000000000000000..d09deabc7bf64773eff4223d631830dc74ef45b3
--- /dev/null
+++ b/include/linux/acpi_dma.h
@@ -0,0 +1,116 @@
+/*
+ * ACPI helpers for DMA request / controller
+ *
+ * Based on of_dma.h
+ *
+ * Copyright (C) 2013, Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_ACPI_DMA_H
+#define __LINUX_ACPI_DMA_H
+
+#include <linux/list.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+
+/**
+ * struct acpi_dma_spec - slave device DMA resources
+ * @chan_id:	channel unique id
+ * @slave_id:	request line unique id
+ * @dev:	struct device of the DMA controller to be used in the filter
+ *		function
+ */
+struct acpi_dma_spec {
+	int		chan_id;
+	int		slave_id;
+	struct device	*dev;
+};
+
+/**
+ * struct acpi_dma - representation of the registered DMAC
+ * @dma_controllers:	linked list node
+ * @dev:		struct device of this controller
+ * @acpi_dma_xlate:	callback function to find a suitable channel
+ * @data:		private data used by a callback function
+ */
+struct acpi_dma {
+	struct list_head	dma_controllers;
+	struct device		*dev;
+	struct dma_chan		*(*acpi_dma_xlate)
+				(struct acpi_dma_spec *, struct acpi_dma *);
+	void			*data;
+};
+
+/* Used with acpi_dma_simple_xlate() */
+struct acpi_dma_filter_info {
+	dma_cap_mask_t	dma_cap;
+	dma_filter_fn	filter_fn;
+};
+
+#ifdef CONFIG_DMA_ACPI
+
+int acpi_dma_controller_register(struct device *dev,
+		struct dma_chan *(*acpi_dma_xlate)
+		(struct acpi_dma_spec *, struct acpi_dma *),
+		void *data);
+int acpi_dma_controller_free(struct device *dev);
+int devm_acpi_dma_controller_register(struct device *dev,
+		struct dma_chan *(*acpi_dma_xlate)
+		(struct acpi_dma_spec *, struct acpi_dma *),
+		void *data);
+void devm_acpi_dma_controller_free(struct device *dev);
+
+struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev,
+						      size_t index);
+struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev,
+						     const char *name);
+
+struct dma_chan *acpi_dma_simple_xlate(struct acpi_dma_spec *dma_spec,
+				       struct acpi_dma *adma);
+#else
+
+static inline int acpi_dma_controller_register(struct device *dev,
+		struct dma_chan *(*acpi_dma_xlate)
+		(struct acpi_dma_spec *, struct acpi_dma *),
+		void *data)
+{
+	return -ENODEV;
+}
+static inline int acpi_dma_controller_free(struct device *dev)
+{
+	return -ENODEV;
+}
+static inline int devm_acpi_dma_controller_register(struct device *dev,
+		struct dma_chan *(*acpi_dma_xlate)
+		(struct acpi_dma_spec *, struct acpi_dma *),
+		void *data)
+{
+	return -ENODEV;
+}
+static inline void devm_acpi_dma_controller_free(struct device *dev)
+{
+}
+
+static inline struct dma_chan *acpi_dma_request_slave_chan_by_index(
+		struct device *dev, size_t index)
+{
+	return NULL;
+}
+static inline struct dma_chan *acpi_dma_request_slave_chan_by_name(
+		struct device *dev, const char *name)
+{
+	return NULL;
+}
+
+#define acpi_dma_simple_xlate	NULL
+
+#endif
+
+#define acpi_dma_request_slave_channel	acpi_dma_request_slave_chan_by_index
+
+#endif /* __LINUX_ACPI_DMA_H */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 91ac8da2502017a0463f73e2394c2778bd78376d..96d3e4ab11a91a4ea28d0f864d1cfbee6e249127 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -967,8 +967,9 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
 #ifdef CONFIG_DMA_ENGINE
 enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
 void dma_issue_pending_all(void);
-struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
-struct dma_chan *dma_request_slave_channel(struct device *dev, char *name);
+struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
+					dma_filter_fn fn, void *fn_param);
+struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name);
 void dma_release_channel(struct dma_chan *chan);
 #else
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
@@ -978,13 +979,13 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript
 static inline void dma_issue_pending_all(void)
 {
 }
-static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask,
+static inline struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
 					      dma_filter_fn fn, void *fn_param)
 {
 	return NULL;
 }
 static inline struct dma_chan *dma_request_slave_channel(struct device *dev,
-							 char *name)
+							 const char *name)
 {
 	return NULL;
 }
@@ -1005,9 +1006,9 @@ struct dma_chan *net_dma_find_channel(void);
 	__dma_request_slave_channel_compat(&(mask), x, y, dev, name)
 
 static inline struct dma_chan
-*__dma_request_slave_channel_compat(dma_cap_mask_t *mask, dma_filter_fn fn,
-				  void *fn_param, struct device *dev,
-				  char *name)
+*__dma_request_slave_channel_compat(const dma_cap_mask_t *mask,
+				  dma_filter_fn fn, void *fn_param,
+				  struct device *dev, const char *name)
 {
 	struct dma_chan *chan;
 
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index d15073e080dd7130458c971c26d0a462746046a6..364dda734877d2b9a1d045d8082de2542420a475 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -25,7 +25,6 @@ struct of_dma {
 	struct dma_chan		*(*of_dma_xlate)
 				(struct of_phandle_args *, struct of_dma *);
 	void			*of_dma_data;
-	int			use_count;
 };
 
 struct of_dma_filter_info {
@@ -38,9 +37,9 @@ extern int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
 		(struct of_phandle_args *, struct of_dma *),
 		void *data);
-extern int of_dma_controller_free(struct device_node *np);
+extern void of_dma_controller_free(struct device_node *np);
 extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
-						     char *name);
+						     const char *name);
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
 #else
@@ -52,13 +51,12 @@ static inline int of_dma_controller_register(struct device_node *np,
 	return -ENODEV;
 }
 
-static inline int of_dma_controller_free(struct device_node *np)
+static inline void of_dma_controller_free(struct device_node *np)
 {
-	return -ENODEV;
 }
 
 static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
-						     char *name)
+						     const char *name)
 {
 	return NULL;
 }
diff --git a/include/linux/sudmac.h b/include/linux/sudmac.h
new file mode 100644
index 0000000000000000000000000000000000000000..377b8a5788fa29dd6a3213807cc191c9a2c000fd
--- /dev/null
+++ b/include/linux/sudmac.h
@@ -0,0 +1,52 @@
+/*
+ * Header for the SUDMAC driver
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+#ifndef SUDMAC_H
+#define SUDMAC_H
+
+#include <linux/dmaengine.h>
+#include <linux/shdma-base.h>
+#include <linux/types.h>
+
+/* Used by slave DMA clients to request DMA to/from a specific peripheral */
+struct sudmac_slave {
+	struct shdma_slave	shdma_slave;	/* Set by the platform */
+};
+
+/*
+ * Supplied by platforms to specify how a DMA channel has to be configured
+ * for a certain peripheral
+ */
+struct sudmac_slave_config {
+	int		slave_id;
+};
+
+struct sudmac_channel {
+	unsigned long	offset;
+	unsigned long	config;
+	unsigned long	wait;		/* The configurable range is 0 to 3 */
+	unsigned long	dint_end_bit;
+};
+
+struct sudmac_pdata {
+	const struct sudmac_slave_config *slave;
+	int slave_num;
+	const struct sudmac_channel *channel;
+	int channel_num;
+};
+
+/* Definitions for the sudmac_channel.config */
+#define SUDMAC_TX_BUFFER_MODE	BIT(0)
+#define SUDMAC_RX_END_MODE	BIT(1)
+
+/* Definitions for the sudmac_channel.dint_end_bit */
+#define SUDMAC_DMA_BIT_CH0	BIT(0)
+#define SUDMAC_DMA_BIT_CH1	BIT(1)
+
+#endif