diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 8ffbc25376a0f59cfcde7b10157a0d58b235a748..840f7d64d4835f5c93227ea9f948ac7f83116b31 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -165,3 +165,21 @@ Description:
 
 		Not all drivers support this attribute.  If it isn't supported,
 		attempts to read or write it will yield I/O errors.
+
+What:		/sys/devices/.../power/pm_qos_latency_us
+Date:		March 2012
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/devices/.../power/pm_qos_resume_latency_us attribute
+		contains the PM QoS resume latency limit for the given device,
+		which is the maximum allowed time it can take to resume the
+		device, after it has been suspended at run time, from a resume
+		request to the moment the device will be ready to process I/O,
+		in microseconds.  If it is equal to 0, however, this means that
+		the PM QoS resume latency may be arbitrary.
+
+		Not all drivers support this attribute.  If it isn't supported,
+		it is not present.
+
+		This attribute has no effect on system-wide suspend/resume and
+		hibernation.
diff --git a/Documentation/devicetree/bindings/arm/exynos/power_domain.txt b/Documentation/devicetree/bindings/arm/exynos/power_domain.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6528e215c5fe54e3c7e74ba2f909342e3773b635
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/exynos/power_domain.txt
@@ -0,0 +1,21 @@
+* Samsung Exynos Power Domains
+
+Exynos processors include support for multiple power domains which are used
+to gate power to one or more peripherals on the processor.
+
+Required Properties:
+- compatiable: should be one of the following.
+    * samsung,exynos4210-pd - for exynos4210 type power domain.
+- reg: physical base address of the controller and length of memory mapped
+    region.
+
+Optional Properties:
+- samsung,exynos4210-pd-off: Specifies that the power domain is in turned-off
+    state during boot and remains to be turned-off until explicitly turned-on.
+
+Example:
+
+	lcd0: power-domain-lcd0 {
+		compatible = "samsung,exynos4210-pd";
+		reg = <0x10023C00 0x10>;
+	};
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 20af7def23c8e652ac0012b6f6fc731be0ee200d..872815cd41d30bcc8e57df31b6b283a4872a2690 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -96,6 +96,12 @@ struct dev_pm_ops {
 	int (*thaw)(struct device *dev);
 	int (*poweroff)(struct device *dev);
 	int (*restore)(struct device *dev);
+	int (*suspend_late)(struct device *dev);
+	int (*resume_early)(struct device *dev);
+	int (*freeze_late)(struct device *dev);
+	int (*thaw_early)(struct device *dev);
+	int (*poweroff_late)(struct device *dev);
+	int (*restore_early)(struct device *dev);
 	int (*suspend_noirq)(struct device *dev);
 	int (*resume_noirq)(struct device *dev);
 	int (*freeze_noirq)(struct device *dev);
@@ -305,7 +311,7 @@ Entering System Suspend
 -----------------------
 When the system goes into the standby or memory sleep state, the phases are:
 
-		prepare, suspend, suspend_noirq.
+		prepare, suspend, suspend_late, suspend_noirq.
 
     1.	The prepare phase is meant to prevent races by preventing new devices
 	from being registered; the PM core would never know that all the
@@ -324,7 +330,12 @@ When the system goes into the standby or memory sleep state, the phases are:
 	appropriate low-power state, depending on the bus type the device is on,
 	and they may enable wakeup events.
 
-    3.	The suspend_noirq phase occurs after IRQ handlers have been disabled,
+    3	For a number of devices it is convenient to split suspend into the
+	"quiesce device" and "save device state" phases, in which cases
+	suspend_late is meant to do the latter.  It is always executed after
+	runtime power management has been disabled for all devices.
+
+    4.	The suspend_noirq phase occurs after IRQ handlers have been disabled,
 	which means that the driver's interrupt handler will not be called while
 	the callback method is running.  The methods should save the values of
 	the device's registers that weren't saved previously and finally put the
@@ -359,7 +370,7 @@ Leaving System Suspend
 ----------------------
 When resuming from standby or memory sleep, the phases are:
 
-		resume_noirq, resume, complete.
+		resume_noirq, resume_early, resume, complete.
 
     1.	The resume_noirq callback methods should perform any actions needed
 	before the driver's interrupt handlers are invoked.  This generally
@@ -375,14 +386,18 @@ When resuming from standby or memory sleep, the phases are:
 	device driver's ->pm.resume_noirq() method to perform device-specific
 	actions.
 
-    2.	The resume methods should bring the the device back to its operating
+    2.	The resume_early methods should prepare devices for the execution of
+	the resume methods.  This generally involves undoing the actions of the
+	preceding suspend_late phase.
+
+    3	The resume methods should bring the the device back to its operating
 	state, so that it can perform normal I/O.  This generally involves
 	undoing the actions of the suspend phase.
 
-    3.	The complete phase uses only a bus callback.  The method should undo the
-	actions of the prepare phase.  Note, however, that new children may be
-	registered below the device as soon as the resume callbacks occur; it's
-	not necessary to wait until the complete phase.
+    4.	The complete phase should undo the actions of the prepare phase.  Note,
+	however, that new children may be registered below the device as soon as
+	the resume callbacks occur; it's not necessary to wait until the
+	complete phase.
 
 At the end of these phases, drivers should be as functional as they were before
 suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are
@@ -429,8 +444,8 @@ an image of the system memory while everything is stable, reactivate all
 devices (thaw), write the image to permanent storage, and finally shut down the
 system (poweroff).  The phases used to accomplish this are:
 
-	prepare, freeze, freeze_noirq, thaw_noirq, thaw, complete,
-	prepare, poweroff, poweroff_noirq
+	prepare, freeze, freeze_late, freeze_noirq, thaw_noirq, thaw_early,
+	thaw, complete, prepare, poweroff, poweroff_late, poweroff_noirq
 
     1.	The prepare phase is discussed in the "Entering System Suspend" section
 	above.
@@ -441,7 +456,11 @@ system (poweroff).  The phases used to accomplish this are:
 	save time it's best not to do so.  Also, the device should not be
 	prepared to generate wakeup events.
 
-    3.	The freeze_noirq phase is analogous to the suspend_noirq phase discussed
+    3.	The freeze_late phase is analogous to the suspend_late phase described
+	above, except that the device should not be put in a low-power state and
+	should not be allowed to generate wakeup events by it.
+
+    4.	The freeze_noirq phase is analogous to the suspend_noirq phase discussed
 	above, except again that the device should not be put in a low-power
 	state and should not be allowed to generate wakeup events.
 
@@ -449,15 +468,19 @@ At this point the system image is created.  All devices should be inactive and
 the contents of memory should remain undisturbed while this happens, so that the
 image forms an atomic snapshot of the system state.
 
-    4.	The thaw_noirq phase is analogous to the resume_noirq phase discussed
+    5.	The thaw_noirq phase is analogous to the resume_noirq phase discussed
 	above.  The main difference is that its methods can assume the device is
 	in the same state as at the end of the freeze_noirq phase.
 
-    5.	The thaw phase is analogous to the resume phase discussed above.  Its
+    6.	The thaw_early phase is analogous to the resume_early phase described
+	above.  Its methods should undo the actions of the preceding
+	freeze_late, if necessary.
+
+    7.	The thaw phase is analogous to the resume phase discussed above.  Its
 	methods should bring the device back to an operating state, so that it
 	can be used for saving the image if necessary.
 
-    6.	The complete phase is discussed in the "Leaving System Suspend" section
+    8.	The complete phase is discussed in the "Leaving System Suspend" section
 	above.
 
 At this point the system image is saved, and the devices then need to be
@@ -465,16 +488,19 @@ prepared for the upcoming system shutdown.  This is much like suspending them
 before putting the system into the standby or memory sleep state, and the phases
 are similar.
 
-    7.	The prepare phase is discussed above.
+    9.	The prepare phase is discussed above.
+
+    10.	The poweroff phase is analogous to the suspend phase.
 
-    8.	The poweroff phase is analogous to the suspend phase.
+    11.	The poweroff_late phase is analogous to the suspend_late phase.
 
-    9.	The poweroff_noirq phase is analogous to the suspend_noirq phase.
+    12.	The poweroff_noirq phase is analogous to the suspend_noirq phase.
 
-The poweroff and poweroff_noirq callbacks should do essentially the same things
-as the suspend and suspend_noirq callbacks.  The only notable difference is that
-they need not store the device register values, because the registers should
-already have been stored during the freeze or freeze_noirq phases.
+The poweroff, poweroff_late and poweroff_noirq callbacks should do essentially
+the same things as the suspend, suspend_late and suspend_noirq callbacks,
+respectively.  The only notable difference is that they need not store the
+device register values, because the registers should already have been stored
+during the freeze, freeze_late or freeze_noirq phases.
 
 
 Leaving Hibernation
@@ -518,22 +544,25 @@ To achieve this, the image kernel must restore the devices' pre-hibernation
 functionality.  The operation is much like waking up from the memory sleep
 state, although it involves different phases:
 
-	restore_noirq, restore, complete
+	restore_noirq, restore_early, restore, complete
 
     1.	The restore_noirq phase is analogous to the resume_noirq phase.
 
-    2.	The restore phase is analogous to the resume phase.
+    2.	The restore_early phase is analogous to the resume_early phase.
+
+    3.	The restore phase is analogous to the resume phase.
 
-    3.	The complete phase is discussed above.
+    4.	The complete phase is discussed above.
 
-The main difference from resume[_noirq] is that restore[_noirq] must assume the
-device has been accessed and reconfigured by the boot loader or the boot kernel.
-Consequently the state of the device may be different from the state remembered
-from the freeze and freeze_noirq phases.  The device may even need to be reset
-and completely re-initialized.  In many cases this difference doesn't matter, so
-the resume[_noirq] and restore[_norq] method pointers can be set to the same
-routines.  Nevertheless, different callback pointers are used in case there is a
-situation where it actually matters.
+The main difference from resume[_early|_noirq] is that restore[_early|_noirq]
+must assume the device has been accessed and reconfigured by the boot loader or
+the boot kernel.  Consequently the state of the device may be different from the
+state remembered from the freeze, freeze_late and freeze_noirq phases.  The
+device may even need to be reset and completely re-initialized.  In many cases
+this difference doesn't matter, so the resume[_early|_noirq] and
+restore[_early|_norq] method pointers can be set to the same routines.
+Nevertheless, different callback pointers are used in case there is a situation
+where it actually does matter.
 
 
 Device Power Management Domains
diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index ebd7490ef1df8eefdbaeb94ad656aeb091f9368f..ec715cd78fbb7028bde53c0664a513701b162858 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -63,6 +63,27 @@ devices have been reinitialized, the function thaw_processes() is called in
 order to clear the PF_FROZEN flag for each frozen task.  Then, the tasks that
 have been frozen leave __refrigerator() and continue running.
 
+
+Rationale behind the functions dealing with freezing and thawing of tasks:
+-------------------------------------------------------------------------
+
+freeze_processes():
+  - freezes only userspace tasks
+
+freeze_kernel_threads():
+  - freezes all tasks (including kernel threads) because we can't freeze
+    kernel threads without freezing userspace tasks
+
+thaw_kernel_threads():
+  - thaws only kernel threads; this is particularly useful if we need to do
+    anything special in between thawing of kernel threads and thawing of
+    userspace tasks, or if we want to postpone the thawing of userspace tasks
+
+thaw_processes():
+  - thaws all tasks (including kernel threads) because we can't thaw userspace
+    tasks without thawing kernel threads
+
+
 III. Which kernel threads are freezable?
 
 Kernel threads are not freezable by default.  However, a kernel thread may clear
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 5d602f68a0e88735d5cee2bf0e2b7f3da4fc95f2..dfad6538b2737f5acdc4c2644342627d2d6010f2 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -34,6 +34,7 @@ config CPU_EXYNOS4210
 	select ARM_CPU_SUSPEND if PM
 	select S5P_PM if PM
 	select S5P_SLEEP if PM
+	select PM_GENERIC_DOMAINS
 	help
 	  Enable EXYNOS4210 CPU support
 
@@ -74,11 +75,6 @@ config EXYNOS4_SETUP_FIMD0
 	help
 	  Common setup code for FIMD0.
 
-config EXYNOS4_DEV_PD
-	bool
-	help
-	  Compile in platform device definitions for Power Domain
-
 config EXYNOS4_DEV_SYSMMU
 	bool
 	help
@@ -195,7 +191,6 @@ config MACH_SMDKV310
 	select EXYNOS4_DEV_AHCI
 	select SAMSUNG_DEV_KEYPAD
 	select EXYNOS4_DEV_DMA
-	select EXYNOS4_DEV_PD
 	select SAMSUNG_DEV_PWM
 	select EXYNOS4_DEV_USB_OHCI
 	select EXYNOS4_DEV_SYSMMU
@@ -243,7 +238,6 @@ config MACH_UNIVERSAL_C210
 	select S5P_DEV_ONENAND
 	select S5P_DEV_TV
 	select EXYNOS4_DEV_DMA
-	select EXYNOS4_DEV_PD
 	select EXYNOS4_SETUP_FIMD0
 	select EXYNOS4_SETUP_I2C1
 	select EXYNOS4_SETUP_I2C3
@@ -277,7 +271,6 @@ config MACH_NURI
 	select S5P_DEV_USB_EHCI
 	select S5P_SETUP_MIPIPHY
 	select EXYNOS4_DEV_DMA
-	select EXYNOS4_DEV_PD
 	select EXYNOS4_SETUP_FIMC
 	select EXYNOS4_SETUP_FIMD0
 	select EXYNOS4_SETUP_I2C1
@@ -310,7 +303,6 @@ config MACH_ORIGEN
 	select SAMSUNG_DEV_BACKLIGHT
 	select SAMSUNG_DEV_PWM
 	select EXYNOS4_DEV_DMA
-	select EXYNOS4_DEV_PD
 	select EXYNOS4_DEV_USB_OHCI
 	select EXYNOS4_SETUP_FIMD0
 	select EXYNOS4_SETUP_SDHCI
diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
index 5fc202cdfdb668483a227f1348dd12be77cf7f40..d9191f9a7af8560c69d60f39a703e687cd9bfca6 100644
--- a/arch/arm/mach-exynos/Makefile
+++ b/arch/arm/mach-exynos/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_CPU_EXYNOS4210)	+= clock-exynos4210.o
 obj-$(CONFIG_SOC_EXYNOS4212)	+= clock-exynos4212.o
 
 obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_PM_GENERIC_DOMAINS) += pm_domains.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 
 obj-$(CONFIG_ARCH_EXYNOS4)	+= pmu.o
@@ -45,7 +46,6 @@ obj-$(CONFIG_MACH_EXYNOS4_DT)		+= mach-exynos4-dt.o
 
 obj-$(CONFIG_ARCH_EXYNOS4)		+= dev-audio.o
 obj-$(CONFIG_EXYNOS4_DEV_AHCI)		+= dev-ahci.o
-obj-$(CONFIG_EXYNOS4_DEV_PD)		+= dev-pd.o
 obj-$(CONFIG_EXYNOS4_DEV_SYSMMU)	+= dev-sysmmu.o
 obj-$(CONFIG_EXYNOS4_DEV_DWMCI)		+= dev-dwmci.o
 obj-$(CONFIG_EXYNOS4_DEV_DMA)		+= dma.o
diff --git a/arch/arm/mach-exynos/dev-pd.c b/arch/arm/mach-exynos/dev-pd.c
deleted file mode 100644
index 3273f25d6a75596ee24939ebcaf73ede893d87a0..0000000000000000000000000000000000000000
--- a/arch/arm/mach-exynos/dev-pd.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/* linux/arch/arm/mach-exynos4/dev-pd.c
- *
- * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com
- *
- * EXYNOS4 - Power Domain support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-
-#include <mach/regs-pmu.h>
-
-#include <plat/pd.h>
-
-static int exynos4_pd_enable(struct device *dev)
-{
-	struct samsung_pd_info *pdata =  dev->platform_data;
-	u32 timeout;
-
-	__raw_writel(S5P_INT_LOCAL_PWR_EN, pdata->base);
-
-	/* Wait max 1ms */
-	timeout = 10;
-	while ((__raw_readl(pdata->base + 0x4) & S5P_INT_LOCAL_PWR_EN)
-		!= S5P_INT_LOCAL_PWR_EN) {
-		if (timeout == 0) {
-			printk(KERN_ERR "Power domain %s enable failed.\n",
-				dev_name(dev));
-			return -ETIMEDOUT;
-		}
-		timeout--;
-		udelay(100);
-	}
-
-	return 0;
-}
-
-static int exynos4_pd_disable(struct device *dev)
-{
-	struct samsung_pd_info *pdata =  dev->platform_data;
-	u32 timeout;
-
-	__raw_writel(0, pdata->base);
-
-	/* Wait max 1ms */
-	timeout = 10;
-	while (__raw_readl(pdata->base + 0x4) & S5P_INT_LOCAL_PWR_EN) {
-		if (timeout == 0) {
-			printk(KERN_ERR "Power domain %s disable failed.\n",
-				dev_name(dev));
-			return -ETIMEDOUT;
-		}
-		timeout--;
-		udelay(100);
-	}
-
-	return 0;
-}
-
-struct platform_device exynos4_device_pd[] = {
-	{
-		.name		= "samsung-pd",
-		.id		= 0,
-		.dev = {
-			.platform_data = &(struct samsung_pd_info) {
-				.enable		= exynos4_pd_enable,
-				.disable	= exynos4_pd_disable,
-				.base		= S5P_PMU_MFC_CONF,
-			},
-		},
-	}, {
-		.name		= "samsung-pd",
-		.id		= 1,
-		.dev = {
-			.platform_data = &(struct samsung_pd_info) {
-				.enable		= exynos4_pd_enable,
-				.disable	= exynos4_pd_disable,
-				.base		= S5P_PMU_G3D_CONF,
-			},
-		},
-	}, {
-		.name		= "samsung-pd",
-		.id		= 2,
-		.dev = {
-			.platform_data = &(struct samsung_pd_info) {
-				.enable		= exynos4_pd_enable,
-				.disable	= exynos4_pd_disable,
-				.base		= S5P_PMU_LCD0_CONF,
-			},
-		},
-	}, {
-		.name		= "samsung-pd",
-		.id		= 3,
-		.dev = {
-			.platform_data = &(struct samsung_pd_info) {
-				.enable		= exynos4_pd_enable,
-				.disable	= exynos4_pd_disable,
-				.base		= S5P_PMU_LCD1_CONF,
-			},
-		},
-	}, {
-		.name		= "samsung-pd",
-		.id		= 4,
-		.dev = {
-			.platform_data = &(struct samsung_pd_info) {
-				.enable		= exynos4_pd_enable,
-				.disable	= exynos4_pd_disable,
-				.base		= S5P_PMU_TV_CONF,
-			},
-		},
-	}, {
-		.name		= "samsung-pd",
-		.id		= 5,
-		.dev = {
-			.platform_data = &(struct samsung_pd_info) {
-				.enable		= exynos4_pd_enable,
-				.disable	= exynos4_pd_disable,
-				.base		= S5P_PMU_CAM_CONF,
-			},
-		},
-	}, {
-		.name		= "samsung-pd",
-		.id		= 6,
-		.dev = {
-			.platform_data = &(struct samsung_pd_info) {
-				.enable		= exynos4_pd_enable,
-				.disable	= exynos4_pd_disable,
-				.base		= S5P_PMU_GPS_CONF,
-			},
-		},
-	},
-};
diff --git a/arch/arm/mach-exynos/mach-nuri.c b/arch/arm/mach-exynos/mach-nuri.c
index 435261f83f46d5d8ada440e17bdc65a0b1e19a1f..aa37179d776c6fd3ae0bd8daeec51a80ee134bc7 100644
--- a/arch/arm/mach-exynos/mach-nuri.c
+++ b/arch/arm/mach-exynos/mach-nuri.c
@@ -1263,9 +1263,6 @@ static struct platform_device *nuri_devices[] __initdata = {
 	&s5p_device_mfc,
 	&s5p_device_mfc_l,
 	&s5p_device_mfc_r,
-	&exynos4_device_pd[PD_MFC],
-	&exynos4_device_pd[PD_LCD0],
-	&exynos4_device_pd[PD_CAM],
 	&s5p_device_fimc_md,
 
 	/* NURI Devices */
@@ -1315,14 +1312,6 @@ static void __init nuri_machine_init(void)
 
 	/* Last */
 	platform_add_devices(nuri_devices, ARRAY_SIZE(nuri_devices));
-	s5p_device_mfc.dev.parent = &exynos4_device_pd[PD_MFC].dev;
-	s5p_device_fimd0.dev.parent = &exynos4_device_pd[PD_LCD0].dev;
-
-	s5p_device_fimc0.dev.parent = &exynos4_device_pd[PD_CAM].dev;
-	s5p_device_fimc1.dev.parent = &exynos4_device_pd[PD_CAM].dev;
-	s5p_device_fimc2.dev.parent = &exynos4_device_pd[PD_CAM].dev;
-	s5p_device_fimc3.dev.parent = &exynos4_device_pd[PD_CAM].dev;
-	s5p_device_mipi_csis0.dev.parent = &exynos4_device_pd[PD_CAM].dev;
 }
 
 MACHINE_START(NURI, "NURI")
diff --git a/arch/arm/mach-exynos/mach-origen.c b/arch/arm/mach-exynos/mach-origen.c
index 0679b8ad2d1e1a4263521f805d7afd246334a037..fa5c4a59b0aadfcd3022c304c5d0cb49fe7fa5ad 100644
--- a/arch/arm/mach-exynos/mach-origen.c
+++ b/arch/arm/mach-exynos/mach-origen.c
@@ -621,13 +621,6 @@ static struct platform_device *origen_devices[] __initdata = {
 	&s5p_device_mfc_r,
 	&s5p_device_mixer,
 	&exynos4_device_ohci,
-	&exynos4_device_pd[PD_LCD0],
-	&exynos4_device_pd[PD_TV],
-	&exynos4_device_pd[PD_G3D],
-	&exynos4_device_pd[PD_LCD1],
-	&exynos4_device_pd[PD_CAM],
-	&exynos4_device_pd[PD_GPS],
-	&exynos4_device_pd[PD_MFC],
 	&origen_device_gpiokeys,
 	&origen_lcd_hv070wsa,
 };
@@ -695,13 +688,6 @@ static void __init origen_machine_init(void)
 
 	platform_add_devices(origen_devices, ARRAY_SIZE(origen_devices));
 
-	s5p_device_fimd0.dev.parent = &exynos4_device_pd[PD_LCD0].dev;
-
-	s5p_device_hdmi.dev.parent = &exynos4_device_pd[PD_TV].dev;
-	s5p_device_mixer.dev.parent = &exynos4_device_pd[PD_TV].dev;
-
-	s5p_device_mfc.dev.parent = &exynos4_device_pd[PD_MFC].dev;
-
 	samsung_bl_set(&origen_bl_gpio_info, &origen_bl_data);
 }
 
diff --git a/arch/arm/mach-exynos/mach-smdkv310.c b/arch/arm/mach-exynos/mach-smdkv310.c
index b2c5557f50e42e1c39c3b0d2f7cfc8f4035a44fd..5258b85636765bd24e9e5e478ec7fb586bf55cc9 100644
--- a/arch/arm/mach-exynos/mach-smdkv310.c
+++ b/arch/arm/mach-exynos/mach-smdkv310.c
@@ -277,13 +277,6 @@ static struct platform_device *smdkv310_devices[] __initdata = {
 	&s5p_device_mfc,
 	&s5p_device_mfc_l,
 	&s5p_device_mfc_r,
-	&exynos4_device_pd[PD_MFC],
-	&exynos4_device_pd[PD_G3D],
-	&exynos4_device_pd[PD_LCD0],
-	&exynos4_device_pd[PD_LCD1],
-	&exynos4_device_pd[PD_CAM],
-	&exynos4_device_pd[PD_TV],
-	&exynos4_device_pd[PD_GPS],
 	&exynos4_device_spdif,
 	&exynos4_device_sysmmu,
 	&samsung_asoc_dma,
@@ -336,10 +329,6 @@ static void s5p_tv_setup(void)
 	WARN_ON(gpio_request_one(EXYNOS4_GPX3(7), GPIOF_IN, "hpd-plug"));
 	s3c_gpio_cfgpin(EXYNOS4_GPX3(7), S3C_GPIO_SFN(0x3));
 	s3c_gpio_setpull(EXYNOS4_GPX3(7), S3C_GPIO_PULL_NONE);
-
-	/* setup dependencies between TV devices */
-	s5p_device_hdmi.dev.parent = &exynos4_device_pd[PD_TV].dev;
-	s5p_device_mixer.dev.parent = &exynos4_device_pd[PD_TV].dev;
 }
 
 static void __init smdkv310_map_io(void)
@@ -379,7 +368,6 @@ static void __init smdkv310_machine_init(void)
 	clk_xusbxti.rate = 24000000;
 
 	platform_add_devices(smdkv310_devices, ARRAY_SIZE(smdkv310_devices));
-	s5p_device_mfc.dev.parent = &exynos4_device_pd[PD_MFC].dev;
 }
 
 MACHINE_START(SMDKV310, "SMDKV310")
diff --git a/arch/arm/mach-exynos/mach-universal_c210.c b/arch/arm/mach-exynos/mach-universal_c210.c
index 38939956c34f7097958eda9fcd8690feac4e84ba..b2d495b31094f5f2a82498c122dfc30a64398ec9 100644
--- a/arch/arm/mach-exynos/mach-universal_c210.c
+++ b/arch/arm/mach-exynos/mach-universal_c210.c
@@ -971,7 +971,6 @@ static struct platform_device *universal_devices[] __initdata = {
 	&s3c_device_i2c5,
 	&s5p_device_i2c_hdmiphy,
 	&hdmi_fixed_voltage,
-	&exynos4_device_pd[PD_TV],
 	&s5p_device_hdmi,
 	&s5p_device_sdo,
 	&s5p_device_mixer,
@@ -984,9 +983,6 @@ static struct platform_device *universal_devices[] __initdata = {
 	&s5p_device_mfc,
 	&s5p_device_mfc_l,
 	&s5p_device_mfc_r,
-	&exynos4_device_pd[PD_MFC],
-	&exynos4_device_pd[PD_LCD0],
-	&exynos4_device_pd[PD_CAM],
 	&cam_i_core_fixed_reg_dev,
 	&cam_s_if_fixed_reg_dev,
 	&s5p_device_fimc_md,
@@ -1005,10 +1001,6 @@ void s5p_tv_setup(void)
 	gpio_request_one(EXYNOS4_GPX3(7), GPIOF_IN, "hpd-plug");
 	s3c_gpio_cfgpin(EXYNOS4_GPX3(7), S3C_GPIO_SFN(0x3));
 	s3c_gpio_setpull(EXYNOS4_GPX3(7), S3C_GPIO_PULL_NONE);
-
-	/* setup dependencies between TV devices */
-	s5p_device_hdmi.dev.parent = &exynos4_device_pd[PD_TV].dev;
-	s5p_device_mixer.dev.parent = &exynos4_device_pd[PD_TV].dev;
 }
 
 static void __init universal_reserve(void)
@@ -1042,15 +1034,6 @@ static void __init universal_machine_init(void)
 
 	/* Last */
 	platform_add_devices(universal_devices, ARRAY_SIZE(universal_devices));
-
-	s5p_device_mfc.dev.parent = &exynos4_device_pd[PD_MFC].dev;
-	s5p_device_fimd0.dev.parent = &exynos4_device_pd[PD_LCD0].dev;
-
-	s5p_device_fimc0.dev.parent = &exynos4_device_pd[PD_CAM].dev;
-	s5p_device_fimc1.dev.parent = &exynos4_device_pd[PD_CAM].dev;
-	s5p_device_fimc2.dev.parent = &exynos4_device_pd[PD_CAM].dev;
-	s5p_device_fimc3.dev.parent = &exynos4_device_pd[PD_CAM].dev;
-	s5p_device_mipi_csis0.dev.parent = &exynos4_device_pd[PD_CAM].dev;
 }
 
 MACHINE_START(UNIVERSAL_C210, "UNIVERSAL_C210")
diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
new file mode 100644
index 0000000000000000000000000000000000000000..0b04af2b13ccdb0cb49d646d56f146ab1f32f88e
--- /dev/null
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -0,0 +1,195 @@
+/*
+ * Exynos Generic power domain support.
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * Implementation of Exynos specific power domain control which is used in
+ * conjunction with runtime-pm. Support for both device-tree and non-device-tree
+ * based power domain support is included.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/pm_domain.h>
+#include <linux/delay.h>
+#include <linux/of_address.h>
+
+#include <mach/regs-pmu.h>
+#include <plat/devs.h>
+
+/*
+ * Exynos specific wrapper around the generic power domain
+ */
+struct exynos_pm_domain {
+	void __iomem *base;
+	char const *name;
+	bool is_off;
+	struct generic_pm_domain pd;
+};
+
+static int exynos_pd_power(struct generic_pm_domain *domain, bool power_on)
+{
+	struct exynos_pm_domain *pd;
+	void __iomem *base;
+	u32 timeout, pwr;
+	char *op;
+
+	pd = container_of(domain, struct exynos_pm_domain, pd);
+	base = pd->base;
+
+	pwr = power_on ? S5P_INT_LOCAL_PWR_EN : 0;
+	__raw_writel(pwr, base);
+
+	/* Wait max 1ms */
+	timeout = 10;
+
+	while ((__raw_readl(base + 0x4) & S5P_INT_LOCAL_PWR_EN)	!= pwr) {
+		if (!timeout) {
+			op = (power_on) ? "enable" : "disable";
+			pr_err("Power domain %s %s failed\n", domain->name, op);
+			return -ETIMEDOUT;
+		}
+		timeout--;
+		cpu_relax();
+		usleep_range(80, 100);
+	}
+	return 0;
+}
+
+static int exynos_pd_power_on(struct generic_pm_domain *domain)
+{
+	return exynos_pd_power(domain, true);
+}
+
+static int exynos_pd_power_off(struct generic_pm_domain *domain)
+{
+	return exynos_pd_power(domain, false);
+}
+
+#define EXYNOS_GPD(PD, BASE, NAME)			\
+static struct exynos_pm_domain PD = {			\
+	.base = (void __iomem *)BASE,			\
+	.name = NAME,					\
+	.pd = {						\
+		.power_off = exynos_pd_power_off,	\
+		.power_on = exynos_pd_power_on,	\
+	},						\
+}
+
+#ifdef CONFIG_OF
+static __init int exynos_pm_dt_parse_domains(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "samsung,exynos4210-pd") {
+		struct exynos_pm_domain *pd;
+
+		pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+		if (!pd) {
+			pr_err("%s: failed to allocate memory for domain\n",
+					__func__);
+			return -ENOMEM;
+		}
+
+		if (of_get_property(np, "samsung,exynos4210-pd-off", NULL))
+			pd->is_off = true;
+		pd->name = np->name;
+		pd->base = of_iomap(np, 0);
+		pd->pd.power_off = exynos_pd_power_off;
+		pd->pd.power_on = exynos_pd_power_on;
+		pd->pd.of_node = np;
+		pm_genpd_init(&pd->pd, NULL, false);
+	}
+	return 0;
+}
+#else
+static __init int exynos_pm_dt_parse_domains(void)
+{
+	return 0;
+}
+#endif /* CONFIG_OF */
+
+static __init void exynos_pm_add_dev_to_genpd(struct platform_device *pdev,
+						struct exynos_pm_domain *pd)
+{
+	if (pdev->dev.bus) {
+		if (pm_genpd_add_device(&pd->pd, &pdev->dev))
+			pr_info("%s: error in adding %s device to %s power"
+				"domain\n", __func__, dev_name(&pdev->dev),
+				pd->name);
+	}
+}
+
+EXYNOS_GPD(exynos4_pd_mfc, S5P_PMU_MFC_CONF, "pd-mfc");
+EXYNOS_GPD(exynos4_pd_g3d, S5P_PMU_G3D_CONF, "pd-g3d");
+EXYNOS_GPD(exynos4_pd_lcd0, S5P_PMU_LCD0_CONF, "pd-lcd0");
+EXYNOS_GPD(exynos4_pd_lcd1, S5P_PMU_LCD1_CONF, "pd-lcd1");
+EXYNOS_GPD(exynos4_pd_tv, S5P_PMU_TV_CONF, "pd-tv");
+EXYNOS_GPD(exynos4_pd_cam, S5P_PMU_CAM_CONF, "pd-cam");
+EXYNOS_GPD(exynos4_pd_gps, S5P_PMU_GPS_CONF, "pd-gps");
+
+static struct exynos_pm_domain *exynos4_pm_domains[] = {
+	&exynos4_pd_mfc,
+	&exynos4_pd_g3d,
+	&exynos4_pd_lcd0,
+	&exynos4_pd_lcd1,
+	&exynos4_pd_tv,
+	&exynos4_pd_cam,
+	&exynos4_pd_gps,
+};
+
+static __init int exynos4_pm_init_power_domain(void)
+{
+	int idx;
+
+	if (of_have_populated_dt())
+		return exynos_pm_dt_parse_domains();
+
+	for (idx = 0; idx < ARRAY_SIZE(exynos4_pm_domains); idx++)
+		pm_genpd_init(&exynos4_pm_domains[idx]->pd, NULL,
+				exynos4_pm_domains[idx]->is_off);
+
+#ifdef CONFIG_S5P_DEV_FIMD0
+	exynos_pm_add_dev_to_genpd(&s5p_device_fimd0, &exynos4_pd_lcd0);
+#endif
+#ifdef CONFIG_S5P_DEV_TV
+	exynos_pm_add_dev_to_genpd(&s5p_device_hdmi, &exynos4_pd_tv);
+	exynos_pm_add_dev_to_genpd(&s5p_device_mixer, &exynos4_pd_tv);
+#endif
+#ifdef CONFIG_S5P_DEV_MFC
+	exynos_pm_add_dev_to_genpd(&s5p_device_mfc, &exynos4_pd_mfc);
+#endif
+#ifdef CONFIG_S5P_DEV_FIMC0
+	exynos_pm_add_dev_to_genpd(&s5p_device_fimc0, &exynos4_pd_cam);
+#endif
+#ifdef CONFIG_S5P_DEV_FIMC1
+	exynos_pm_add_dev_to_genpd(&s5p_device_fimc1, &exynos4_pd_cam);
+#endif
+#ifdef CONFIG_S5P_DEV_FIMC2
+	exynos_pm_add_dev_to_genpd(&s5p_device_fimc2, &exynos4_pd_cam);
+#endif
+#ifdef CONFIG_S5P_DEV_FIMC3
+	exynos_pm_add_dev_to_genpd(&s5p_device_fimc3, &exynos4_pd_cam);
+#endif
+#ifdef CONFIG_S5P_DEV_CSIS0
+	exynos_pm_add_dev_to_genpd(&s5p_device_mipi_csis0, &exynos4_pd_cam);
+#endif
+#ifdef CONFIG_S5P_DEV_CSIS1
+	exynos_pm_add_dev_to_genpd(&s5p_device_mipi_csis1, &exynos4_pd_cam);
+#endif
+	return 0;
+}
+arch_initcall(exynos4_pm_init_power_domain);
+
+static __init int exynos_pm_late_initcall(void)
+{
+	pm_genpd_poweroff_unused();
+	return 0;
+}
+late_initcall(exynos_pm_late_initcall);
diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c
index a83cf51fc09906a13018472b4f563baebaf7bcb5..cccf91b8fae196e56cf4770b078f0f08cda9b538 100644
--- a/arch/arm/mach-shmobile/setup-sh7372.c
+++ b/arch/arm/mach-shmobile/setup-sh7372.c
@@ -1043,6 +1043,8 @@ void __init sh7372_add_standard_devices(void)
 	sh7372_add_device_to_domain(&sh7372_a4r, &veu2_device);
 	sh7372_add_device_to_domain(&sh7372_a4r, &veu3_device);
 	sh7372_add_device_to_domain(&sh7372_a4r, &jpu_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &tmu00_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &tmu01_device);
 }
 
 void __init sh7372_add_early_devices(void)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f76623cbe263ee0f95d847845118f84f56974be9..5d56931a15b352b70262968db9b94a7ab3ac9ea6 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1234,8 +1234,7 @@ static int suspend(int vetoable)
 	struct apm_user	*as;
 
 	dpm_suspend_start(PMSG_SUSPEND);
-
-	dpm_suspend_noirq(PMSG_SUSPEND);
+	dpm_suspend_end(PMSG_SUSPEND);
 
 	local_irq_disable();
 	syscore_suspend();
@@ -1259,9 +1258,9 @@ static int suspend(int vetoable)
 	syscore_resume();
 	local_irq_enable();
 
-	dpm_resume_noirq(PMSG_RESUME);
-
+	dpm_resume_start(PMSG_RESUME);
 	dpm_resume_end(PMSG_RESUME);
+
 	queue_event(APM_NORMAL_RESUME, NULL);
 	spin_lock(&user_list_lock);
 	for (as = user_list; as != NULL; as = as->next) {
@@ -1277,7 +1276,7 @@ static void standby(void)
 {
 	int err;
 
-	dpm_suspend_noirq(PMSG_SUSPEND);
+	dpm_suspend_end(PMSG_SUSPEND);
 
 	local_irq_disable();
 	syscore_suspend();
@@ -1291,7 +1290,7 @@ static void standby(void)
 	syscore_resume();
 	local_irq_enable();
 
-	dpm_resume_noirq(PMSG_RESUME);
+	dpm_resume_start(PMSG_RESUME);
 }
 
 static apm_event_t get_event(void)
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 978bbf7ac6af03bb26eed17329d3a24f6bf5337e..73ce9fbe983961598d53f3bbeef0057cda084d4b 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -366,7 +366,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd)
 	not_suspended = 0;
 	list_for_each_entry(pdd, &genpd->dev_list, list_node)
 		if (pdd->dev->driver && (!pm_runtime_suspended(pdd->dev)
-		    || pdd->dev->power.irq_safe))
+		    || pdd->dev->power.irq_safe || to_gpd_data(pdd)->always_on))
 			not_suspended++;
 
 	if (not_suspended > genpd->in_progress)
@@ -503,6 +503,9 @@ static int pm_genpd_runtime_suspend(struct device *dev)
 
 	might_sleep_if(!genpd->dev_irq_safe);
 
+	if (dev_gpd_data(dev)->always_on)
+		return -EBUSY;
+
 	stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
 	if (stop_ok && !stop_ok(dev))
 		return -EBUSY;
@@ -764,8 +767,10 @@ static int pm_genpd_prepare(struct device *dev)
 
 	genpd_acquire_lock(genpd);
 
-	if (genpd->prepared_count++ == 0)
+	if (genpd->prepared_count++ == 0) {
+		genpd->suspended_count = 0;
 		genpd->suspend_power_off = genpd->status == GPD_STATE_POWER_OFF;
+	}
 
 	genpd_release_lock(genpd);
 
@@ -820,17 +825,16 @@ static int pm_genpd_suspend(struct device *dev)
 }
 
 /**
- * pm_genpd_suspend_noirq - Late suspend of a device from an I/O PM domain.
+ * pm_genpd_suspend_late - Late suspend of a device from an I/O PM domain.
  * @dev: Device to suspend.
  *
  * Carry out a late suspend of a device under the assumption that its
  * pm_domain field points to the domain member of an object of type
  * struct generic_pm_domain representing a PM domain consisting of I/O devices.
  */
-static int pm_genpd_suspend_noirq(struct device *dev)
+static int pm_genpd_suspend_late(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
-	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -838,14 +842,28 @@ static int pm_genpd_suspend_noirq(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	if (genpd->suspend_power_off)
-		return 0;
+	return genpd->suspend_power_off ? 0 : genpd_suspend_late(genpd, dev);
+}
 
-	ret = genpd_suspend_late(genpd, dev);
-	if (ret)
-		return ret;
+/**
+ * pm_genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain.
+ * @dev: Device to suspend.
+ *
+ * Stop the device and remove power from the domain if all devices in it have
+ * been stopped.
+ */
+static int pm_genpd_suspend_noirq(struct device *dev)
+{
+	struct generic_pm_domain *genpd;
 
-	if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))
+	dev_dbg(dev, "%s()\n", __func__);
+
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return -EINVAL;
+
+	if (genpd->suspend_power_off || dev_gpd_data(dev)->always_on
+	    || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)))
 		return 0;
 
 	genpd_stop_dev(genpd, dev);
@@ -862,13 +880,10 @@ static int pm_genpd_suspend_noirq(struct device *dev)
 }
 
 /**
- * pm_genpd_resume_noirq - Early resume of a device from an I/O power domain.
+ * pm_genpd_resume_noirq - Start of resume of device in an I/O PM domain.
  * @dev: Device to resume.
  *
- * Carry out an early resume of a device under the assumption that its
- * pm_domain field points to the domain member of an object of type
- * struct generic_pm_domain representing a power domain consisting of I/O
- * devices.
+ * Restore power to the device's PM domain, if necessary, and start the device.
  */
 static int pm_genpd_resume_noirq(struct device *dev)
 {
@@ -880,7 +895,8 @@ static int pm_genpd_resume_noirq(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	if (genpd->suspend_power_off)
+	if (genpd->suspend_power_off || dev_gpd_data(dev)->always_on
+	    || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)))
 		return 0;
 
 	/*
@@ -890,13 +906,34 @@ static int pm_genpd_resume_noirq(struct device *dev)
 	 */
 	pm_genpd_poweron(genpd);
 	genpd->suspended_count--;
-	genpd_start_dev(genpd, dev);
 
-	return genpd_resume_early(genpd, dev);
+	return genpd_start_dev(genpd, dev);
 }
 
 /**
- * pm_genpd_resume - Resume a device belonging to an I/O power domain.
+ * pm_genpd_resume_early - Early resume of a device in an I/O PM domain.
+ * @dev: Device to resume.
+ *
+ * Carry out an early resume of a device under the assumption that its
+ * pm_domain field points to the domain member of an object of type
+ * struct generic_pm_domain representing a power domain consisting of I/O
+ * devices.
+ */
+static int pm_genpd_resume_early(struct device *dev)
+{
+	struct generic_pm_domain *genpd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return -EINVAL;
+
+	return genpd->suspend_power_off ? 0 : genpd_resume_early(genpd, dev);
+}
+
+/**
+ * pm_genpd_resume - Resume of device in an I/O PM domain.
  * @dev: Device to resume.
  *
  * Resume a device under the assumption that its pm_domain field points to the
@@ -917,7 +954,7 @@ static int pm_genpd_resume(struct device *dev)
 }
 
 /**
- * pm_genpd_freeze - Freeze a device belonging to an I/O power domain.
+ * pm_genpd_freeze - Freezing a device in an I/O PM domain.
  * @dev: Device to freeze.
  *
  * Freeze a device under the assumption that its pm_domain field points to the
@@ -938,7 +975,29 @@ static int pm_genpd_freeze(struct device *dev)
 }
 
 /**
- * pm_genpd_freeze_noirq - Late freeze of a device from an I/O power domain.
+ * pm_genpd_freeze_late - Late freeze of a device in an I/O PM domain.
+ * @dev: Device to freeze.
+ *
+ * Carry out a late freeze of a device under the assumption that its
+ * pm_domain field points to the domain member of an object of type
+ * struct generic_pm_domain representing a power domain consisting of I/O
+ * devices.
+ */
+static int pm_genpd_freeze_late(struct device *dev)
+{
+	struct generic_pm_domain *genpd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return -EINVAL;
+
+	return genpd->suspend_power_off ? 0 : genpd_freeze_late(genpd, dev);
+}
+
+/**
+ * pm_genpd_freeze_noirq - Completion of freezing a device in an I/O PM domain.
  * @dev: Device to freeze.
  *
  * Carry out a late freeze of a device under the assumption that its
@@ -949,7 +1008,6 @@ static int pm_genpd_freeze(struct device *dev)
 static int pm_genpd_freeze_noirq(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
-	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -957,20 +1015,33 @@ static int pm_genpd_freeze_noirq(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	if (genpd->suspend_power_off)
-		return 0;
+	return genpd->suspend_power_off || dev_gpd_data(dev)->always_on ?
+		0 : genpd_stop_dev(genpd, dev);
+}
 
-	ret = genpd_freeze_late(genpd, dev);
-	if (ret)
-		return ret;
+/**
+ * pm_genpd_thaw_noirq - Early thaw of device in an I/O PM domain.
+ * @dev: Device to thaw.
+ *
+ * Start the device, unless power has been removed from the domain already
+ * before the system transition.
+ */
+static int pm_genpd_thaw_noirq(struct device *dev)
+{
+	struct generic_pm_domain *genpd;
 
-	genpd_stop_dev(genpd, dev);
+	dev_dbg(dev, "%s()\n", __func__);
 
-	return 0;
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return -EINVAL;
+
+	return genpd->suspend_power_off || dev_gpd_data(dev)->always_on ?
+		0 : genpd_start_dev(genpd, dev);
 }
 
 /**
- * pm_genpd_thaw_noirq - Early thaw of a device from an I/O power domain.
+ * pm_genpd_thaw_early - Early thaw of device in an I/O PM domain.
  * @dev: Device to thaw.
  *
  * Carry out an early thaw of a device under the assumption that its
@@ -978,7 +1049,7 @@ static int pm_genpd_freeze_noirq(struct device *dev)
  * struct generic_pm_domain representing a power domain consisting of I/O
  * devices.
  */
-static int pm_genpd_thaw_noirq(struct device *dev)
+static int pm_genpd_thaw_early(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
 
@@ -988,12 +1059,7 @@ static int pm_genpd_thaw_noirq(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	if (genpd->suspend_power_off)
-		return 0;
-
-	genpd_start_dev(genpd, dev);
-
-	return genpd_thaw_early(genpd, dev);
+	return genpd->suspend_power_off ? 0 : genpd_thaw_early(genpd, dev);
 }
 
 /**
@@ -1018,13 +1084,11 @@ static int pm_genpd_thaw(struct device *dev)
 }
 
 /**
- * pm_genpd_restore_noirq - Early restore of a device from an I/O power domain.
+ * pm_genpd_restore_noirq - Start of restore of device in an I/O PM domain.
  * @dev: Device to resume.
  *
- * Carry out an early restore of a device under the assumption that its
- * pm_domain field points to the domain member of an object of type
- * struct generic_pm_domain representing a power domain consisting of I/O
- * devices.
+ * Make sure the domain will be in the same power state as before the
+ * hibernation the system is resuming from and start the device if necessary.
  */
 static int pm_genpd_restore_noirq(struct device *dev)
 {
@@ -1040,23 +1104,35 @@ static int pm_genpd_restore_noirq(struct device *dev)
 	 * Since all of the "noirq" callbacks are executed sequentially, it is
 	 * guaranteed that this function will never run twice in parallel for
 	 * the same PM domain, so it is not necessary to use locking here.
+	 *
+	 * At this point suspended_count == 0 means we are being run for the
+	 * first time for the given domain in the present cycle.
 	 */
-	genpd->status = GPD_STATE_POWER_OFF;
-	if (genpd->suspend_power_off) {
+	if (genpd->suspended_count++ == 0) {
 		/*
-		 * The boot kernel might put the domain into the power on state,
-		 * so make sure it really is powered off.
+		 * The boot kernel might put the domain into arbitrary state,
+		 * so make it appear as powered off to pm_genpd_poweron(), so
+		 * that it tries to power it on in case it was really off.
 		 */
-		if (genpd->power_off)
-			genpd->power_off(genpd);
-		return 0;
+		genpd->status = GPD_STATE_POWER_OFF;
+		if (genpd->suspend_power_off) {
+			/*
+			 * If the domain was off before the hibernation, make
+			 * sure it will be off going forward.
+			 */
+			if (genpd->power_off)
+				genpd->power_off(genpd);
+
+			return 0;
+		}
 	}
 
+	if (genpd->suspend_power_off)
+		return 0;
+
 	pm_genpd_poweron(genpd);
-	genpd->suspended_count--;
-	genpd_start_dev(genpd, dev);
 
-	return genpd_resume_early(genpd, dev);
+	return dev_gpd_data(dev)->always_on ? 0 : genpd_start_dev(genpd, dev);
 }
 
 /**
@@ -1099,11 +1175,15 @@ static void pm_genpd_complete(struct device *dev)
 
 #define pm_genpd_prepare		NULL
 #define pm_genpd_suspend		NULL
+#define pm_genpd_suspend_late		NULL
 #define pm_genpd_suspend_noirq		NULL
+#define pm_genpd_resume_early		NULL
 #define pm_genpd_resume_noirq		NULL
 #define pm_genpd_resume			NULL
 #define pm_genpd_freeze			NULL
+#define pm_genpd_freeze_late		NULL
 #define pm_genpd_freeze_noirq		NULL
+#define pm_genpd_thaw_early		NULL
 #define pm_genpd_thaw_noirq		NULL
 #define pm_genpd_thaw			NULL
 #define pm_genpd_restore_noirq		NULL
@@ -1170,6 +1250,38 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
 	return ret;
 }
 
+/**
+ * __pm_genpd_of_add_device - Add a device to an I/O PM domain.
+ * @genpd_node: Device tree node pointer representing a PM domain to which the
+ *   the device is added to.
+ * @dev: Device to be added.
+ * @td: Set of PM QoS timing parameters to attach to the device.
+ */
+int __pm_genpd_of_add_device(struct device_node *genpd_node, struct device *dev,
+			     struct gpd_timing_data *td)
+{
+	struct generic_pm_domain *genpd = NULL, *gpd;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (IS_ERR_OR_NULL(genpd_node) || IS_ERR_OR_NULL(dev))
+		return -EINVAL;
+
+	mutex_lock(&gpd_list_lock);
+	list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
+		if (gpd->of_node == genpd_node) {
+			genpd = gpd;
+			break;
+		}
+	}
+	mutex_unlock(&gpd_list_lock);
+
+	if (!genpd)
+		return -EINVAL;
+
+	return __pm_genpd_add_device(genpd, dev, td);
+}
+
 /**
  * pm_genpd_remove_device - Remove a device from an I/O PM domain.
  * @genpd: PM domain to remove the device from.
@@ -1215,6 +1327,26 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd,
 	return ret;
 }
 
+/**
+ * pm_genpd_dev_always_on - Set/unset the "always on" flag for a given device.
+ * @dev: Device to set/unset the flag for.
+ * @val: The new value of the device's "always on" flag.
+ */
+void pm_genpd_dev_always_on(struct device *dev, bool val)
+{
+	struct pm_subsys_data *psd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+
+	psd = dev_to_psd(dev);
+	if (psd && psd->domain_data)
+		to_gpd_data(psd->domain_data)->always_on = val;
+
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+}
+EXPORT_SYMBOL_GPL(pm_genpd_dev_always_on);
+
 /**
  * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain.
  * @genpd: Master PM domain to add the subdomain to.
@@ -1450,7 +1582,7 @@ static int pm_genpd_default_suspend_late(struct device *dev)
 {
 	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend_late;
 
-	return cb ? cb(dev) : pm_generic_suspend_noirq(dev);
+	return cb ? cb(dev) : pm_generic_suspend_late(dev);
 }
 
 /**
@@ -1461,7 +1593,7 @@ static int pm_genpd_default_resume_early(struct device *dev)
 {
 	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume_early;
 
-	return cb ? cb(dev) : pm_generic_resume_noirq(dev);
+	return cb ? cb(dev) : pm_generic_resume_early(dev);
 }
 
 /**
@@ -1494,7 +1626,7 @@ static int pm_genpd_default_freeze_late(struct device *dev)
 {
 	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late;
 
-	return cb ? cb(dev) : pm_generic_freeze_noirq(dev);
+	return cb ? cb(dev) : pm_generic_freeze_late(dev);
 }
 
 /**
@@ -1505,7 +1637,7 @@ static int pm_genpd_default_thaw_early(struct device *dev)
 {
 	int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early;
 
-	return cb ? cb(dev) : pm_generic_thaw_noirq(dev);
+	return cb ? cb(dev) : pm_generic_thaw_early(dev);
 }
 
 /**
@@ -1557,23 +1689,28 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->poweroff_task = NULL;
 	genpd->resume_count = 0;
 	genpd->device_count = 0;
-	genpd->suspended_count = 0;
 	genpd->max_off_time_ns = -1;
 	genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend;
 	genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume;
 	genpd->domain.ops.runtime_idle = pm_generic_runtime_idle;
 	genpd->domain.ops.prepare = pm_genpd_prepare;
 	genpd->domain.ops.suspend = pm_genpd_suspend;
+	genpd->domain.ops.suspend_late = pm_genpd_suspend_late;
 	genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq;
 	genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq;
+	genpd->domain.ops.resume_early = pm_genpd_resume_early;
 	genpd->domain.ops.resume = pm_genpd_resume;
 	genpd->domain.ops.freeze = pm_genpd_freeze;
+	genpd->domain.ops.freeze_late = pm_genpd_freeze_late;
 	genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq;
 	genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq;
+	genpd->domain.ops.thaw_early = pm_genpd_thaw_early;
 	genpd->domain.ops.thaw = pm_genpd_thaw;
 	genpd->domain.ops.poweroff = pm_genpd_suspend;
+	genpd->domain.ops.poweroff_late = pm_genpd_suspend_late;
 	genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq;
 	genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq;
+	genpd->domain.ops.restore_early = pm_genpd_resume_early;
 	genpd->domain.ops.restore = pm_genpd_resume;
 	genpd->domain.ops.complete = pm_genpd_complete;
 	genpd->dev_ops.save_state = pm_genpd_default_save_state;
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 10bdd793f0bd86f48814caf66fd3f1a533183154..d03d290f31c25b773da06c111b8cd1440f974c51 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -92,59 +92,28 @@ int pm_generic_prepare(struct device *dev)
 }
 
 /**
- * __pm_generic_call - Generic suspend/freeze/poweroff/thaw subsystem callback.
- * @dev: Device to handle.
- * @event: PM transition of the system under way.
- * @bool: Whether or not this is the "noirq" stage.
- *
- * Execute the PM callback corresponding to @event provided by the driver of
- * @dev, if defined, and return its error code.    Return 0 if the callback is
- * not present.
+ * pm_generic_suspend_noirq - Generic suspend_noirq callback for subsystems.
+ * @dev: Device to suspend.
  */
-static int __pm_generic_call(struct device *dev, int event, bool noirq)
+int pm_generic_suspend_noirq(struct device *dev)
 {
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-	int (*callback)(struct device *);
-
-	if (!pm)
-		return 0;
-
-	switch (event) {
-	case PM_EVENT_SUSPEND:
-		callback = noirq ? pm->suspend_noirq : pm->suspend;
-		break;
-	case PM_EVENT_FREEZE:
-		callback = noirq ? pm->freeze_noirq : pm->freeze;
-		break;
-	case PM_EVENT_HIBERNATE:
-		callback = noirq ? pm->poweroff_noirq : pm->poweroff;
-		break;
-	case PM_EVENT_RESUME:
-		callback = noirq ? pm->resume_noirq : pm->resume;
-		break;
-	case PM_EVENT_THAW:
-		callback = noirq ? pm->thaw_noirq : pm->thaw;
-		break;
-	case PM_EVENT_RESTORE:
-		callback = noirq ? pm->restore_noirq : pm->restore;
-		break;
-	default:
-		callback = NULL;
-		break;
-	}
 
-	return callback ? callback(dev) : 0;
+	return pm && pm->suspend_noirq ? pm->suspend_noirq(dev) : 0;
 }
+EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq);
 
 /**
- * pm_generic_suspend_noirq - Generic suspend_noirq callback for subsystems.
+ * pm_generic_suspend_late - Generic suspend_late callback for subsystems.
  * @dev: Device to suspend.
  */
-int pm_generic_suspend_noirq(struct device *dev)
+int pm_generic_suspend_late(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_SUSPEND, true);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->suspend_late ? pm->suspend_late(dev) : 0;
 }
-EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq);
+EXPORT_SYMBOL_GPL(pm_generic_suspend_late);
 
 /**
  * pm_generic_suspend - Generic suspend callback for subsystems.
@@ -152,7 +121,9 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend_noirq);
  */
 int pm_generic_suspend(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_SUSPEND, false);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->suspend ? pm->suspend(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_suspend);
 
@@ -162,17 +133,33 @@ EXPORT_SYMBOL_GPL(pm_generic_suspend);
  */
 int pm_generic_freeze_noirq(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_FREEZE, true);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->freeze_noirq ? pm->freeze_noirq(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_freeze_noirq);
 
+/**
+ * pm_generic_freeze_late - Generic freeze_late callback for subsystems.
+ * @dev: Device to freeze.
+ */
+int pm_generic_freeze_late(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->freeze_late ? pm->freeze_late(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_freeze_late);
+
 /**
  * pm_generic_freeze - Generic freeze callback for subsystems.
  * @dev: Device to freeze.
  */
 int pm_generic_freeze(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_FREEZE, false);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->freeze ? pm->freeze(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_freeze);
 
@@ -182,17 +169,33 @@ EXPORT_SYMBOL_GPL(pm_generic_freeze);
  */
 int pm_generic_poweroff_noirq(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_HIBERNATE, true);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->poweroff_noirq ? pm->poweroff_noirq(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_poweroff_noirq);
 
+/**
+ * pm_generic_poweroff_late - Generic poweroff_late callback for subsystems.
+ * @dev: Device to handle.
+ */
+int pm_generic_poweroff_late(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->poweroff_late ? pm->poweroff_late(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_poweroff_late);
+
 /**
  * pm_generic_poweroff - Generic poweroff callback for subsystems.
  * @dev: Device to handle.
  */
 int pm_generic_poweroff(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_HIBERNATE, false);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->poweroff ? pm->poweroff(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_poweroff);
 
@@ -202,17 +205,33 @@ EXPORT_SYMBOL_GPL(pm_generic_poweroff);
  */
 int pm_generic_thaw_noirq(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_THAW, true);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->thaw_noirq ? pm->thaw_noirq(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_thaw_noirq);
 
+/**
+ * pm_generic_thaw_early - Generic thaw_early callback for subsystems.
+ * @dev: Device to thaw.
+ */
+int pm_generic_thaw_early(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->thaw_early ? pm->thaw_early(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_thaw_early);
+
 /**
  * pm_generic_thaw - Generic thaw callback for subsystems.
  * @dev: Device to thaw.
  */
 int pm_generic_thaw(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_THAW, false);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->thaw ? pm->thaw(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_thaw);
 
@@ -222,17 +241,33 @@ EXPORT_SYMBOL_GPL(pm_generic_thaw);
  */
 int pm_generic_resume_noirq(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_RESUME, true);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->resume_noirq ? pm->resume_noirq(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_resume_noirq);
 
+/**
+ * pm_generic_resume_early - Generic resume_early callback for subsystems.
+ * @dev: Device to resume.
+ */
+int pm_generic_resume_early(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->resume_early ? pm->resume_early(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_resume_early);
+
 /**
  * pm_generic_resume - Generic resume callback for subsystems.
  * @dev: Device to resume.
  */
 int pm_generic_resume(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_RESUME, false);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->resume ? pm->resume(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_resume);
 
@@ -242,17 +277,33 @@ EXPORT_SYMBOL_GPL(pm_generic_resume);
  */
 int pm_generic_restore_noirq(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_RESTORE, true);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->restore_noirq ? pm->restore_noirq(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_restore_noirq);
 
+/**
+ * pm_generic_restore_early - Generic restore_early callback for subsystems.
+ * @dev: Device to resume.
+ */
+int pm_generic_restore_early(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->restore_early ? pm->restore_early(dev) : 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_restore_early);
+
 /**
  * pm_generic_restore - Generic restore callback for subsystems.
  * @dev: Device to restore.
  */
 int pm_generic_restore(struct device *dev)
 {
-	return __pm_generic_call(dev, PM_EVENT_RESTORE, false);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	return pm && pm->restore ? pm->restore(dev) : 0;
 }
 EXPORT_SYMBOL_GPL(pm_generic_restore);
 
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e2cc3d2e0ecccc1d6e6cb9b8f8f95ac4f81812f5..b462c0e341cbf6721fc2ab1dc7f59314f344f625 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -47,6 +47,7 @@ typedef int (*pm_callback_t)(struct device *);
 LIST_HEAD(dpm_list);
 LIST_HEAD(dpm_prepared_list);
 LIST_HEAD(dpm_suspended_list);
+LIST_HEAD(dpm_late_early_list);
 LIST_HEAD(dpm_noirq_list);
 
 struct suspend_stats suspend_stats;
@@ -245,6 +246,40 @@ static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state)
 	return NULL;
 }
 
+/**
+ * pm_late_early_op - Return the PM operation appropriate for given PM event.
+ * @ops: PM operations to choose from.
+ * @state: PM transition of the system being carried out.
+ *
+ * Runtime PM is disabled for @dev while this function is being executed.
+ */
+static pm_callback_t pm_late_early_op(const struct dev_pm_ops *ops,
+				      pm_message_t state)
+{
+	switch (state.event) {
+#ifdef CONFIG_SUSPEND
+	case PM_EVENT_SUSPEND:
+		return ops->suspend_late;
+	case PM_EVENT_RESUME:
+		return ops->resume_early;
+#endif /* CONFIG_SUSPEND */
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+	case PM_EVENT_FREEZE:
+	case PM_EVENT_QUIESCE:
+		return ops->freeze_late;
+	case PM_EVENT_HIBERNATE:
+		return ops->poweroff_late;
+	case PM_EVENT_THAW:
+	case PM_EVENT_RECOVER:
+		return ops->thaw_early;
+	case PM_EVENT_RESTORE:
+		return ops->restore_early;
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
+	}
+
+	return NULL;
+}
+
 /**
  * pm_noirq_op - Return the PM operation appropriate for given PM event.
  * @ops: PM operations to choose from.
@@ -374,21 +409,21 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
 	TRACE_RESUME(0);
 
 	if (dev->pm_domain) {
-		info = "EARLY power domain ";
+		info = "noirq power domain ";
 		callback = pm_noirq_op(&dev->pm_domain->ops, state);
 	} else if (dev->type && dev->type->pm) {
-		info = "EARLY type ";
+		info = "noirq type ";
 		callback = pm_noirq_op(dev->type->pm, state);
 	} else if (dev->class && dev->class->pm) {
-		info = "EARLY class ";
+		info = "noirq class ";
 		callback = pm_noirq_op(dev->class->pm, state);
 	} else if (dev->bus && dev->bus->pm) {
-		info = "EARLY bus ";
+		info = "noirq bus ";
 		callback = pm_noirq_op(dev->bus->pm, state);
 	}
 
 	if (!callback && dev->driver && dev->driver->pm) {
-		info = "EARLY driver ";
+		info = "noirq driver ";
 		callback = pm_noirq_op(dev->driver->pm, state);
 	}
 
@@ -399,13 +434,13 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- * dpm_resume_noirq - Execute "early resume" callbacks for non-sysdev devices.
+ * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices.
  * @state: PM transition of the system being carried out.
  *
- * Call the "noirq" resume handlers for all devices marked as DPM_OFF_IRQ and
+ * Call the "noirq" resume handlers for all devices in dpm_noirq_list and
  * enable device drivers to receive interrupts.
  */
-void dpm_resume_noirq(pm_message_t state)
+static void dpm_resume_noirq(pm_message_t state)
 {
 	ktime_t starttime = ktime_get();
 
@@ -415,7 +450,7 @@ void dpm_resume_noirq(pm_message_t state)
 		int error;
 
 		get_device(dev);
-		list_move_tail(&dev->power.entry, &dpm_suspended_list);
+		list_move_tail(&dev->power.entry, &dpm_late_early_list);
 		mutex_unlock(&dpm_list_mtx);
 
 		error = device_resume_noirq(dev, state);
@@ -423,6 +458,80 @@ void dpm_resume_noirq(pm_message_t state)
 			suspend_stats.failed_resume_noirq++;
 			dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
 			dpm_save_failed_dev(dev_name(dev));
+			pm_dev_err(dev, state, " noirq", error);
+		}
+
+		mutex_lock(&dpm_list_mtx);
+		put_device(dev);
+	}
+	mutex_unlock(&dpm_list_mtx);
+	dpm_show_time(starttime, state, "noirq");
+	resume_device_irqs();
+}
+
+/**
+ * device_resume_early - Execute an "early resume" callback for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
+ *
+ * Runtime PM is disabled for @dev while this function is being executed.
+ */
+static int device_resume_early(struct device *dev, pm_message_t state)
+{
+	pm_callback_t callback = NULL;
+	char *info = NULL;
+	int error = 0;
+
+	TRACE_DEVICE(dev);
+	TRACE_RESUME(0);
+
+	if (dev->pm_domain) {
+		info = "early power domain ";
+		callback = pm_late_early_op(&dev->pm_domain->ops, state);
+	} else if (dev->type && dev->type->pm) {
+		info = "early type ";
+		callback = pm_late_early_op(dev->type->pm, state);
+	} else if (dev->class && dev->class->pm) {
+		info = "early class ";
+		callback = pm_late_early_op(dev->class->pm, state);
+	} else if (dev->bus && dev->bus->pm) {
+		info = "early bus ";
+		callback = pm_late_early_op(dev->bus->pm, state);
+	}
+
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "early driver ";
+		callback = pm_late_early_op(dev->driver->pm, state);
+	}
+
+	error = dpm_run_callback(callback, dev, state, info);
+
+	TRACE_RESUME(error);
+	return error;
+}
+
+/**
+ * dpm_resume_early - Execute "early resume" callbacks for all devices.
+ * @state: PM transition of the system being carried out.
+ */
+static void dpm_resume_early(pm_message_t state)
+{
+	ktime_t starttime = ktime_get();
+
+	mutex_lock(&dpm_list_mtx);
+	while (!list_empty(&dpm_late_early_list)) {
+		struct device *dev = to_device(dpm_late_early_list.next);
+		int error;
+
+		get_device(dev);
+		list_move_tail(&dev->power.entry, &dpm_suspended_list);
+		mutex_unlock(&dpm_list_mtx);
+
+		error = device_resume_early(dev, state);
+		if (error) {
+			suspend_stats.failed_resume_early++;
+			dpm_save_failed_step(SUSPEND_RESUME_EARLY);
+			dpm_save_failed_dev(dev_name(dev));
 			pm_dev_err(dev, state, " early", error);
 		}
 
@@ -431,9 +540,18 @@ void dpm_resume_noirq(pm_message_t state)
 	}
 	mutex_unlock(&dpm_list_mtx);
 	dpm_show_time(starttime, state, "early");
-	resume_device_irqs();
 }
-EXPORT_SYMBOL_GPL(dpm_resume_noirq);
+
+/**
+ * dpm_resume_start - Execute "noirq" and "early" device callbacks.
+ * @state: PM transition of the system being carried out.
+ */
+void dpm_resume_start(pm_message_t state)
+{
+	dpm_resume_noirq(state);
+	dpm_resume_early(state);
+}
+EXPORT_SYMBOL_GPL(dpm_resume_start);
 
 /**
  * device_resume - Execute "resume" callbacks for given device.
@@ -716,21 +834,21 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 	char *info = NULL;
 
 	if (dev->pm_domain) {
-		info = "LATE power domain ";
+		info = "noirq power domain ";
 		callback = pm_noirq_op(&dev->pm_domain->ops, state);
 	} else if (dev->type && dev->type->pm) {
-		info = "LATE type ";
+		info = "noirq type ";
 		callback = pm_noirq_op(dev->type->pm, state);
 	} else if (dev->class && dev->class->pm) {
-		info = "LATE class ";
+		info = "noirq class ";
 		callback = pm_noirq_op(dev->class->pm, state);
 	} else if (dev->bus && dev->bus->pm) {
-		info = "LATE bus ";
+		info = "noirq bus ";
 		callback = pm_noirq_op(dev->bus->pm, state);
 	}
 
 	if (!callback && dev->driver && dev->driver->pm) {
-		info = "LATE driver ";
+		info = "noirq driver ";
 		callback = pm_noirq_op(dev->driver->pm, state);
 	}
 
@@ -738,21 +856,21 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- * dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices.
+ * dpm_suspend_noirq - Execute "noirq suspend" callbacks for all devices.
  * @state: PM transition of the system being carried out.
  *
  * Prevent device drivers from receiving interrupts and call the "noirq" suspend
  * handlers for all non-sysdev devices.
  */
-int dpm_suspend_noirq(pm_message_t state)
+static int dpm_suspend_noirq(pm_message_t state)
 {
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
-	while (!list_empty(&dpm_suspended_list)) {
-		struct device *dev = to_device(dpm_suspended_list.prev);
+	while (!list_empty(&dpm_late_early_list)) {
+		struct device *dev = to_device(dpm_late_early_list.prev);
 
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
@@ -761,7 +879,7 @@ int dpm_suspend_noirq(pm_message_t state)
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
-			pm_dev_err(dev, state, " late", error);
+			pm_dev_err(dev, state, " noirq", error);
 			suspend_stats.failed_suspend_noirq++;
 			dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ);
 			dpm_save_failed_dev(dev_name(dev));
@@ -775,11 +893,96 @@ int dpm_suspend_noirq(pm_message_t state)
 	mutex_unlock(&dpm_list_mtx);
 	if (error)
 		dpm_resume_noirq(resume_event(state));
+	else
+		dpm_show_time(starttime, state, "noirq");
+	return error;
+}
+
+/**
+ * device_suspend_late - Execute a "late suspend" callback for given device.
+ * @dev: Device to handle.
+ * @state: PM transition of the system being carried out.
+ *
+ * Runtime PM is disabled for @dev while this function is being executed.
+ */
+static int device_suspend_late(struct device *dev, pm_message_t state)
+{
+	pm_callback_t callback = NULL;
+	char *info = NULL;
+
+	if (dev->pm_domain) {
+		info = "late power domain ";
+		callback = pm_late_early_op(&dev->pm_domain->ops, state);
+	} else if (dev->type && dev->type->pm) {
+		info = "late type ";
+		callback = pm_late_early_op(dev->type->pm, state);
+	} else if (dev->class && dev->class->pm) {
+		info = "late class ";
+		callback = pm_late_early_op(dev->class->pm, state);
+	} else if (dev->bus && dev->bus->pm) {
+		info = "late bus ";
+		callback = pm_late_early_op(dev->bus->pm, state);
+	}
+
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "late driver ";
+		callback = pm_late_early_op(dev->driver->pm, state);
+	}
+
+	return dpm_run_callback(callback, dev, state, info);
+}
+
+/**
+ * dpm_suspend_late - Execute "late suspend" callbacks for all devices.
+ * @state: PM transition of the system being carried out.
+ */
+static int dpm_suspend_late(pm_message_t state)
+{
+	ktime_t starttime = ktime_get();
+	int error = 0;
+
+	mutex_lock(&dpm_list_mtx);
+	while (!list_empty(&dpm_suspended_list)) {
+		struct device *dev = to_device(dpm_suspended_list.prev);
+
+		get_device(dev);
+		mutex_unlock(&dpm_list_mtx);
+
+		error = device_suspend_late(dev, state);
+
+		mutex_lock(&dpm_list_mtx);
+		if (error) {
+			pm_dev_err(dev, state, " late", error);
+			suspend_stats.failed_suspend_late++;
+			dpm_save_failed_step(SUSPEND_SUSPEND_LATE);
+			dpm_save_failed_dev(dev_name(dev));
+			put_device(dev);
+			break;
+		}
+		if (!list_empty(&dev->power.entry))
+			list_move(&dev->power.entry, &dpm_late_early_list);
+		put_device(dev);
+	}
+	mutex_unlock(&dpm_list_mtx);
+	if (error)
+		dpm_resume_early(resume_event(state));
 	else
 		dpm_show_time(starttime, state, "late");
+
 	return error;
 }
-EXPORT_SYMBOL_GPL(dpm_suspend_noirq);
+
+/**
+ * dpm_suspend_end - Execute "late" and "noirq" device suspend callbacks.
+ * @state: PM transition of the system being carried out.
+ */
+int dpm_suspend_end(pm_message_t state)
+{
+	int error = dpm_suspend_late(state);
+
+	return error ? : dpm_suspend_noirq(state);
+}
+EXPORT_SYMBOL_GPL(dpm_suspend_end);
 
 /**
  * legacy_suspend - Execute a legacy (bus or class) suspend callback for device.
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 9bf62323aaf35778a0d664f5e4c05713bce80d80..eeb4bff9505cd6dccd5d2cb30671fd69105966ef 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -71,6 +71,8 @@ extern void dpm_sysfs_remove(struct device *dev);
 extern void rpm_sysfs_remove(struct device *dev);
 extern int wakeup_sysfs_add(struct device *dev);
 extern void wakeup_sysfs_remove(struct device *dev);
+extern int pm_qos_sysfs_add(struct device *dev);
+extern void pm_qos_sysfs_remove(struct device *dev);
 
 #else /* CONFIG_PM */
 
@@ -79,5 +81,7 @@ static inline void dpm_sysfs_remove(struct device *dev) {}
 static inline void rpm_sysfs_remove(struct device *dev) {}
 static inline int wakeup_sysfs_add(struct device *dev) { return 0; }
 static inline void wakeup_sysfs_remove(struct device *dev) {}
+static inline int pm_qos_sysfs_add(struct device *dev) { return 0; }
+static inline void pm_qos_sysfs_remove(struct device *dev) {}
 
 #endif
diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index c5d358837461bf4d65716dd7a41d14d7341891d5..71855570922de8248a04fb762d633c2ff80895da 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -41,6 +41,7 @@
 #include <linux/mutex.h>
 #include <linux/export.h>
 
+#include "power.h"
 
 static DEFINE_MUTEX(dev_pm_qos_mtx);
 
@@ -166,6 +167,12 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
 	struct dev_pm_qos_request *req, *tmp;
 	struct pm_qos_constraints *c;
 
+	/*
+	 * If the device's PM QoS resume latency limit has been exposed to user
+	 * space, it has to be hidden at this point.
+	 */
+	dev_pm_qos_hide_latency_limit(dev);
+
 	mutex_lock(&dev_pm_qos_mtx);
 
 	dev->power.power_state = PMSG_INVALID;
@@ -445,3 +452,57 @@ int dev_pm_qos_add_ancestor_request(struct device *dev,
 	return error;
 }
 EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request);
+
+#ifdef CONFIG_PM_RUNTIME
+static void __dev_pm_qos_drop_user_request(struct device *dev)
+{
+	dev_pm_qos_remove_request(dev->power.pq_req);
+	dev->power.pq_req = 0;
+}
+
+/**
+ * dev_pm_qos_expose_latency_limit - Expose PM QoS latency limit to user space.
+ * @dev: Device whose PM QoS latency limit is to be exposed to user space.
+ * @value: Initial value of the latency limit.
+ */
+int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value)
+{
+	struct dev_pm_qos_request *req;
+	int ret;
+
+	if (!device_is_registered(dev) || value < 0)
+		return -EINVAL;
+
+	if (dev->power.pq_req)
+		return -EEXIST;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	ret = dev_pm_qos_add_request(dev, req, value);
+	if (ret < 0)
+		return ret;
+
+	dev->power.pq_req = req;
+	ret = pm_qos_sysfs_add(dev);
+	if (ret)
+		__dev_pm_qos_drop_user_request(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit);
+
+/**
+ * dev_pm_qos_hide_latency_limit - Hide PM QoS latency limit from user space.
+ * @dev: Device whose PM QoS latency limit is to be hidden from user space.
+ */
+void dev_pm_qos_hide_latency_limit(struct device *dev)
+{
+	if (dev->power.pq_req) {
+		pm_qos_sysfs_remove(dev);
+		__dev_pm_qos_drop_user_request(dev);
+	}
+}
+EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit);
+#endif /* CONFIG_PM_RUNTIME */
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index adf41be0ea664b7debc52b0ec49c5282a4fb46d5..95c12f6cb5b90da6257b95f839da60bfe3235c97 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -5,6 +5,7 @@
 #include <linux/device.h>
 #include <linux/string.h>
 #include <linux/export.h>
+#include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
 #include <linux/atomic.h>
 #include <linux/jiffies.h>
@@ -217,6 +218,31 @@ static ssize_t autosuspend_delay_ms_store(struct device *dev,
 static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show,
 		autosuspend_delay_ms_store);
 
+static ssize_t pm_qos_latency_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", dev->power.pq_req->node.prio);
+}
+
+static ssize_t pm_qos_latency_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t n)
+{
+	s32 value;
+	int ret;
+
+	if (kstrtos32(buf, 0, &value))
+		return -EINVAL;
+
+	if (value < 0)
+		return -EINVAL;
+
+	ret = dev_pm_qos_update_request(dev->power.pq_req, value);
+	return ret < 0 ? ret : n;
+}
+
+static DEVICE_ATTR(pm_qos_resume_latency_us, 0644,
+		   pm_qos_latency_show, pm_qos_latency_store);
 #endif /* CONFIG_PM_RUNTIME */
 
 #ifdef CONFIG_PM_SLEEP
@@ -490,6 +516,17 @@ static struct attribute_group pm_runtime_attr_group = {
 	.attrs	= runtime_attrs,
 };
 
+static struct attribute *pm_qos_attrs[] = {
+#ifdef CONFIG_PM_RUNTIME
+	&dev_attr_pm_qos_resume_latency_us.attr,
+#endif /* CONFIG_PM_RUNTIME */
+	NULL,
+};
+static struct attribute_group pm_qos_attr_group = {
+	.name	= power_group_name,
+	.attrs	= pm_qos_attrs,
+};
+
 int dpm_sysfs_add(struct device *dev)
 {
 	int rc;
@@ -530,6 +567,16 @@ void wakeup_sysfs_remove(struct device *dev)
 	sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
 }
 
+int pm_qos_sysfs_add(struct device *dev)
+{
+	return sysfs_merge_group(&dev->kobj, &pm_qos_attr_group);
+}
+
+void pm_qos_sysfs_remove(struct device *dev)
+{
+	sysfs_unmerge_group(&dev->kobj, &pm_qos_attr_group);
+}
+
 void rpm_sysfs_remove(struct device *dev)
 {
 	sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index caf995fb774b4ceef6ad03809ac7a2f1b5a5e9e5..2a3e581b8dcd167fa676eedc44b3426c8dc9d08a 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -52,6 +52,23 @@ static void pm_wakeup_timer_fn(unsigned long data);
 
 static LIST_HEAD(wakeup_sources);
 
+/**
+ * wakeup_source_prepare - Prepare a new wakeup source for initialization.
+ * @ws: Wakeup source to prepare.
+ * @name: Pointer to the name of the new wakeup source.
+ *
+ * Callers must ensure that the @name string won't be freed when @ws is still in
+ * use.
+ */
+void wakeup_source_prepare(struct wakeup_source *ws, const char *name)
+{
+	if (ws) {
+		memset(ws, 0, sizeof(*ws));
+		ws->name = name;
+	}
+}
+EXPORT_SYMBOL_GPL(wakeup_source_prepare);
+
 /**
  * wakeup_source_create - Create a struct wakeup_source object.
  * @name: Name of the new wakeup source.
@@ -60,37 +77,44 @@ struct wakeup_source *wakeup_source_create(const char *name)
 {
 	struct wakeup_source *ws;
 
-	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
+	ws = kmalloc(sizeof(*ws), GFP_KERNEL);
 	if (!ws)
 		return NULL;
 
-	spin_lock_init(&ws->lock);
-	if (name)
-		ws->name = kstrdup(name, GFP_KERNEL);
-
+	wakeup_source_prepare(ws, name ? kstrdup(name, GFP_KERNEL) : NULL);
 	return ws;
 }
 EXPORT_SYMBOL_GPL(wakeup_source_create);
 
+/**
+ * wakeup_source_drop - Prepare a struct wakeup_source object for destruction.
+ * @ws: Wakeup source to prepare for destruction.
+ *
+ * Callers must ensure that __pm_stay_awake() or __pm_wakeup_event() will never
+ * be run in parallel with this function for the same wakeup source object.
+ */
+void wakeup_source_drop(struct wakeup_source *ws)
+{
+	if (!ws)
+		return;
+
+	del_timer_sync(&ws->timer);
+	__pm_relax(ws);
+}
+EXPORT_SYMBOL_GPL(wakeup_source_drop);
+
 /**
  * wakeup_source_destroy - Destroy a struct wakeup_source object.
  * @ws: Wakeup source to destroy.
+ *
+ * Use only for wakeup source objects created with wakeup_source_create().
  */
 void wakeup_source_destroy(struct wakeup_source *ws)
 {
 	if (!ws)
 		return;
 
-	spin_lock_irq(&ws->lock);
-	while (ws->active) {
-		spin_unlock_irq(&ws->lock);
-
-		schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
-
-		spin_lock_irq(&ws->lock);
-	}
-	spin_unlock_irq(&ws->lock);
-
+	wakeup_source_drop(ws);
 	kfree(ws->name);
 	kfree(ws);
 }
@@ -105,6 +129,7 @@ void wakeup_source_add(struct wakeup_source *ws)
 	if (WARN_ON(!ws))
 		return;
 
+	spin_lock_init(&ws->lock);
 	setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws);
 	ws->active = false;
 
@@ -152,8 +177,10 @@ EXPORT_SYMBOL_GPL(wakeup_source_register);
  */
 void wakeup_source_unregister(struct wakeup_source *ws)
 {
-	wakeup_source_remove(ws);
-	wakeup_source_destroy(ws);
+	if (ws) {
+		wakeup_source_remove(ws);
+		wakeup_source_destroy(ws);
+	}
 }
 EXPORT_SYMBOL_GPL(wakeup_source_unregister);
 
@@ -349,7 +376,6 @@ static void wakeup_source_activate(struct wakeup_source *ws)
 {
 	ws->active = true;
 	ws->active_count++;
-	ws->timer_expires = jiffies;
 	ws->last_time = ktime_get();
 
 	/* Increment the counter of events in progress. */
@@ -370,9 +396,14 @@ void __pm_stay_awake(struct wakeup_source *ws)
 		return;
 
 	spin_lock_irqsave(&ws->lock, flags);
+
 	ws->event_count++;
 	if (!ws->active)
 		wakeup_source_activate(ws);
+
+	del_timer(&ws->timer);
+	ws->timer_expires = 0;
+
 	spin_unlock_irqrestore(&ws->lock, flags);
 }
 EXPORT_SYMBOL_GPL(__pm_stay_awake);
@@ -438,6 +469,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
 		ws->max_time = duration;
 
 	del_timer(&ws->timer);
+	ws->timer_expires = 0;
 
 	/*
 	 * Increment the counter of registered wakeup events and decrement the
@@ -492,11 +524,22 @@ EXPORT_SYMBOL_GPL(pm_relax);
  * pm_wakeup_timer_fn - Delayed finalization of a wakeup event.
  * @data: Address of the wakeup source object associated with the event source.
  *
- * Call __pm_relax() for the wakeup source whose address is stored in @data.
+ * Call wakeup_source_deactivate() for the wakeup source whose address is stored
+ * in @data if it is currently active and its timer has not been canceled and
+ * the expiration time of the timer is not in future.
  */
 static void pm_wakeup_timer_fn(unsigned long data)
 {
-	__pm_relax((struct wakeup_source *)data);
+	struct wakeup_source *ws = (struct wakeup_source *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ws->lock, flags);
+
+	if (ws->active && ws->timer_expires
+	    && time_after_eq(jiffies, ws->timer_expires))
+		wakeup_source_deactivate(ws);
+
+	spin_unlock_irqrestore(&ws->lock, flags);
 }
 
 /**
@@ -534,7 +577,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 	if (!expires)
 		expires = 1;
 
-	if (time_after(expires, ws->timer_expires)) {
+	if (!ws->timer_expires || time_after(expires, ws->timer_expires)) {
 		mod_timer(&ws->timer, expires);
 		ws->timer_expires = expires;
 	}
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index ca09bc421ddbc25345cf3d6d9b0efe237941454c..32fe9ef5cc5c374d3a9dabf5c733aa17bd73342f 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -32,6 +32,7 @@
 #include <linux/sh_timer.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/pm_domain.h>
 
 struct sh_cmt_priv {
 	void __iomem *mapbase;
@@ -689,6 +690,9 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev)
 	struct sh_cmt_priv *p = platform_get_drvdata(pdev);
 	int ret;
 
+	if (!is_early_platform_device(pdev))
+		pm_genpd_dev_always_on(&pdev->dev, true);
+
 	if (p) {
 		dev_info(&pdev->dev, "kept as earlytimer\n");
 		return 0;
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index db8d5955bad47542f17c24c9186bc3c56be46403..a2172f6904180fd9f30d63cfd3cab2448ab9c856 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -31,6 +31,7 @@
 #include <linux/sh_timer.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/pm_domain.h>
 
 struct sh_mtu2_priv {
 	void __iomem *mapbase;
@@ -306,6 +307,9 @@ static int __devinit sh_mtu2_probe(struct platform_device *pdev)
 	struct sh_mtu2_priv *p = platform_get_drvdata(pdev);
 	int ret;
 
+	if (!is_early_platform_device(pdev))
+		pm_genpd_dev_always_on(&pdev->dev, true);
+
 	if (p) {
 		dev_info(&pdev->dev, "kept as earlytimer\n");
 		return 0;
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 079e96ad44e875b34861126a7446004109df1f74..97f54b634be43234d820beb1661b0493bd79637f 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -32,6 +32,7 @@
 #include <linux/sh_timer.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/pm_domain.h>
 
 struct sh_tmu_priv {
 	void __iomem *mapbase;
@@ -410,6 +411,9 @@ static int __devinit sh_tmu_probe(struct platform_device *pdev)
 	struct sh_tmu_priv *p = platform_get_drvdata(pdev);
 	int ret;
 
+	if (!is_early_platform_device(pdev))
+		pm_genpd_dev_always_on(&pdev->dev, true);
+
 	if (p) {
 		dev_info(&pdev->dev, "kept as earlytimer\n");
 		return 0;
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index c189b82f5ececeddc29f97e62bc948ffb3f31961..70c31d43fff3f1f7c682187c3a3629fce99c1e96 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -83,6 +83,7 @@ int update_devfreq(struct devfreq *devfreq)
 {
 	unsigned long freq;
 	int err = 0;
+	u32 flags = 0;
 
 	if (!mutex_is_locked(&devfreq->lock)) {
 		WARN(true, "devfreq->lock must be locked by the caller.\n");
@@ -94,7 +95,24 @@ int update_devfreq(struct devfreq *devfreq)
 	if (err)
 		return err;
 
-	err = devfreq->profile->target(devfreq->dev.parent, &freq);
+	/*
+	 * Adjust the freuqency with user freq and QoS.
+	 *
+	 * List from the highest proiority
+	 * max_freq (probably called by thermal when it's too hot)
+	 * min_freq
+	 */
+
+	if (devfreq->min_freq && freq < devfreq->min_freq) {
+		freq = devfreq->min_freq;
+		flags &= ~DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use GLB */
+	}
+	if (devfreq->max_freq && freq > devfreq->max_freq) {
+		freq = devfreq->max_freq;
+		flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use LUB */
+	}
+
+	err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
 	if (err)
 		return err;
 
@@ -501,12 +519,82 @@ static ssize_t show_central_polling(struct device *dev,
 		       !to_devfreq(dev)->governor->no_central_polling);
 }
 
+static ssize_t store_min_freq(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct devfreq *df = to_devfreq(dev);
+	unsigned long value;
+	int ret;
+	unsigned long max;
+
+	ret = sscanf(buf, "%lu", &value);
+	if (ret != 1)
+		goto out;
+
+	mutex_lock(&df->lock);
+	max = df->max_freq;
+	if (value && max && value > max) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	df->min_freq = value;
+	update_devfreq(df);
+	ret = count;
+unlock:
+	mutex_unlock(&df->lock);
+out:
+	return ret;
+}
+
+static ssize_t show_min_freq(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	return sprintf(buf, "%lu\n", to_devfreq(dev)->min_freq);
+}
+
+static ssize_t store_max_freq(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct devfreq *df = to_devfreq(dev);
+	unsigned long value;
+	int ret;
+	unsigned long min;
+
+	ret = sscanf(buf, "%lu", &value);
+	if (ret != 1)
+		goto out;
+
+	mutex_lock(&df->lock);
+	min = df->min_freq;
+	if (value && min && value < min) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	df->max_freq = value;
+	update_devfreq(df);
+	ret = count;
+unlock:
+	mutex_unlock(&df->lock);
+out:
+	return ret;
+}
+
+static ssize_t show_max_freq(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	return sprintf(buf, "%lu\n", to_devfreq(dev)->max_freq);
+}
+
 static struct device_attribute devfreq_attrs[] = {
 	__ATTR(governor, S_IRUGO, show_governor, NULL),
 	__ATTR(cur_freq, S_IRUGO, show_freq, NULL),
 	__ATTR(central_polling, S_IRUGO, show_central_polling, NULL),
 	__ATTR(polling_interval, S_IRUGO | S_IWUSR, show_polling_interval,
 	       store_polling_interval),
+	__ATTR(min_freq, S_IRUGO | S_IWUSR, show_min_freq, store_min_freq),
+	__ATTR(max_freq, S_IRUGO | S_IWUSR, show_max_freq, store_max_freq),
 	{ },
 };
 
@@ -555,14 +643,30 @@ module_exit(devfreq_exit);
  *			     freq value given to target callback.
  * @dev		The devfreq user device. (parent of devfreq)
  * @freq	The frequency given to target function
+ * @flags	Flags handed from devfreq framework.
  *
  */
-struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq)
+struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq,
+				    u32 flags)
 {
-	struct opp *opp = opp_find_freq_ceil(dev, freq);
+	struct opp *opp;
 
-	if (opp == ERR_PTR(-ENODEV))
+	if (flags & DEVFREQ_FLAG_LEAST_UPPER_BOUND) {
+		/* The freq is an upper bound. opp should be lower */
 		opp = opp_find_freq_floor(dev, freq);
+
+		/* If not available, use the closest opp */
+		if (opp == ERR_PTR(-ENODEV))
+			opp = opp_find_freq_ceil(dev, freq);
+	} else {
+		/* The freq is an lower bound. opp should be higher */
+		opp = opp_find_freq_ceil(dev, freq);
+
+		/* If not available, use the closest opp */
+		if (opp == ERR_PTR(-ENODEV))
+			opp = opp_find_freq_floor(dev, freq);
+	}
+
 	return opp;
 }
 
diff --git a/drivers/devfreq/exynos4_bus.c b/drivers/devfreq/exynos4_bus.c
index 6460577d6701b468bd6edeaa1c21e739b7818918..1a361e99965ad26e6f9f93de0d94eed20bf77bad 100644
--- a/drivers/devfreq/exynos4_bus.c
+++ b/drivers/devfreq/exynos4_bus.c
@@ -619,15 +619,19 @@ static int exynos4_bus_setvolt(struct busfreq_data *data, struct opp *opp,
 	return err;
 }
 
-static int exynos4_bus_target(struct device *dev, unsigned long *_freq)
+static int exynos4_bus_target(struct device *dev, unsigned long *_freq,
+			      u32 flags)
 {
 	int err = 0;
 	struct platform_device *pdev = container_of(dev, struct platform_device,
 						    dev);
 	struct busfreq_data *data = platform_get_drvdata(pdev);
-	struct opp *opp = devfreq_recommended_opp(dev, _freq);
-	unsigned long old_freq = opp_get_freq(data->curr_opp);
+	struct opp *opp = devfreq_recommended_opp(dev, _freq, flags);
 	unsigned long freq = opp_get_freq(opp);
+	unsigned long old_freq = opp_get_freq(data->curr_opp);
+
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
 
 	if (old_freq == freq)
 		return 0;
@@ -689,9 +693,7 @@ static int exynos4_get_busier_dmc(struct busfreq_data *data)
 static int exynos4_bus_get_dev_status(struct device *dev,
 				      struct devfreq_dev_status *stat)
 {
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data *data = platform_get_drvdata(pdev);
+	struct busfreq_data *data = dev_get_drvdata(dev);
 	int busier_dmc;
 	int cycles_x2 = 2; /* 2 x cycles */
 	void __iomem *addr;
@@ -739,9 +741,7 @@ static int exynos4_bus_get_dev_status(struct device *dev,
 
 static void exynos4_bus_exit(struct device *dev)
 {
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data *data = platform_get_drvdata(pdev);
+	struct busfreq_data *data = dev_get_drvdata(dev);
 
 	devfreq_unregister_opp_notifier(dev, data->devfreq);
 }
@@ -1087,9 +1087,7 @@ static __devexit int exynos4_busfreq_remove(struct platform_device *pdev)
 
 static int exynos4_busfreq_resume(struct device *dev)
 {
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data *data = platform_get_drvdata(pdev);
+	struct busfreq_data *data = dev_get_drvdata(dev);
 
 	busfreq_mon_reset(data);
 	return 0;
@@ -1132,4 +1130,3 @@ module_exit(exynos4_busfreq_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("EXYNOS4 busfreq driver with devfreq framework");
 MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
-MODULE_ALIAS("exynos4-busfreq");
diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c
index c0596b29176113653a59c1993477f8b5a7e0c6aa..574a06b1b1deb4d431cddd1d7a399bcb9a7825f4 100644
--- a/drivers/devfreq/governor_performance.c
+++ b/drivers/devfreq/governor_performance.c
@@ -18,7 +18,10 @@ static int devfreq_performance_func(struct devfreq *df,
 	 * target callback should be able to get floor value as
 	 * said in devfreq.h
 	 */
-	*freq = UINT_MAX;
+	if (!df->max_freq)
+		*freq = UINT_MAX;
+	else
+		*freq = df->max_freq;
 	return 0;
 }
 
diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c
index 2483a85a266fad446a39ae806438c952b0e9248b..d742d4a82d6a0da726f620ae5505e4d5c642c95b 100644
--- a/drivers/devfreq/governor_powersave.c
+++ b/drivers/devfreq/governor_powersave.c
@@ -18,7 +18,7 @@ static int devfreq_powersave_func(struct devfreq *df,
 	 * target callback should be able to get ceiling value as
 	 * said in devfreq.h
 	 */
-	*freq = 0;
+	*freq = df->min_freq;
 	return 0;
 }
 
diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
index efad8dcf902877f4b67e5179dbd1dc17ba695abb..a2e3eae790119ef7e01527a94b4bf44b20589a07 100644
--- a/drivers/devfreq/governor_simpleondemand.c
+++ b/drivers/devfreq/governor_simpleondemand.c
@@ -25,6 +25,7 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 	unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD;
 	unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL;
 	struct devfreq_simple_ondemand_data *data = df->data;
+	unsigned long max = (df->max_freq) ? df->max_freq : UINT_MAX;
 
 	if (err)
 		return err;
@@ -41,7 +42,7 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 
 	/* Assume MAX if it is going to be divided by zero */
 	if (stat.total_time == 0) {
-		*freq = UINT_MAX;
+		*freq = max;
 		return 0;
 	}
 
@@ -54,13 +55,13 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 	/* Set MAX if it's busy enough */
 	if (stat.busy_time * 100 >
 	    stat.total_time * dfso_upthreshold) {
-		*freq = UINT_MAX;
+		*freq = max;
 		return 0;
 	}
 
 	/* Set MAX if we do not know the initial frequency */
 	if (stat.current_frequency == 0) {
-		*freq = UINT_MAX;
+		*freq = max;
 		return 0;
 	}
 
@@ -79,6 +80,11 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 	b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2));
 	*freq = (unsigned long) b;
 
+	if (df->min_freq && *freq < df->min_freq)
+		*freq = df->min_freq;
+	if (df->max_freq && *freq > df->max_freq)
+		*freq = df->max_freq;
+
 	return 0;
 }
 
diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 4f8b563da78265debbe968690b37707b91e39fd1..0681246fc89dc32e53900929abd2c2df4329e0ab 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -25,10 +25,19 @@ static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq)
 {
 	struct userspace_data *data = df->data;
 
-	if (!data->valid)
+	if (data->valid) {
+		unsigned long adjusted_freq = data->user_frequency;
+
+		if (df->max_freq && adjusted_freq > df->max_freq)
+			adjusted_freq = df->max_freq;
+
+		if (df->min_freq && adjusted_freq < df->min_freq)
+			adjusted_freq = df->min_freq;
+
+		*freq = adjusted_freq;
+	} else {
 		*freq = df->previous_freq; /* No user freq specified yet */
-	else
-		*freq = data->user_frequency;
+	}
 	return 0;
 }
 
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 352d4797865b4a9b76d58781cf80117c680e17c2..75a485448796e67d553cae26d3e3eda9fa329b98 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -56,6 +56,7 @@
 #include <linux/mmc/sh_mmcif.h>
 #include <linux/pagemap.h>
 #include <linux/platform_device.h>
+#include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
 #include <linux/spinlock.h>
 #include <linux/module.h>
@@ -1346,6 +1347,8 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto clean_up5;
 
+	dev_pm_qos_expose_latency_limit(&pdev->dev, 100);
+
 	dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION);
 	dev_dbg(&pdev->dev, "chip ver H'%04x\n",
 		sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0x0000ffff);
@@ -1376,6 +1379,8 @@ static int __devexit sh_mmcif_remove(struct platform_device *pdev)
 	host->dying = true;
 	pm_runtime_get_sync(&pdev->dev);
 
+	dev_pm_qos_hide_latency_limit(&pdev->dev);
+
 	mmc_remove_host(host->mmc);
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
 
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 5f9ad74fbf80b3a4a3556c5c5919c9ba525c610d..e21988901c367082f9fabb59eccb3b821b4b5843 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -39,6 +39,7 @@
 #include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/platform_device.h>
+#include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
 #include <linux/spinlock.h>
@@ -955,6 +956,8 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host,
 
 	mmc_add_host(mmc);
 
+	dev_pm_qos_expose_latency_limit(&pdev->dev, 100);
+
 	/* Unmask the IRQs we want to know about */
 	if (!_host->chan_rx)
 		irq_mask |= TMIO_MASK_READOP;
@@ -993,6 +996,8 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host)
 		|| host->mmc->caps & MMC_CAP_NONREMOVABLE)
 		pm_runtime_get_sync(&pdev->dev);
 
+	dev_pm_qos_hide_latency_limit(&pdev->dev);
+
 	mmc_remove_host(host->mmc);
 	cancel_work_sync(&host->done);
 	cancel_delayed_work_sync(&host->delayed_reset_work);
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index ce4fa0831860175dedd4b53d3cd22e168474f7b9..9e14ae6cd49c018bf45156d58c1ff4af7411fdc3 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -129,9 +129,9 @@ static void do_suspend(void)
 	printk(KERN_DEBUG "suspending xenstore...\n");
 	xs_suspend();
 
-	err = dpm_suspend_noirq(PMSG_FREEZE);
+	err = dpm_suspend_end(PMSG_FREEZE);
 	if (err) {
-		printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
+		printk(KERN_ERR "dpm_suspend_end failed: %d\n", err);
 		goto out_resume;
 	}
 
@@ -149,7 +149,7 @@ static void do_suspend(void)
 
 	err = stop_machine(xen_suspend, &si, cpumask_of(0));
 
-	dpm_resume_noirq(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
+	dpm_resume_start(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
 
 	if (err) {
 		printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index e49e81bb80ef76feafe8054e04b717f547699505..0971e9217808829a67f5fd340a1972a63a982930 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -129,6 +129,8 @@ static int kjournald(void *arg)
 	setup_timer(&journal->j_commit_timer, commit_timeout,
 			(unsigned long)current);
 
+	set_freezable();
+
 	/* Record that the journal thread is running */
 	journal->j_task = current;
 	wake_up(&journal->j_wait_done_commit);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5ff8940b8f024f309ee540490d5a776432f2d173..839377e3d6244ac540e0a573bf9c4adc679225a8 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -139,6 +139,8 @@ static int kjournald2(void *arg)
 	setup_timer(&journal->j_commit_timer, commit_timeout,
 			(unsigned long)current);
 
+	set_freezable();
+
 	/* Record that the journal thread is running */
 	journal->j_task = current;
 	wake_up(&journal->j_wait_done_commit);
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 98ce8124b1cc5e5fb2e61fcf89cc8bd3cc0a7e82..281c72a3b9d50d19161c0f6182d38f67991cce8f 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -44,6 +44,14 @@ struct devfreq_dev_status {
 	void *private_data;
 };
 
+/*
+ * The resulting frequency should be at most this. (this bound is the
+ * least upper bound; thus, the resulting freq should be lower or same)
+ * If the flag is not set, the resulting frequency should be at most the
+ * bound (greatest lower bound)
+ */
+#define DEVFREQ_FLAG_LEAST_UPPER_BOUND		0x1
+
 /**
  * struct devfreq_dev_profile - Devfreq's user device profile
  * @initial_freq	The operating frequency when devfreq_add_device() is
@@ -54,6 +62,8 @@ struct devfreq_dev_status {
  *			higher than any operable frequency, set maximum.
  *			Before returning, target function should set
  *			freq at the current frequency.
+ *			The "flags" parameter's possible values are
+ *			explained above with "DEVFREQ_FLAG_*" macros.
  * @get_dev_status	The device should provide the current performance
  *			status to devfreq, which is used by governors.
  * @exit		An optional callback that is called when devfreq
@@ -66,7 +76,7 @@ struct devfreq_dev_profile {
 	unsigned long initial_freq;
 	unsigned int polling_ms;
 
-	int (*target)(struct device *dev, unsigned long *freq);
+	int (*target)(struct device *dev, unsigned long *freq, u32 flags);
 	int (*get_dev_status)(struct device *dev,
 			      struct devfreq_dev_status *stat);
 	void (*exit)(struct device *dev);
@@ -124,6 +134,8 @@ struct devfreq_governor {
  *		touch this.
  * @being_removed	a flag to mark that this object is being removed in
  *			order to prevent trying to remove the object multiple times.
+ * @min_freq	Limit minimum frequency requested by user (0: none)
+ * @max_freq	Limit maximum frequency requested by user (0: none)
  *
  * This structure stores the devfreq information for a give device.
  *
@@ -149,6 +161,9 @@ struct devfreq {
 	void *data; /* private data for governors */
 
 	bool being_removed;
+
+	unsigned long min_freq;
+	unsigned long max_freq;
 };
 
 #if defined(CONFIG_PM_DEVFREQ)
@@ -160,7 +175,7 @@ extern int devfreq_remove_device(struct devfreq *devfreq);
 
 /* Helper functions for devfreq user device driver with OPP. */
 extern struct opp *devfreq_recommended_opp(struct device *dev,
-					   unsigned long *freq);
+					   unsigned long *freq, u32 flags);
 extern int devfreq_register_opp_notifier(struct device *dev,
 					 struct devfreq *devfreq);
 extern int devfreq_unregister_opp_notifier(struct device *dev,
@@ -200,18 +215,18 @@ struct devfreq_simple_ondemand_data {
 static struct devfreq *devfreq_add_device(struct device *dev,
 					  struct devfreq_dev_profile *profile,
 					  struct devfreq_governor *governor,
-					  void *data);
+					  void *data)
 {
 	return NULL;
 }
 
-static int devfreq_remove_device(struct devfreq *devfreq);
+static int devfreq_remove_device(struct devfreq *devfreq)
 {
 	return 0;
 }
 
 static struct opp *devfreq_recommended_opp(struct device *dev,
-					   unsigned long *freq)
+					   unsigned long *freq, u32 flags)
 {
 	return -EINVAL;
 }
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e4982ac3fbbc4c2fa9f9d54a9bd0a8b088fcffc6..715305e05123fbddfc6c886d05a0c5b2925ff508 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -110,6 +110,10 @@ typedef struct pm_message {
  *	Subsystem-level @suspend() is executed for all devices after invoking
  *	subsystem-level @prepare() for all of them.
  *
+ * @suspend_late: Continue operations started by @suspend().  For a number of
+ *	devices @suspend_late() may point to the same callback routine as the
+ *	runtime suspend callback.
+ *
  * @resume: Executed after waking the system up from a sleep state in which the
  *	contents of main memory were preserved.  The exact action to perform
  *	depends on the device's subsystem, but generally the driver is expected
@@ -122,6 +126,10 @@ typedef struct pm_message {
  *	Subsystem-level @resume() is executed for all devices after invoking
  *	subsystem-level @resume_noirq() for all of them.
  *
+ * @resume_early: Prepare to execute @resume().  For a number of devices
+ *	@resume_early() may point to the same callback routine as the runtime
+ *	resume callback.
+ *
  * @freeze: Hibernation-specific, executed before creating a hibernation image.
  *	Analogous to @suspend(), but it should not enable the device to signal
  *	wakeup events or change its power state.  The majority of subsystems
@@ -131,6 +139,10 @@ typedef struct pm_message {
  *	Subsystem-level @freeze() is executed for all devices after invoking
  *	subsystem-level @prepare() for all of them.
  *
+ * @freeze_late: Continue operations started by @freeze().  Analogous to
+ *	@suspend_late(), but it should not enable the device to signal wakeup
+ *	events or change its power state.
+ *
  * @thaw: Hibernation-specific, executed after creating a hibernation image OR
  *	if the creation of an image has failed.  Also executed after a failing
  *	attempt to restore the contents of main memory from such an image.
@@ -140,15 +152,23 @@ typedef struct pm_message {
  *	subsystem-level @thaw_noirq() for all of them.  It also may be executed
  *	directly after @freeze() in case of a transition error.
  *
+ * @thaw_early: Prepare to execute @thaw().  Undo the changes made by the
+ *	preceding @freeze_late().
+ *
  * @poweroff: Hibernation-specific, executed after saving a hibernation image.
  *	Analogous to @suspend(), but it need not save the device's settings in
  *	memory.
  *	Subsystem-level @poweroff() is executed for all devices after invoking
  *	subsystem-level @prepare() for all of them.
  *
+ * @poweroff_late: Continue operations started by @poweroff().  Analogous to
+ *	@suspend_late(), but it need not save the device's settings in memory.
+ *
  * @restore: Hibernation-specific, executed after restoring the contents of main
  *	memory from a hibernation image, analogous to @resume().
  *
+ * @restore_early: Prepare to execute @restore(), analogous to @resume_early().
+ *
  * @suspend_noirq: Complete the actions started by @suspend().  Carry out any
  *	additional operations required for suspending the device that might be
  *	racing with its driver's interrupt handler, which is guaranteed not to
@@ -158,9 +178,10 @@ typedef struct pm_message {
  *	@suspend_noirq() has returned successfully.  If the device can generate
  *	system wakeup signals and is enabled to wake up the system, it should be
  *	configured to do so at that time.  However, depending on the platform
- *	and device's subsystem, @suspend() may be allowed to put the device into
- *	the low-power state and configure it to generate wakeup signals, in
- *	which case it generally is not necessary to define @suspend_noirq().
+ *	and device's subsystem, @suspend() or @suspend_late() may be allowed to
+ *	put the device into the low-power state and configure it to generate
+ *	wakeup signals, in which case it generally is not necessary to define
+ *	@suspend_noirq().
  *
  * @resume_noirq: Prepare for the execution of @resume() by carrying out any
  *	operations required for resuming the device that might be racing with
@@ -171,9 +192,9 @@ typedef struct pm_message {
  *	additional operations required for freezing the device that might be
  *	racing with its driver's interrupt handler, which is guaranteed not to
  *	run while @freeze_noirq() is being executed.
- *	The power state of the device should not be changed by either @freeze()
- *	or @freeze_noirq() and it should not be configured to signal system
- *	wakeup by any of these callbacks.
+ *	The power state of the device should not be changed by either @freeze(),
+ *	or @freeze_late(), or @freeze_noirq() and it should not be configured to
+ *	signal system wakeup by any of these callbacks.
  *
  * @thaw_noirq: Prepare for the execution of @thaw() by carrying out any
  *	operations required for thawing the device that might be racing with its
@@ -249,6 +270,12 @@ struct dev_pm_ops {
 	int (*thaw)(struct device *dev);
 	int (*poweroff)(struct device *dev);
 	int (*restore)(struct device *dev);
+	int (*suspend_late)(struct device *dev);
+	int (*resume_early)(struct device *dev);
+	int (*freeze_late)(struct device *dev);
+	int (*thaw_early)(struct device *dev);
+	int (*poweroff_late)(struct device *dev);
+	int (*restore_early)(struct device *dev);
 	int (*suspend_noirq)(struct device *dev);
 	int (*resume_noirq)(struct device *dev);
 	int (*freeze_noirq)(struct device *dev);
@@ -293,6 +320,15 @@ const struct dev_pm_ops name = { \
 /*
  * Use this for defining a set of PM operations to be used in all situations
  * (sustem suspend, hibernation or runtime PM).
+ * NOTE: In general, system suspend callbacks, .suspend() and .resume(), should
+ * be different from the corresponding runtime PM callbacks, .runtime_suspend(),
+ * and .runtime_resume(), because .runtime_suspend() always works on an already
+ * quiescent device, while .suspend() should assume that the device may be doing
+ * something when it is called (it should ensure that the device will be
+ * quiescent after it has returned).  Therefore it's better to point the "late"
+ * suspend and "early" resume callback pointers, .suspend_late() and
+ * .resume_early(), to the same routines as .runtime_suspend() and
+ * .runtime_resume(), respectively (and analogously for hibernation).
  */
 #define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
 const struct dev_pm_ops name = { \
@@ -510,6 +546,7 @@ struct dev_pm_info {
 	unsigned long		accounting_timestamp;
 	ktime_t			suspend_time;
 	s64			max_time_suspended_ns;
+	struct dev_pm_qos_request *pq_req;
 #endif
 	struct pm_subsys_data	*subsys_data;  /* Owned by the subsystem. */
 	struct pm_qos_constraints *constraints;
@@ -584,13 +621,13 @@ struct dev_pm_domain {
 
 #ifdef CONFIG_PM_SLEEP
 extern void device_pm_lock(void);
-extern void dpm_resume_noirq(pm_message_t state);
+extern void dpm_resume_start(pm_message_t state);
 extern void dpm_resume_end(pm_message_t state);
 extern void dpm_resume(pm_message_t state);
 extern void dpm_complete(pm_message_t state);
 
 extern void device_pm_unlock(void);
-extern int dpm_suspend_noirq(pm_message_t state);
+extern int dpm_suspend_end(pm_message_t state);
 extern int dpm_suspend_start(pm_message_t state);
 extern int dpm_suspend(pm_message_t state);
 extern int dpm_prepare(pm_message_t state);
@@ -605,17 +642,23 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
 extern int device_pm_wait_for_dev(struct device *sub, struct device *dev);
 
 extern int pm_generic_prepare(struct device *dev);
+extern int pm_generic_suspend_late(struct device *dev);
 extern int pm_generic_suspend_noirq(struct device *dev);
 extern int pm_generic_suspend(struct device *dev);
+extern int pm_generic_resume_early(struct device *dev);
 extern int pm_generic_resume_noirq(struct device *dev);
 extern int pm_generic_resume(struct device *dev);
 extern int pm_generic_freeze_noirq(struct device *dev);
+extern int pm_generic_freeze_late(struct device *dev);
 extern int pm_generic_freeze(struct device *dev);
 extern int pm_generic_thaw_noirq(struct device *dev);
+extern int pm_generic_thaw_early(struct device *dev);
 extern int pm_generic_thaw(struct device *dev);
 extern int pm_generic_restore_noirq(struct device *dev);
+extern int pm_generic_restore_early(struct device *dev);
 extern int pm_generic_restore(struct device *dev);
 extern int pm_generic_poweroff_noirq(struct device *dev);
+extern int pm_generic_poweroff_late(struct device *dev);
 extern int pm_generic_poweroff(struct device *dev);
 extern void pm_generic_complete(struct device *dev);
 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index a03a0ad998b81498b26697927931606d0349e13d..1236d262b3e868700e330af2fcb0783aa08702cd 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -11,6 +11,7 @@
 
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/of.h>
 
 enum gpd_status {
 	GPD_STATE_ACTIVE = 0,	/* PM domain is active */
@@ -70,6 +71,7 @@ struct generic_pm_domain {
 	s64 break_even_ns;	/* Power break even for the entire domain. */
 	s64 max_off_time_ns;	/* Maximum allowed "suspended" time. */
 	ktime_t power_off_time;
+	struct device_node *of_node; /* Node in device tree */
 };
 
 static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd)
@@ -97,14 +99,15 @@ struct generic_pm_domain_data {
 	struct gpd_dev_ops ops;
 	struct gpd_timing_data td;
 	bool need_restore;
+	bool always_on;
 };
 
+#ifdef CONFIG_PM_GENERIC_DOMAINS
 static inline struct generic_pm_domain_data *to_gpd_data(struct pm_domain_data *pdd)
 {
 	return container_of(pdd, struct generic_pm_domain_data, base);
 }
 
-#ifdef CONFIG_PM_GENERIC_DOMAINS
 static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev)
 {
 	return to_gpd_data(dev->power.subsys_data->domain_data);
@@ -117,14 +120,25 @@ extern int __pm_genpd_add_device(struct generic_pm_domain *genpd,
 				 struct device *dev,
 				 struct gpd_timing_data *td);
 
+extern int __pm_genpd_of_add_device(struct device_node *genpd_node,
+				    struct device *dev,
+				    struct gpd_timing_data *td);
+
 static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
 				      struct device *dev)
 {
 	return __pm_genpd_add_device(genpd, dev, NULL);
 }
 
+static inline int pm_genpd_of_add_device(struct device_node *genpd_node,
+					 struct device *dev)
+{
+	return __pm_genpd_of_add_device(genpd_node, dev, NULL);
+}
+
 extern int pm_genpd_remove_device(struct generic_pm_domain *genpd,
 				  struct device *dev);
+extern void pm_genpd_dev_always_on(struct device *dev, bool val);
 extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
 				  struct generic_pm_domain *new_subdomain);
 extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
@@ -143,6 +157,10 @@ extern bool default_stop_ok(struct device *dev);
 extern struct dev_power_governor pm_domain_always_on_gov;
 #else
 
+static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev)
+{
+	return ERR_PTR(-ENOSYS);
+}
 static inline struct generic_pm_domain *dev_to_genpd(struct device *dev)
 {
 	return ERR_PTR(-ENOSYS);
@@ -163,6 +181,7 @@ static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd,
 {
 	return -ENOSYS;
 }
+static inline void pm_genpd_dev_always_on(struct device *dev, bool val) {}
 static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
 					 struct generic_pm_domain *new_sd)
 {
@@ -183,7 +202,8 @@ static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td)
 {
 	return -ENOSYS;
 }
-static inline void pm_genpd_init(struct generic_pm_domain *genpd, bool is_off)
+static inline void pm_genpd_init(struct generic_pm_domain *genpd,
+				 struct dev_power_governor *gov, bool is_off)
 {
 }
 static inline int pm_genpd_poweron(struct generic_pm_domain *genpd)
@@ -194,6 +214,7 @@ static inline bool default_stop_ok(struct device *dev)
 {
 	return false;
 }
+#define simple_qos_governor NULL
 #define pm_domain_always_on_gov NULL
 #endif
 
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 4d99e4e6ef83fa4910e22959bc9326117ce1288e..2e9191a712f301574a6109541358b3a2d3ae9e52 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -9,12 +9,16 @@
 #include <linux/miscdevice.h>
 #include <linux/device.h>
 
-#define PM_QOS_RESERVED 0
-#define PM_QOS_CPU_DMA_LATENCY 1
-#define PM_QOS_NETWORK_LATENCY 2
-#define PM_QOS_NETWORK_THROUGHPUT 3
+enum {
+	PM_QOS_RESERVED = 0,
+	PM_QOS_CPU_DMA_LATENCY,
+	PM_QOS_NETWORK_LATENCY,
+	PM_QOS_NETWORK_THROUGHPUT,
+
+	/* insert new class ID */
+	PM_QOS_NUM_CLASSES,
+};
 
-#define PM_QOS_NUM_CLASSES 4
 #define PM_QOS_DEFAULT_VALUE -1
 
 #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
@@ -63,7 +67,6 @@ static inline int dev_pm_qos_request_active(struct dev_pm_qos_request *req)
 	return req->dev != 0;
 }
 
-#ifdef CONFIG_PM
 int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
 			 enum pm_qos_req_action action, int value);
 void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class,
@@ -78,6 +81,7 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier);
 int pm_qos_request_active(struct pm_qos_request *req);
 s32 pm_qos_read_value(struct pm_qos_constraints *c);
 
+#ifdef CONFIG_PM
 s32 __dev_pm_qos_read_value(struct device *dev);
 s32 dev_pm_qos_read_value(struct device *dev);
 int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req,
@@ -95,45 +99,6 @@ void dev_pm_qos_constraints_destroy(struct device *dev);
 int dev_pm_qos_add_ancestor_request(struct device *dev,
 				    struct dev_pm_qos_request *req, s32 value);
 #else
-static inline int pm_qos_update_target(struct pm_qos_constraints *c,
-				       struct plist_node *node,
-				       enum pm_qos_req_action action,
-				       int value)
-			{ return 0; }
-static inline void pm_qos_add_request(struct pm_qos_request *req,
-				      int pm_qos_class, s32 value)
-			{ return; }
-static inline void pm_qos_update_request(struct pm_qos_request *req,
-					 s32 new_value)
-			{ return; }
-static inline void pm_qos_remove_request(struct pm_qos_request *req)
-			{ return; }
-
-static inline int pm_qos_request(int pm_qos_class)
-{
-	switch (pm_qos_class) {
-	case PM_QOS_CPU_DMA_LATENCY:
-		return PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE;
-	case PM_QOS_NETWORK_LATENCY:
-		return PM_QOS_NETWORK_LAT_DEFAULT_VALUE;
-	case PM_QOS_NETWORK_THROUGHPUT:
-		return PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE;
-	default:
-		return PM_QOS_DEFAULT_VALUE;
-	}
-}
-
-static inline int pm_qos_add_notifier(int pm_qos_class,
-				      struct notifier_block *notifier)
-			{ return 0; }
-static inline int pm_qos_remove_notifier(int pm_qos_class,
-					 struct notifier_block *notifier)
-			{ return 0; }
-static inline int pm_qos_request_active(struct pm_qos_request *req)
-			{ return 0; }
-static inline s32 pm_qos_read_value(struct pm_qos_constraints *c)
-			{ return 0; }
-
 static inline s32 __dev_pm_qos_read_value(struct device *dev)
 			{ return 0; }
 static inline s32 dev_pm_qos_read_value(struct device *dev)
@@ -172,4 +137,13 @@ static inline int dev_pm_qos_add_ancestor_request(struct device *dev,
 			{ return 0; }
 #endif
 
+#ifdef CONFIG_PM_RUNTIME
+int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value);
+void dev_pm_qos_hide_latency_limit(struct device *dev);
+#else
+static inline int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value)
+			{ return 0; }
+static inline void dev_pm_qos_hide_latency_limit(struct device *dev) {}
+#endif
+
 #endif
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index a32da962d6932963196cf0ea3783d27db33c9fbe..d9f05113e5fb85204df5f45ee28d17e4596f31ee 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -41,7 +41,7 @@
  * @active: Status of the wakeup source.
  */
 struct wakeup_source {
-	char 			*name;
+	const char 		*name;
 	struct list_head	entry;
 	spinlock_t		lock;
 	struct timer_list	timer;
@@ -73,7 +73,9 @@ static inline bool device_may_wakeup(struct device *dev)
 }
 
 /* drivers/base/power/wakeup.c */
+extern void wakeup_source_prepare(struct wakeup_source *ws, const char *name);
 extern struct wakeup_source *wakeup_source_create(const char *name);
+extern void wakeup_source_drop(struct wakeup_source *ws);
 extern void wakeup_source_destroy(struct wakeup_source *ws);
 extern void wakeup_source_add(struct wakeup_source *ws);
 extern void wakeup_source_remove(struct wakeup_source *ws);
@@ -103,11 +105,16 @@ static inline bool device_can_wakeup(struct device *dev)
 	return dev->power.can_wakeup;
 }
 
+static inline void wakeup_source_prepare(struct wakeup_source *ws,
+					 const char *name) {}
+
 static inline struct wakeup_source *wakeup_source_create(const char *name)
 {
 	return NULL;
 }
 
+static inline void wakeup_source_drop(struct wakeup_source *ws) {}
+
 static inline void wakeup_source_destroy(struct wakeup_source *ws) {}
 
 static inline void wakeup_source_add(struct wakeup_source *ws) {}
@@ -165,4 +172,17 @@ static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {}
 
 #endif /* !CONFIG_PM_SLEEP */
 
+static inline void wakeup_source_init(struct wakeup_source *ws,
+				      const char *name)
+{
+	wakeup_source_prepare(ws, name);
+	wakeup_source_add(ws);
+}
+
+static inline void wakeup_source_trash(struct wakeup_source *ws)
+{
+	wakeup_source_remove(ws);
+	wakeup_source_drop(ws);
+}
+
 #endif /* _LINUX_PM_WAKEUP_H */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 91784a4f860852d01f958ecce70439e381dcaabf..ac1c114c499d088e0e9d7bed9f214abe9d924cf2 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -42,8 +42,10 @@ enum suspend_stat_step {
 	SUSPEND_FREEZE = 1,
 	SUSPEND_PREPARE,
 	SUSPEND_SUSPEND,
+	SUSPEND_SUSPEND_LATE,
 	SUSPEND_SUSPEND_NOIRQ,
 	SUSPEND_RESUME_NOIRQ,
+	SUSPEND_RESUME_EARLY,
 	SUSPEND_RESUME
 };
 
@@ -53,8 +55,10 @@ struct suspend_stats {
 	int	failed_freeze;
 	int	failed_prepare;
 	int	failed_suspend;
+	int	failed_suspend_late;
 	int	failed_suspend_noirq;
 	int	failed_resume;
+	int	failed_resume_early;
 	int	failed_resume_noirq;
 #define	REC_FAILED_NUM	2
 	int	last_failed_dev;
diff --git a/kernel/exit.c b/kernel/exit.c
index ce5f758f40bd81a0975a4690a0d500ad850b925b..0ed15fed579f81754c5cb51b8f4d06e89406de99 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -424,7 +424,7 @@ void daemonize(const char *name, ...)
 	 */
 	exit_mm(current);
 	/*
-	 * We don't want to have TIF_FREEZE set if the system-wide hibernation
+	 * We don't want to get frozen, in case system-wide hibernation
 	 * or suspend transition begins right now.
 	 */
 	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 9815b8d1eed5536e312c561a0c066578e359bd51..11f82a4d4eae07b4d98595852ea50dcf1c028a6b 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -99,9 +99,9 @@ static void fake_signal_wake_up(struct task_struct *p)
  * freeze_task - send a freeze request to given task
  * @p: task to send the request to
  *
- * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE
- * flag and either sending a fake signal to it or waking it up, depending
- * on whether it has %PF_FREEZER_NOSIG set.
+ * If @p is freezing, the freeze request is sent either by sending a fake
+ * signal (if it's not a kernel thread) or waking it up (if it's a kernel
+ * thread).
  *
  * RETURNS:
  * %false, if @p is not freezing or already frozen; %true, otherwise
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 7b0886786701b845cfbd8823252392e40b11a1c8..a6a675cb9818ad43728ba7246472f62b44fcd23b 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1546,13 +1546,13 @@ int kernel_kexec(void)
 		if (error)
 			goto Resume_console;
 		/* At this point, dpm_suspend_start() has been called,
-		 * but *not* dpm_suspend_noirq(). We *must* call
-		 * dpm_suspend_noirq() now.  Otherwise, drivers for
+		 * but *not* dpm_suspend_end(). We *must* call
+		 * dpm_suspend_end() now.  Otherwise, drivers for
 		 * some devices (e.g. interrupt controllers) become
 		 * desynchronized with the actual state of the
 		 * hardware at resume time, and evil weirdness ensues.
 		 */
-		error = dpm_suspend_noirq(PMSG_FREEZE);
+		error = dpm_suspend_end(PMSG_FREEZE);
 		if (error)
 			goto Resume_devices;
 		error = disable_nonboot_cpus();
@@ -1579,7 +1579,7 @@ int kernel_kexec(void)
 		local_irq_enable();
  Enable_cpus:
 		enable_nonboot_cpus();
-		dpm_resume_noirq(PMSG_RESTORE);
+		dpm_resume_start(PMSG_RESTORE);
  Resume_devices:
 		dpm_resume_end(PMSG_RESTORE);
  Resume_console:
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 07e0e28ffba7644b8b97e037d05b9d9aa8e5df46..66d808ec525234bfee1433a4df2c33ff9c399201 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,7 +1,8 @@
 
 ccflags-$(CONFIG_PM_DEBUG)	:= -DDEBUG
 
-obj-$(CONFIG_PM)		+= main.o qos.o
+obj-y				+= qos.o
+obj-$(CONFIG_PM)		+= main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP)	+= console.o
 obj-$(CONFIG_FREEZER)		+= process.o
 obj-$(CONFIG_SUSPEND)		+= suspend.o
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 6d6d28870335831fa05c80428fefc05e20be3f69..0a186cfde7884e6b4f0f3d09e1b3d549e29dafb8 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -245,8 +245,8 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
  * create_image - Create a hibernation image.
  * @platform_mode: Whether or not to use the platform driver.
  *
- * Execute device drivers' .freeze_noirq() callbacks, create a hibernation image
- * and execute the drivers' .thaw_noirq() callbacks.
+ * Execute device drivers' "late" and "noirq" freeze callbacks, create a
+ * hibernation image and run the drivers' "noirq" and "early" thaw callbacks.
  *
  * Control reappears in this routine after the subsequent restore.
  */
@@ -254,7 +254,7 @@ static int create_image(int platform_mode)
 {
 	int error;
 
-	error = dpm_suspend_noirq(PMSG_FREEZE);
+	error = dpm_suspend_end(PMSG_FREEZE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting hibernation\n");
@@ -306,7 +306,7 @@ static int create_image(int platform_mode)
  Platform_finish:
 	platform_finish(platform_mode);
 
-	dpm_resume_noirq(in_suspend ?
+	dpm_resume_start(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 
 	return error;
@@ -343,13 +343,13 @@ int hibernation_snapshot(int platform_mode)
 		 * successful freezer test.
 		 */
 		freezer_test_done = true;
-		goto Cleanup;
+		goto Thaw;
 	}
 
 	error = dpm_prepare(PMSG_FREEZE);
 	if (error) {
 		dpm_complete(PMSG_RECOVER);
-		goto Cleanup;
+		goto Thaw;
 	}
 
 	suspend_console();
@@ -385,6 +385,8 @@ int hibernation_snapshot(int platform_mode)
 	platform_end(platform_mode);
 	return error;
 
+ Thaw:
+	thaw_kernel_threads();
  Cleanup:
 	swsusp_free();
 	goto Close;
@@ -394,16 +396,16 @@ int hibernation_snapshot(int platform_mode)
  * resume_target_kernel - Restore system state from a hibernation image.
  * @platform_mode: Whether or not to use the platform driver.
  *
- * Execute device drivers' .freeze_noirq() callbacks, restore the contents of
- * highmem that have not been restored yet from the image and run the low-level
- * code that will restore the remaining contents of memory and switch to the
- * just restored target kernel.
+ * Execute device drivers' "noirq" and "late" freeze callbacks, restore the
+ * contents of highmem that have not been restored yet from the image and run
+ * the low-level code that will restore the remaining contents of memory and
+ * switch to the just restored target kernel.
  */
 static int resume_target_kernel(bool platform_mode)
 {
 	int error;
 
-	error = dpm_suspend_noirq(PMSG_QUIESCE);
+	error = dpm_suspend_end(PMSG_QUIESCE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting resume\n");
@@ -460,7 +462,7 @@ static int resume_target_kernel(bool platform_mode)
  Cleanup:
 	platform_restore_cleanup(platform_mode);
 
-	dpm_resume_noirq(PMSG_RECOVER);
+	dpm_resume_start(PMSG_RECOVER);
 
 	return error;
 }
@@ -518,7 +520,7 @@ int hibernation_platform_enter(void)
 		goto Resume_devices;
 	}
 
-	error = dpm_suspend_noirq(PMSG_HIBERNATE);
+	error = dpm_suspend_end(PMSG_HIBERNATE);
 	if (error)
 		goto Resume_devices;
 
@@ -549,7 +551,7 @@ int hibernation_platform_enter(void)
  Platform_finish:
 	hibernation_ops->finish();
 
-	dpm_resume_noirq(PMSG_RESTORE);
+	dpm_resume_start(PMSG_RESTORE);
 
  Resume_devices:
 	entering_platform_hibernation = false;
@@ -616,7 +618,7 @@ int hibernate(void)
 	/* Allocate memory management structures */
 	error = create_basic_memory_bitmaps();
 	if (error)
-		goto Exit;
+		goto Enable_umh;
 
 	printk(KERN_INFO "PM: Syncing filesystems ... ");
 	sys_sync();
@@ -624,15 +626,11 @@ int hibernate(void)
 
 	error = freeze_processes();
 	if (error)
-		goto Finish;
+		goto Free_bitmaps;
 
 	error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
-	if (error)
-		goto Thaw;
-	if (freezer_test_done) {
-		freezer_test_done = false;
+	if (error || freezer_test_done)
 		goto Thaw;
-	}
 
 	if (in_suspend) {
 		unsigned int flags = 0;
@@ -657,8 +655,13 @@ int hibernate(void)
 
  Thaw:
 	thaw_processes();
- Finish:
+
+	/* Don't bother checking whether freezer_test_done is true */
+	freezer_test_done = false;
+
+ Free_bitmaps:
 	free_basic_memory_bitmaps();
+ Enable_umh:
 	usermodehelper_enable();
  Exit:
 	pm_notifier_call_chain(PM_POST_HIBERNATION);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 9824b41e5a18cf5ecc5e57773b5ccc2195984c99..1c12581f1c62ce56bfe61bae08d58745309b0b9d 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -165,16 +165,20 @@ static int suspend_stats_show(struct seq_file *s, void *unused)
 	last_errno %= REC_FAILED_NUM;
 	last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1;
 	last_step %= REC_FAILED_NUM;
-	seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n"
-			"%s: %d\n%s: %d\n%s: %d\n%s: %d\n",
+	seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n"
+			"%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n",
 			"success", suspend_stats.success,
 			"fail", suspend_stats.fail,
 			"failed_freeze", suspend_stats.failed_freeze,
 			"failed_prepare", suspend_stats.failed_prepare,
 			"failed_suspend", suspend_stats.failed_suspend,
+			"failed_suspend_late",
+				suspend_stats.failed_suspend_late,
 			"failed_suspend_noirq",
 				suspend_stats.failed_suspend_noirq,
 			"failed_resume", suspend_stats.failed_resume,
+			"failed_resume_early",
+				suspend_stats.failed_resume_early,
 			"failed_resume_noirq",
 				suspend_stats.failed_resume_noirq);
 	seq_printf(s,	"failures:\n  last_failed_dev:\t%-s\n",
@@ -287,16 +291,10 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
 
 #ifdef CONFIG_SUSPEND
 	for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
-		if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
+		if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) {
+			error = pm_suspend(state);
 			break;
-	}
-	if (state < PM_SUSPEND_MAX && *s) {
-		error = enter_state(state);
-		if (error) {
-			suspend_stats.fail++;
-			dpm_save_failed_errno(error);
-		} else
-			suspend_stats.success++;
+		}
 	}
 #endif
 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 21724eee5206f7ffe6f780ba6c0c8acb8e7a0930..98f3622d740713dcf1b6bdfa170b00885d90f121 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -177,13 +177,11 @@ extern const char *const pm_states[];
 
 extern bool valid_state(suspend_state_t state);
 extern int suspend_devices_and_enter(suspend_state_t state);
-extern int enter_state(suspend_state_t state);
 #else /* !CONFIG_SUSPEND */
 static inline int suspend_devices_and_enter(suspend_state_t state)
 {
 	return -ENOSYS;
 }
-static inline int enter_state(suspend_state_t state) { return -ENOSYS; }
 static inline bool valid_state(suspend_state_t state) { return false; }
 #endif /* !CONFIG_SUSPEND */
 
@@ -234,16 +232,14 @@ static inline int suspend_freeze_processes(void)
 	int error;
 
 	error = freeze_processes();
-
 	/*
 	 * freeze_processes() automatically thaws every task if freezing
 	 * fails. So we need not do anything extra upon error.
 	 */
 	if (error)
-		goto Finish;
+		return error;
 
 	error = freeze_kernel_threads();
-
 	/*
 	 * freeze_kernel_threads() thaws only kernel threads upon freezing
 	 * failure. So we have to thaw the userspace tasks ourselves.
@@ -251,7 +247,6 @@ static inline int suspend_freeze_processes(void)
 	if (error)
 		thaw_processes();
 
- Finish:
 	return error;
 }
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7e426459e60a21633d8eeff0d9751455303bdf97..0d2aeb2261082f52d4ead619f352f03c03c35001 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -53,11 +53,9 @@ static int try_to_freeze_tasks(bool user_only)
 			 * It is "frozen enough".  If the task does wake
 			 * up, it will immediately call try_to_freeze.
 			 *
-			 * Because freeze_task() goes through p's
-			 * scheduler lock after setting TIF_FREEZE, it's
-			 * guaranteed that either we see TASK_RUNNING or
-			 * try_to_stop() after schedule() in ptrace/signal
-			 * stop sees TIF_FREEZE.
+			 * Because freeze_task() goes through p's scheduler lock, it's
+			 * guaranteed that TASK_STOPPED/TRACED -> TASK_RUNNING
+			 * transition can't race with task state testing here.
 			 */
 			if (!task_is_stopped_or_traced(p) &&
 			    !freezer_should_skip(p))
@@ -98,13 +96,15 @@ static int try_to_freeze_tasks(bool user_only)
 		       elapsed_csecs / 100, elapsed_csecs % 100,
 		       todo - wq_busy, wq_busy);
 
-		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
-			if (!wakeup && !freezer_should_skip(p) &&
-			    p != current && freezing(p) && !frozen(p))
-				sched_show_task(p);
-		} while_each_thread(g, p);
-		read_unlock(&tasklist_lock);
+		if (!wakeup) {
+			read_lock(&tasklist_lock);
+			do_each_thread(g, p) {
+				if (p != current && !freezer_should_skip(p)
+				    && freezing(p) && !frozen(p))
+					sched_show_task(p);
+			} while_each_thread(g, p);
+			read_unlock(&tasklist_lock);
+		}
 	} else {
 		printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100,
 			elapsed_csecs % 100);
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 995e3bd3417b930902317f9cb0e23dfa4811c36c..d6d6dbd1ecc09f673d987600981c549e5332ad50 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -469,21 +469,18 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
 static int __init pm_qos_power_init(void)
 {
 	int ret = 0;
+	int i;
 
-	ret = register_pm_qos_misc(&cpu_dma_pm_qos);
-	if (ret < 0) {
-		printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n");
-		return ret;
-	}
-	ret = register_pm_qos_misc(&network_lat_pm_qos);
-	if (ret < 0) {
-		printk(KERN_ERR "pm_qos_param: network_latency setup failed\n");
-		return ret;
+	BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES);
+
+	for (i = 1; i < PM_QOS_NUM_CLASSES; i++) {
+		ret = register_pm_qos_misc(pm_qos_array[i]);
+		if (ret < 0) {
+			printk(KERN_ERR "pm_qos_param: %s setup failed\n",
+			       pm_qos_array[i]->name);
+			return ret;
+		}
 	}
-	ret = register_pm_qos_misc(&network_throughput_pm_qos);
-	if (ret < 0)
-		printk(KERN_ERR
-			"pm_qos_param: network_throughput setup failed\n");
 
 	return ret;
 }
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3a564ac85f36cf4bcc80a9d0e260d6d9d2c3609e..0de28576807df9e7dc5be5f05d75a0faa937caf2 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -711,9 +711,10 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
 	list_for_each_entry(region, &nosave_regions, list) {
 		unsigned long pfn;
 
-		pr_debug("PM: Marking nosave pages: %016lx - %016lx\n",
-				region->start_pfn << PAGE_SHIFT,
-				region->end_pfn << PAGE_SHIFT);
+		pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n",
+			 (unsigned long long) region->start_pfn << PAGE_SHIFT,
+			 ((unsigned long long) region->end_pfn << PAGE_SHIFT)
+				- 1);
 
 		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
 			if (pfn_valid(pfn)) {
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4fd51beed87955f938f7b6545e49ffec0430da56..88e5c967370d31e359cbd06f23914735c11f15b2 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -37,8 +37,8 @@ const char *const pm_states[PM_SUSPEND_MAX] = {
 static const struct platform_suspend_ops *suspend_ops;
 
 /**
- *	suspend_set_ops - Set the global suspend method table.
- *	@ops:	Pointer to ops structure.
+ * suspend_set_ops - Set the global suspend method table.
+ * @ops: Suspend operations to use.
  */
 void suspend_set_ops(const struct platform_suspend_ops *ops)
 {
@@ -58,11 +58,11 @@ bool valid_state(suspend_state_t state)
 }
 
 /**
- * suspend_valid_only_mem - generic memory-only valid callback
+ * suspend_valid_only_mem - Generic memory-only valid callback.
  *
- * Platform drivers that implement mem suspend only and only need
- * to check for that in their .valid callback can use this instead
- * of rolling their own .valid callback.
+ * Platform drivers that implement mem suspend only and only need to check for
+ * that in their .valid() callback can use this instead of rolling their own
+ * .valid() callback.
  */
 int suspend_valid_only_mem(suspend_state_t state)
 {
@@ -83,10 +83,11 @@ static int suspend_test(int level)
 }
 
 /**
- *	suspend_prepare - Do prep work before entering low-power state.
+ * suspend_prepare - Prepare for entering system sleep state.
  *
- *	This is common code that is called for each state that we're entering.
- *	Run suspend notifiers, allocate a console and stop all processes.
+ * Common code run for every system sleep state that can be entered (except for
+ * hibernation).  Run suspend notifiers, allocate the "suspend" console and
+ * freeze processes.
  */
 static int suspend_prepare(void)
 {
@@ -131,9 +132,9 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
 }
 
 /**
- * suspend_enter - enter the desired system sleep state.
- * @state: State to enter
- * @wakeup: Returns information that suspend should not be entered again.
+ * suspend_enter - Make the system enter the given sleep state.
+ * @state: System sleep state to enter.
+ * @wakeup: Returns information that the sleep state should not be re-entered.
  *
  * This function should be called after devices have been suspended.
  */
@@ -147,7 +148,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 			goto Platform_finish;
 	}
 
-	error = dpm_suspend_noirq(PMSG_SUSPEND);
+	error = dpm_suspend_end(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
 		goto Platform_finish;
@@ -189,7 +190,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	if (suspend_ops->wake)
 		suspend_ops->wake();
 
-	dpm_resume_noirq(PMSG_RESUME);
+	dpm_resume_start(PMSG_RESUME);
 
  Platform_finish:
 	if (suspend_ops->finish)
@@ -199,9 +200,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 }
 
 /**
- *	suspend_devices_and_enter - suspend devices and enter the desired system
- *				    sleep state.
- *	@state:		  state to enter
+ * suspend_devices_and_enter - Suspend devices and enter system sleep state.
+ * @state: System sleep state to enter.
  */
 int suspend_devices_and_enter(suspend_state_t state)
 {
@@ -251,10 +251,10 @@ int suspend_devices_and_enter(suspend_state_t state)
 }
 
 /**
- *	suspend_finish - Do final work before exiting suspend sequence.
+ * suspend_finish - Clean up before finishing the suspend sequence.
  *
- *	Call platform code to clean up, restart processes, and free the
- *	console that we've allocated. This is not called for suspend-to-disk.
+ * Call platform code to clean up, restart processes, and free the console that
+ * we've allocated. This routine is not called for hibernation.
  */
 static void suspend_finish(void)
 {
@@ -265,16 +265,14 @@ static void suspend_finish(void)
 }
 
 /**
- *	enter_state - Do common work of entering low-power state.
- *	@state:		pm_state structure for state we're entering.
+ * enter_state - Do common work needed to enter system sleep state.
+ * @state: System sleep state to enter.
  *
- *	Make sure we're the only ones trying to enter a sleep state. Fail
- *	if someone has beat us to it, since we don't want anything weird to
- *	happen when we wake up.
- *	Then, do the setup for suspend, enter the state, and cleaup (after
- *	we've woken up).
+ * Make sure that no one else is trying to put the system into a sleep state.
+ * Fail if that's not the case.  Otherwise, prepare for system suspend, make the
+ * system enter the given sleep state and clean up after wakeup.
  */
-int enter_state(suspend_state_t state)
+static int enter_state(suspend_state_t state)
 {
 	int error;
 
@@ -310,24 +308,26 @@ int enter_state(suspend_state_t state)
 }
 
 /**
- *	pm_suspend - Externally visible function for suspending system.
- *	@state:		Enumerated value of state to enter.
+ * pm_suspend - Externally visible function for suspending the system.
+ * @state: System sleep state to enter.
  *
- *	Determine whether or not value is within range, get state
- *	structure, and enter (above).
+ * Check if the value of @state represents one of the supported states,
+ * execute enter_state() and update system suspend statistics.
  */
 int pm_suspend(suspend_state_t state)
 {
-	int ret;
-	if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) {
-		ret = enter_state(state);
-		if (ret) {
-			suspend_stats.fail++;
-			dpm_save_failed_errno(ret);
-		} else
-			suspend_stats.success++;
-		return ret;
+	int error;
+
+	if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
+		return -EINVAL;
+
+	error = enter_state(state);
+	if (error) {
+		suspend_stats.fail++;
+		dpm_save_failed_errno(error);
+	} else {
+		suspend_stats.success++;
 	}
-	return -EINVAL;
+	return error;
 }
 EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 3e100075b13cb6ab449034af458266e20dcaa369..33c4329205af0e88da9e80ee98e9ff0e1259bf7d 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -249,16 +249,10 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		}
 		pm_restore_gfp_mask();
 		error = hibernation_snapshot(data->platform_support);
-		if (error) {
-			thaw_kernel_threads();
-		} else {
+		if (!error) {
 			error = put_user(in_suspend, (int __user *)arg);
-			if (!error && !freezer_test_done)
-				data->ready = 1;
-			if (freezer_test_done) {
-				freezer_test_done = false;
-				thaw_kernel_threads();
-			}
+			data->ready = !freezer_test_done && !error;
+			freezer_test_done = false;
 		}
 		break;