diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ce00c570459de45a19b0ae97c56aca85a84d36f2..7be67ef4b84f95ee5da32b86254a895fcde1f502 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -374,6 +374,14 @@ config PLAT_IOP
 
 source arch/arm/mm/Kconfig
 
+config IWMMXT
+	bool "Enable iWMMXt support"
+	depends CPU_XSCALE || CPU_XSC3
+	default y if PXA27x
+	help
+	  Enable support for iWMMXt context switching at run time if
+	  running on a CPU that supports it.
+
 #  bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER
 config XSCALE_PMU
 	bool
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 1320a0efca738eea92013b0d9461913d758205c7..ab06a86e85d58c71e17fb223e8e8ef4b45e22587 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -24,7 +24,9 @@ obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_CRUNCH)		+= crunch.o crunch-bits.o
 AFLAGS_crunch-bits.o		:= -Wa,-mcpu=ep9312
 
-obj-$(CONFIG_IWMMXT)		+= iwmmxt.o iwmmxt-notifier.o
+obj-$(CONFIG_CPU_XSCALE)	+= xscale-cp0.o
+obj-$(CONFIG_CPU_XSC3)		+= xscale-cp0.o
+obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 
 ifneq ($(CONFIG_ARCH_EBSA110),y)
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index bd623b73445f79a9b37018e0da7d320d003c2a7c..2db42b18f53f060a1040ba65cf01114793535ad9 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -589,10 +589,6 @@ ENTRY(__switch_to)
 	strex	r5, r4, [ip]			@ Clear exclusive monitor
 #endif
 #endif
-#if defined(CONFIG_CPU_XSCALE) && !defined(CONFIG_IWMMXT)
-	mra	r4, r5, acc0
-	stmia   ip, {r4, r5}
-#endif
 #if defined(CONFIG_HAS_TLS_REG)
 	mcr	p15, 0, r3, c13, c0, 3		@ set TLS register
 #elif !defined(CONFIG_TLS_REG_EMUL)
@@ -601,11 +597,6 @@ ENTRY(__switch_to)
 #endif
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
-#endif
-#if defined(CONFIG_CPU_XSCALE) && !defined(CONFIG_IWMMXT)
-	add	r4, r2, #TI_CPU_DOMAIN + 40	@ cpu_context_save->extra
-	ldmib	r4, {r4, r5}
-	mar	acc0, r4, r5
 #endif
 	mov	r5, r0
 	add	r4, r2, #TI_CPU_SAVE
diff --git a/arch/arm/kernel/iwmmxt-notifier.c b/arch/arm/kernel/iwmmxt-notifier.c
deleted file mode 100644
index 0d1a1db40062990a60bdd93d691a047941a6d4c1..0000000000000000000000000000000000000000
--- a/arch/arm/kernel/iwmmxt-notifier.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *  linux/arch/arm/kernel/iwmmxt-notifier.c
- *
- *  XScale iWMMXt (Concan) context switching and handling
- *
- *  Initial code:
- *  Copyright (c) 2003, Intel Corporation
- *
- *  Full lazy switching support, optimizations and more, by Nicolas Pitre
- *  Copyright (c) 2003-2004, MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/thread_notify.h>
-#include <asm/io.h>
-
-static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t)
-{
-	struct thread_info *thread = t;
-
-	switch (cmd) {
-	case THREAD_NOTIFY_FLUSH:
-		/*
-		 * flush_thread() zeroes thread->fpstate, so no need
-		 * to do anything here.
-		 *
-		 * FALLTHROUGH: Ensure we don't try to overwrite our newly
-		 * initialised state information on the first fault.
-		 */
-
-	case THREAD_NOTIFY_RELEASE:
-		iwmmxt_task_release(thread);
-		break;
-
-	case THREAD_NOTIFY_SWITCH:
-		iwmmxt_task_switch(thread);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block iwmmxt_notifier_block = {
-	.notifier_call	= iwmmxt_do,
-};
-
-static int __init iwmmxt_init(void)
-{
-	thread_register_notifier(&iwmmxt_notifier_block);
-
-	return 0;
-}
-
-late_initcall(iwmmxt_init);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 29efc9f82057c534d5ece4868e29ff00b0854e9f..6f12d2686aaf586d39aebb429a9d9138c58568b9 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -357,9 +357,6 @@ static void __init setup_processor(void)
 #ifndef CONFIG_VFP
 	elf_hwcap &= ~HWCAP_VFP;
 #endif
-#ifndef CONFIG_IWMMXT
-	elf_hwcap &= ~HWCAP_IWMMXT;
-#endif
 
 	cpu_proc_init();
 }
diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c
new file mode 100644
index 0000000000000000000000000000000000000000..180000bfdc8fcd0e5f1ec6c54a2995e8126f3b6e
--- /dev/null
+++ b/arch/arm/kernel/xscale-cp0.c
@@ -0,0 +1,179 @@
+/*
+ * linux/arch/arm/kernel/xscale-cp0.c
+ *
+ * XScale DSP and iWMMXt coprocessor context switching and handling
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <asm/thread_notify.h>
+#include <asm/io.h>
+
+static inline void dsp_save_state(u32 *state)
+{
+	__asm__ __volatile__ (
+		"mrrc	p0, 0, %0, %1, c0\n"
+		: "=r" (state[0]), "=r" (state[1]));
+}
+
+static inline void dsp_load_state(u32 *state)
+{
+	__asm__ __volatile__ (
+		"mcrr	p0, 0, %0, %1, c0\n"
+		: : "r" (state[0]), "r" (state[1]));
+}
+
+static int dsp_do(struct notifier_block *self, unsigned long cmd, void *t)
+{
+	struct thread_info *thread = t;
+
+	switch (cmd) {
+	case THREAD_NOTIFY_FLUSH:
+		thread->cpu_context.extra[0] = 0;
+		thread->cpu_context.extra[1] = 0;
+		break;
+
+	case THREAD_NOTIFY_SWITCH:
+		dsp_save_state(current_thread_info()->cpu_context.extra);
+		dsp_load_state(thread->cpu_context.extra);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dsp_notifier_block = {
+	.notifier_call	= dsp_do,
+};
+
+
+#ifdef CONFIG_IWMMXT
+static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t)
+{
+	struct thread_info *thread = t;
+
+	switch (cmd) {
+	case THREAD_NOTIFY_FLUSH:
+		/*
+		 * flush_thread() zeroes thread->fpstate, so no need
+		 * to do anything here.
+		 *
+		 * FALLTHROUGH: Ensure we don't try to overwrite our newly
+		 * initialised state information on the first fault.
+		 */
+
+	case THREAD_NOTIFY_RELEASE:
+		iwmmxt_task_release(thread);
+		break;
+
+	case THREAD_NOTIFY_SWITCH:
+		iwmmxt_task_switch(thread);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block iwmmxt_notifier_block = {
+	.notifier_call	= iwmmxt_do,
+};
+#endif
+
+
+static u32 __init xscale_cp_access_read(void)
+{
+	u32 value;
+
+	__asm__ __volatile__ (
+		"mrc	p15, 0, %0, c15, c1, 0\n\t"
+		: "=r" (value));
+
+	return value;
+}
+
+static void __init xscale_cp_access_write(u32 value)
+{
+	u32 temp;
+
+	__asm__ __volatile__ (
+		"mcr	p15, 0, %1, c15, c1, 0\n\t"
+		"mrc	p15, 0, %0, c15, c1, 0\n\t"
+		"mov	%0, %0\n\t"
+		"sub	pc, pc, #4\n\t"
+		: "=r" (temp) : "r" (value));
+}
+
+/*
+ * Detect whether we have a MAC coprocessor (40 bit register) or an
+ * iWMMXt coprocessor (64 bit registers) by loading 00000100:00000000
+ * into a coprocessor register and reading it back, and checking
+ * whether the upper word survived intact.
+ */
+static int __init cpu_has_iwmmxt(void)
+{
+	u32 lo;
+	u32 hi;
+
+	/*
+	 * This sequence is interpreted by the DSP coprocessor as:
+	 *	mar	acc0, %2, %3
+	 *	mra	%0, %1, acc0
+	 *
+	 * And by the iWMMXt coprocessor as:
+	 *	tmcrr	wR0, %2, %3
+	 *	tmrrc	%0, %1, wR0
+	 */
+	__asm__ __volatile__ (
+		"mcrr	p0, 0, %2, %3, c0\n"
+		"mrrc	p0, 0, %0, %1, c0\n"
+		: "=r" (lo), "=r" (hi)
+		: "r" (0), "r" (0x100));
+
+	return !!hi;
+}
+
+
+/*
+ * If we detect that the CPU has iWMMXt (and CONFIG_IWMMXT=y), we
+ * disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy
+ * switch code handle iWMMXt context switching.  If on the other
+ * hand the CPU has a DSP coprocessor, we keep access to CP0 enabled
+ * all the time, and save/restore acc0 on context switch in non-lazy
+ * fashion.
+ */
+static int __init xscale_cp0_init(void)
+{
+	u32 cp_access;
+
+	cp_access = xscale_cp_access_read() & ~3;
+	xscale_cp_access_write(cp_access | 1);
+
+	if (cpu_has_iwmmxt()) {
+#ifndef CONFIG_IWMMXT
+		printk(KERN_WARNING "CAUTION: XScale iWMMXt coprocessor "
+			"detected, but kernel support is missing.\n");
+#else
+		printk(KERN_INFO "XScale iWMMXt coprocessor detected.\n");
+		elf_hwcap |= HWCAP_IWMMXT;
+		thread_register_notifier(&iwmmxt_notifier_block);
+#endif
+	} else {
+		printk(KERN_INFO "XScale DSP coprocessor detected.\n");
+		thread_register_notifier(&dsp_notifier_block);
+		cp_access |= 1;
+	}
+
+	xscale_cp_access_write(cp_access);
+
+	return 0;
+}
+
+late_initcall(xscale_cp0_init);
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 03d07cae26c827285f9407e47c9b46504d6a08b1..9e3d0bdcba071acd5e8e185aa933417367ed8d08 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -13,12 +13,10 @@ config ARCH_LUBBOCK
 config MACH_LOGICPD_PXA270
 	bool "LogicPD PXA270 Card Engine Development Platform"
 	select PXA27x
-	select IWMMXT
 
 config MACH_MAINSTONE
 	bool "Intel HCDDBBVA0 Development Platform"
 	select PXA27x
-	select IWMMXT
 
 config ARCH_PXA_IDP
 	bool "Accelent Xscale IDP"
@@ -53,7 +51,6 @@ config PXA_SHARPSL_25x
 config PXA_SHARPSL_27x
 	bool "Sharp PXA270 models (SL-Cxx00)"
 	select PXA27x
-	select IWMMXT
 
 endchoice
 
@@ -129,11 +126,6 @@ config PXA27x
 	help
 	  Select code specific to PXA27x variants
 
-config IWMMXT
-	bool
-	help
-	  Enable support for iWMMXt
-
 config PXA_SHARP_C7xx
 	bool
 	select PXA_SSP
diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c
index 2112c414f0e212588742c1be02c2a312772856a8..b4d8276d6050b1e34d16c06153228f03c0b113a3 100644
--- a/arch/arm/mach-pxa/pm.c
+++ b/arch/arm/mach-pxa/pm.c
@@ -83,7 +83,8 @@ int pxa_pm_enter(suspend_state_t state)
 
 #ifdef CONFIG_IWMMXT
 	/* force any iWMMXt context to ram **/
-	iwmmxt_task_disable(NULL);
+	if (elf_hwcap & HWCAP_IWMMXT)
+		iwmmxt_task_disable(NULL);
 #endif
 
 	/* preserve current time */
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 906e9de7f83e827aa63e7121832ca7a6b69451f4..cc1004b3e5117cdcfa9a6d365406de93ebdb1722 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -491,12 +491,7 @@ __xscale_setup:
 	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I, D caches & BTB
 	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I, D TLBs
-#ifdef CONFIG_IWMMXT
-	mov	r0, #0				@ initially disallow access to CP0/CP1
-#else
-	mov	r0, #1				@ Allow access to CP0
-#endif
-	orr     r0, r0, #1 << 6			@ cp6 for IOP3xx and Bulverde
+	mov	r0, #1 << 6			@ cp6 for IOP3xx and Bulverde
 	orr	r0, r0, #1 << 13		@ Its undefined whether this
 	mcr	p15, 0, r0, c15, c1, 0		@ affects USR or SVC modes
 
@@ -909,7 +904,7 @@ __pxa270_proc_info:
 	b	__xscale_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
-	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_IWMMXT
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
 	.long	cpu_pxa270_name
 	.long	xscale_processor_functions
 	.long	v4wbi_tlb_fns
diff --git a/include/asm-arm/elf.h b/include/asm-arm/elf.h
index f3929307a56b6a06fe1b95bbe2a07fed32803c69..642382d2c9f051eec79f4b13ffcfc7cbc4a42432 100644
--- a/include/asm-arm/elf.h
+++ b/include/asm-arm/elf.h
@@ -114,40 +114,24 @@ extern char elf_platform[];
    have no such handler.  */
 #define ELF_PLAT_INIT(_r, load_addr)	(_r)->ARM_r0 = 0
 
-#ifndef CONFIG_IWMMXT
-
-/* Old NetWinder binaries were compiled in such a way that the iBCS
-   heuristic always trips on them.  Until these binaries become uncommon
-   enough not to care, don't trust the `ibcs' flag here.  In any case
-   there is no other ELF system currently supported by iBCS.
-   @@ Could print a warning message to encourage users to upgrade.  */
-#define SET_PERSONALITY(ex,ibcs2) \
-	set_personality(((ex).e_flags & EF_ARM_APCS26 ? PER_LINUX : PER_LINUX_32BIT))
-
-#else
-
 /*
- * All iWMMXt capable CPUs don't support 26-bit mode.  Yet they can run
- * legacy binaries which used to contain FPA11 floating point instructions
- * that have always been emulated by the kernel.  PFA11 and iWMMXt overlap
- * on coprocessor 1 space though.  We therefore must decide if given task
- * is allowed to use CP 0 and 1 for iWMMXt, or if they should be blocked
- * at all times for the prefetch exception handler to catch FPA11 opcodes
- * and emulate them.  The best indication to discriminate those two cases
- * is the SOFT_FLOAT flag in the ELF header.
+ * Since the FPA coprocessor uses CP1 and CP2, and iWMMXt uses CP0
+ * and CP1, we only enable access to the iWMMXt coprocessor if the
+ * binary is EABI or softfloat (and thus, guaranteed not to use
+ * FPA instructions.)
  */
-
-#define SET_PERSONALITY(ex,ibcs2) \
-do { \
-	set_personality(PER_LINUX_32BIT); \
-	if (((ex).e_flags & EF_ARM_EABI_MASK) || \
-	    ((ex).e_flags & EF_ARM_SOFT_FLOAT)) \
-		set_thread_flag(TIF_USING_IWMMXT); \
-	else \
-		clear_thread_flag(TIF_USING_IWMMXT); \
-} while (0)
-
-#endif
+#define SET_PERSONALITY(ex, ibcs2)					\
+	do {								\
+		if ((ex).e_flags & EF_ARM_APCS26) {			\
+			set_personality(PER_LINUX);			\
+		} else {						\
+			set_personality(PER_LINUX_32BIT);		\
+			if (elf_hwcap & HWCAP_IWMMXT && (ex).e_flags & (EF_ARM_EABI_MASK | EF_ARM_SOFT_FLOAT)) \
+				set_thread_flag(TIF_USING_IWMMXT);	\
+			else						\
+				clear_thread_flag(TIF_USING_IWMMXT);	\
+		}							\
+	} while (0)
 
 #endif