diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 9ed2377fe8e54b26fca9fc2ec6a9b6694fb17533..d0daeab2234e34cc4ac8b466a037a176e188e5b7 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -19,31 +19,21 @@
 
 #ifdef __KERNEL__
 
+/*
+ * On ARM, ordinary assignment (str instruction) doesn't clear the local
+ * strex/ldrex monitor on some implementations. The reason we can use it for
+ * atomic_set() is the clrex or dummy strex done on every exception return.
+ */
 #define atomic_read(v)	((v)->counter)
+#define atomic_set(v,i)	(((v)->counter) = (i))
 
 #if __LINUX_ARM_ARCH__ >= 6
 
 /*
  * ARMv6 UP and SMP safe atomic ops.  We use load exclusive and
  * store exclusive to ensure that these are atomic.  We may loop
- * to ensure that the update happens.  Writing to 'v->counter'
- * without using the following operations WILL break the atomic
- * nature of these ops.
+ * to ensure that the update happens.
  */
-static inline void atomic_set(atomic_t *v, int i)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__("@ atomic_set\n"
-"1:	ldrex	%0, [%1]\n"
-"	strex	%0, %2, [%1]\n"
-"	teq	%0, #0\n"
-"	bne	1b"
-	: "=&r" (tmp)
-	: "r" (&v->counter), "r" (i)
-	: "cc");
-}
-
 static inline void atomic_add(int i, atomic_t *v)
 {
 	unsigned long tmp;
@@ -163,8 +153,6 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
-#define atomic_set(v,i)	(((v)->counter) = (i))
-
 static inline int atomic_add_return(int i, atomic_t *v)
 {
 	unsigned long flags;
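
With the patch applied the split is: the remaining ARMv6+ ops keep their ldrex/strex retry loop, while atomic_set() becomes an ordinary store that relies on the exception-return clearing added further down. A minimal user-space sketch of that split, assuming an ARM GCC target of armv6 or later; the my_atomic_* names and the standalone struct are illustrative, not part of the patch:

typedef struct { int counter; } my_atomic_t;

static inline void my_atomic_add(int i, my_atomic_t *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__("@ my_atomic_add\n"
"1:	ldrex	%0, [%2]\n"		/* load counter, open the local monitor */
"	add	%0, %0, %3\n"
"	strex	%1, %0, [%2]\n"		/* %1 == 0 only if still exclusive */
"	teq	%1, #0\n"
"	bne	1b"			/* reservation lost: retry */
	: "=&r" (result), "=&r" (tmp)
	: "r" (&v->counter), "Ir" (i)
	: "cc", "memory");
}

static inline void my_atomic_set(my_atomic_t *v, int i)
{
	/* plain str; safe because every exception return clears the monitor */
	v->counter = i;
}

If the plain store lands between another CPU's ldrex and strex, that strex fails through the exclusives monitor and the loop retries against the new value; if it happens in an interrupt on the same CPU, the clearing on exception return produces the same retry.
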
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 3d727a8a23bc8013749458c4a870da618ca7d772..a332bc7225bfab148af4a508233b109f8bf645a7 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -734,13 +734,6 @@ ENTRY(__switch_to)
 #ifdef CONFIG_MMU
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
-#if __LINUX_ARM_ARCH__ >= 6
-#ifdef CONFIG_CPU_32v6K
-	clrex
-#else
-	strex	r5, r4, [ip]			@ Clear exclusive monitor
-#endif
-#endif
 #if defined(CONFIG_HAS_TLS_REG)
 	mcr	p15, 0, r3, c13, c0, 3		@ set TLS register
 #elif !defined(CONFIG_TLS_REG_EMUL)
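
The lines removed here are the per-context-switch monitor clearing; the entry-header.S changes below do the same job on every exception return, which makes the clear in __switch_to redundant because any route back into preempted code passes through an exception return first. Roughly, the two clearing idioms as a C helper; this is a sketch, with GCC's __ARM_ARCH_6K__/__ARM_ARCH_7A__ predefines standing in for CONFIG_CPU_32v6K and the clear_exclusive_monitor() name being illustrative:

/* Force the local monitor to the Open Access state.  ARMv6K and later have a
 * dedicated clrex instruction; plain ARMv6 has to use a dummy strex, which may
 * really store its operand while a reservation is open, so the target must be
 * a word that is safe to overwrite (the removed code pointed ip just past the
 * registers __switch_to had saved). */
static inline void clear_exclusive_monitor(unsigned long *scratch)
{
#if defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_7A__)
	__asm__ __volatile__("clrex" : : : "memory");
#else
	unsigned long tmp;

	__asm__ __volatile__("strex %0, %1, [%2]"	/* status in %0 is ignored */
	: "=&r" (tmp)
	: "r" (0UL), "r" (scratch)
	: "memory");
#endif
}
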
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index a4eaf4f920c58fbaa8c5cb6be285a7c4f684a376..e17e3c30d957ca3a2c32bbc6c91ff626df14151c 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -76,13 +76,27 @@
 #ifndef CONFIG_THUMB2_KERNEL
 	.macro	svc_exit, rpsr
 	msr	spsr_cxsf, \rpsr
+#if defined(CONFIG_CPU_32v6K)
+	clrex					@ clear the exclusive monitor
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+#elif defined (CONFIG_CPU_V6)
+	ldr	r0, [sp]
+	strex	r1, r2, [sp]			@ clear the exclusive monitor
+	ldmib	sp, {r1 - pc}^			@ load r1 - pc, cpsr
+#else
+	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+#endif
 	.endm
 
 	.macro	restore_user_regs, fast = 0, offset = 0
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
 	msr	spsr_cxsf, r1			@ save in spsr_svc
+#if defined(CONFIG_CPU_32v6K)
+	clrex					@ clear the exclusive monitor
+#elif defined (CONFIG_CPU_V6)
+	strex	r1, r2, [sp]			@ clear the exclusive monitor
+#endif
 	.if	\fast
 	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
 	.else
@@ -98,6 +112,7 @@
 	.endm
 #else	/* CONFIG_THUMB2_KERNEL */
 	.macro	svc_exit, rpsr
+	clrex					@ clear the exclusive monitor
 	ldr	r0, [sp, #S_SP]			@ top of the stack
 	ldr	r1, [sp, #S_PC]			@ return address
 	tst	r0, #4				@ orig stack 8-byte aligned?
@@ -110,6 +125,7 @@
 	.endm
 
 	.macro	restore_user_regs, fast = 0, offset = 0
+	clrex					@ clear the exclusive monitor
 	mov	r2, sp
 	load_user_sp_lr r2, r3, \offset + S_SP	@ calling sp, lr
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
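
Everything above relies on one architectural property: once the local monitor has been forced to the Open Access state, the next strex refuses to perform the store and returns a non-zero status, so an interrupted ldrex/strex sequence retries instead of completing against stale data. A small user-space probe of that behaviour, a sketch assuming an ARMv7 toolchain and target (e.g. gcc -O2 -marm -march=armv7-a); it is not part of the patch:

#include <stdio.h>

int main(void)
{
	unsigned long word = 42;
	unsigned long status, old;

	__asm__ __volatile__(
		"ldrex	%1, [%2]\n\t"	/* open a reservation on &word */
		"clrex\n\t"		/* drop it, as the exception return now does */
		"strex	%0, %1, [%2]"	/* the store-exclusive must now fail */
	: "=&r" (status), "=&r" (old)
	: "r" (&word)
	: "memory");

	/* Expect 1: the strex reported failure and did not write to 'word'. */
	printf("strex status after clrex: %lu\n", status);
	return 0;
}
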