diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 6aed974276d8f2cf9f337e73b04cd8479e99bab8..34f7222c5efe0405af96d09f1c691405109ab810 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -12,17 +12,17 @@ config NDS32
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select DMA_NONCOHERENT_OPS
-	select GENERIC_ASHLDI3
-	select GENERIC_ASHRDI3
-	select GENERIC_LSHRDI3
-	select GENERIC_CMPDI2
-	select GENERIC_MULDI3
-	select GENERIC_UCMPDI2
 	select GENERIC_ATOMIC64
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_CHIP
 	select GENERIC_IRQ_SHOW
+	select GENERIC_LIB_ASHLDI3
+	select GENERIC_LIB_ASHRDI3
+	select GENERIC_LIB_CMPDI2
+	select GENERIC_LIB_LSHRDI3
+	select GENERIC_LIB_MULDI3
+	select GENERIC_LIB_UCMPDI2
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
index 513bb2e9baf9fa84615a8d03ce9ec57fb7f55849..031c676821ff8797a879c6af56aeffd9ea4c4ed3 100644
--- a/arch/nds32/Makefile
+++ b/arch/nds32/Makefile
@@ -34,10 +34,12 @@ ifdef CONFIG_CPU_LITTLE_ENDIAN
 KBUILD_CFLAGS   += $(call cc-option, -EL)
 KBUILD_AFLAGS   += $(call cc-option, -EL)
 LDFLAGS         += $(call cc-option, -EL)
+CHECKFLAGS      += -D__NDS32_EL__
 else
 KBUILD_CFLAGS   += $(call cc-option, -EB)
 KBUILD_AFLAGS   += $(call cc-option, -EB)
 LDFLAGS         += $(call cc-option, -EB)
+CHECKFLAGS      += -D__NDS32_EB__
 endif
 
 boot := arch/nds32/boot
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 10b48f0d8e857fe9ae3ec6d35bd4a6004c2b4aba..8b26198d51bb78b60a28748248e1e50f89be52ab 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -8,6 +8,8 @@
 
 #define PG_dcache_dirty PG_arch_1
 
+void flush_icache_range(unsigned long start, unsigned long end);
+void flush_icache_page(struct vm_area_struct *vma, struct page *page);
 #ifdef CONFIG_CPU_CACHE_ALIASING
 void flush_cache_mm(struct mm_struct *mm);
 void flush_cache_dup_mm(struct mm_struct *mm);
@@ -34,13 +36,16 @@ void flush_anon_page(struct vm_area_struct *vma,
 void flush_kernel_dcache_page(struct page *page);
 void flush_kernel_vmap_range(void *addr, int size);
 void invalidate_kernel_vmap_range(void *addr, int size);
-void flush_icache_range(unsigned long start, unsigned long end);
-void flush_icache_page(struct vm_area_struct *vma, struct page *page);
 #define flush_dcache_mmap_lock(mapping)   xa_lock_irq(&(mapping)->i_pages)
 #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
 
 #else
 #include <asm-generic/cacheflush.h>
+#undef flush_icache_range
+#undef flush_icache_page
+#undef flush_icache_user_range
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+	                     unsigned long addr, int len);
 #endif
 
 #endif /* __NDS32_CACHEFLUSH_H__ */
diff --git a/arch/nds32/include/asm/futex.h b/arch/nds32/include/asm/futex.h
index eab5e84bd9919eaa7503eef129c3cd8072b9a61c..cb6cb91cfdf81622dc170286d83803e2d4e7ad73 100644
--- a/arch/nds32/include/asm/futex.h
+++ b/arch/nds32/include/asm/futex.h
@@ -16,7 +16,7 @@
 	"	.popsection\n"					\
 	"	.pushsection .fixup,\"ax\"\n"			\
 	"4:	move	%0, " err_reg "\n"			\
-	"	j	3b\n"					\
+	"	b	3b\n"					\
 	"	.popsection"
 
 #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg)	\
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index 2f5b2ccebe47166a9863468960f145e9601a9bf4..63a1a5ef5219f47bcd9797e543298056294da22f 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -278,7 +278,8 @@ static void __init setup_memory(void)
 
 void __init setup_arch(char **cmdline_p)
 {
-	early_init_devtree( __dtb_start);
+	early_init_devtree(__atags_pointer ? \
+		phys_to_virt(__atags_pointer) : __dtb_start);
 
 	setup_cpuinfo();
 
diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c
index ce8fd34497bf045beafa845d2df1500b00281a4c..254703653b6f5db430af4f4ae01cca63aa67049a 100644
--- a/arch/nds32/mm/cacheflush.c
+++ b/arch/nds32/mm/cacheflush.c
@@ -13,7 +13,39 @@
 
 extern struct cache_info L1_cache_info[2];
 
-#ifndef CONFIG_CPU_CACHE_ALIASING
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long line_size, flags;
+	line_size = L1_cache_info[DCACHE].line_size;
+	start = start & ~(line_size - 1);
+	end = (end + line_size - 1) & ~(line_size - 1);
+	local_irq_save(flags);
+	cpu_cache_wbinval_range(start, end, 1);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	unsigned long flags;
+	unsigned long kaddr;
+	local_irq_save(flags);
+	kaddr = (unsigned long)kmap_atomic(page);
+	cpu_cache_wbinval_page(kaddr, vma->vm_flags & VM_EXEC);
+	kunmap_atomic((void *)kaddr);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(flush_icache_page);
+
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+	                     unsigned long addr, int len)
+{
+	unsigned long kaddr;
+	kaddr = (unsigned long)kmap_atomic(page) + (addr & ~PAGE_MASK);
+	flush_icache_range(kaddr, kaddr + len);
+	kunmap_atomic((void *)kaddr);
+}
+
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
 		      pte_t * pte)
 {
@@ -35,19 +67,15 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
 
 	if ((test_and_clear_bit(PG_dcache_dirty, &page->flags)) ||
 	    (vma->vm_flags & VM_EXEC)) {
-
-		if (!PageHighMem(page)) {
-			cpu_cache_wbinval_page((unsigned long)
-					       page_address(page),
-					       vma->vm_flags & VM_EXEC);
-		} else {
-			unsigned long kaddr = (unsigned long)kmap_atomic(page);
-			cpu_cache_wbinval_page(kaddr, vma->vm_flags & VM_EXEC);
-			kunmap_atomic((void *)kaddr);
-		}
+		unsigned long kaddr;
+		local_irq_save(flags);
+		kaddr = (unsigned long)kmap_atomic(page);
+		cpu_cache_wbinval_page(kaddr, vma->vm_flags & VM_EXEC);
+		kunmap_atomic((void *)kaddr);
+		local_irq_restore(flags);
 	}
 }
-#else
+#ifdef CONFIG_CPU_CACHE_ALIASING
 extern pte_t va_present(struct mm_struct *mm, unsigned long addr);
 
 static inline unsigned long aliasing(unsigned long addr, unsigned long page)
@@ -317,52 +345,4 @@ void invalidate_kernel_vmap_range(void *addr, int size)
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(invalidate_kernel_vmap_range);
-
-void flush_icache_range(unsigned long start, unsigned long end)
-{
-	unsigned long line_size, flags;
-	line_size = L1_cache_info[DCACHE].line_size;
-	start = start & ~(line_size - 1);
-	end = (end + line_size - 1) & ~(line_size - 1);
-	local_irq_save(flags);
-	cpu_cache_wbinval_range(start, end, 1);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(flush_icache_range);
-
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
-{
-	unsigned long flags;
-	local_irq_save(flags);
-	cpu_cache_wbinval_page((unsigned long)page_address(page),
-			       vma->vm_flags & VM_EXEC);
-	local_irq_restore(flags);
-}
-
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
-		      pte_t * pte)
-{
-	struct page *page;
-	unsigned long flags;
-	unsigned long pfn = pte_pfn(*pte);
-
-	if (!pfn_valid(pfn))
-		return;
-
-	if (vma->vm_mm == current->active_mm) {
-		local_irq_save(flags);
-		__nds32__mtsr_dsb(addr, NDS32_SR_TLB_VPN);
-		__nds32__tlbop_rwr(*pte);
-		__nds32__isb();
-		local_irq_restore(flags);
-	}
-
-	page = pfn_to_page(pfn);
-	if (test_and_clear_bit(PG_dcache_dirty, &page->flags) ||
-	    (vma->vm_flags & VM_EXEC)) {
-		local_irq_save(flags);
-		cpu_dcache_wbinval_page((unsigned long)page_address(page));
-		local_irq_restore(flags);
-	}
-}
 #endif