diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 857f1dfac794c5bc5ef63a01b840f3444d0c2888..321c83e43a1e7a2cc9d8d770417004a5c206be9a 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -36,6 +36,8 @@
 struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
+	unsigned long		range_start;
+	unsigned long		range_end;
 };
 
 DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -63,7 +65,21 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 	put_cpu_var(mmu_gathers);
 }
 
-#define tlb_remove_tlb_entry(tlb,ptep,address)	do { } while (0)
+/*
+ * Memorize the range for the TLB flush: track the lowest and highest
+ * addresses unmapped, so that tlb_end_vma() need only flush that range.
+ */
+static inline void
+tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
+{
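+	/* The fullmm case is flushed in one go by tlb_finish_mmu(). */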
+	if (!tlb->fullmm) {
+		if (addr < tlb->range_start)
+			tlb->range_start = addr;
+		if (addr + PAGE_SIZE > tlb->range_end)
+			tlb->range_end = addr + PAGE_SIZE;
+	}
+}
 
 /*
  * In the case of tlb vma handling, we can optimise these away in the
@@ -73,15 +89,26 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 static inline void
 tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
-	if (!tlb->fullmm)
+	if (!tlb->fullmm) {
 		flush_cache_range(vma, vma->vm_start, vma->vm_end);
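+		/*
+		 * Reset to an "empty" (inverted) range; the first
+		 * tlb_remove_tlb_entry() call establishes both bounds.
+		 */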
+		tlb->range_start = TASK_SIZE;
+		tlb->range_end = 0;
+	}
 }
 
 static inline void
 tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
-	if (!tlb->fullmm)
-		flush_tlb_range(vma, vma->vm_start, vma->vm_end);
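+	/*
+	 * range_end stays 0 when no entries were removed from this
+	 * VMA, in which case there is nothing to flush.
+	 */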
+	if (!tlb->fullmm && tlb->range_end > 0)
+		flush_tlb_range(vma, tlb->range_start, tlb->range_end);
 }
 
 #define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
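
For illustration, the range accumulation above can be exercised as a
stand-alone user-space sketch.  Everything below is hypothetical
scaffolding: start_vma() and remove_entry() are stand-ins for the
kernel's tlb_start_vma() and tlb_remove_tlb_entry(), and the TASK_SIZE
value is just a typical 32-bit ARM user/kernel split, not something
this patch defines.

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define TASK_SIZE	0xbf000000UL	/* illustrative 32-bit ARM user limit */

/* User-space stand-in for the patched struct; the mm pointer is omitted. */
struct mmu_gather {
	unsigned int	fullmm;
	unsigned long	range_start;
	unsigned long	range_end;
};

/* Mirrors tlb_start_vma(): reset to an empty (inverted) range. */
static void start_vma(struct mmu_gather *tlb)
{
	if (!tlb->fullmm) {
		tlb->range_start = TASK_SIZE;
		tlb->range_end = 0;
	}
}

/* Mirrors tlb_remove_tlb_entry(): grow the range to cover addr's page. */
static void remove_entry(struct mmu_gather *tlb, unsigned long addr)
{
	if (!tlb->fullmm) {
		if (addr < tlb->range_start)
			tlb->range_start = addr;
		if (addr + PAGE_SIZE > tlb->range_end)
			tlb->range_end = addr + PAGE_SIZE;
	}
}

int main(void)
{
	struct mmu_gather tlb = { .fullmm = 0 };

	start_vma(&tlb);
	remove_entry(&tlb, 0x8000);
	remove_entry(&tlb, 0x3000);
	remove_entry(&tlb, 0x5000);

	/* Only [0x3000, 0x9000) needs flushing, not the whole VMA. */
	assert(tlb.range_start == 0x3000);
	assert(tlb.range_end == 0x8000 + PAGE_SIZE);
	printf("flush range: [%#lx, %#lx)\n", tlb.range_start, tlb.range_end);
	return 0;
}

Unmapping pages at 0x3000, 0x5000 and 0x8000 leaves a flush range of
[0x3000, 0x9000), which is the point of the patch: flush_tlb_range()
now covers only the pages actually unmapped rather than the whole VMA.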