mmu_gather: move minimal range calculations into generic code

On architectures with hardware broadcasting of TLB invalidation messages
, it makes sense to reduce the range of the mmu_gather structure when
unmapping page ranges based on the dirty address information passed to
tlb_remove_tlb_entry.

arm64 already does this by directly manipulating the start/end fields
of the gather structure, but this confuses the generic code which
does not expect these fields to change and can end up calculating
invalid, negative ranges when forcing a flush in zap_pte_range.

This patch moves the minimal range calculation out of the arm64 code
and into the generic implementation, simplifying zap_pte_range in the
process (which no longer needs to care about start/end, since they will
point to the appropriate ranges already). With the range being tracked
by core code, the need_flush flag is dropped in favour of checking that
the end of the range has actually been set.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
Cc: Michal Simek <monstr@monstr.eu>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 5672d7e..0884805 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -96,10 +96,9 @@
 #endif
 	unsigned long		start;
 	unsigned long		end;
-	unsigned int		need_flush : 1,	/* Did free PTEs */
 	/* we are in the middle of an operation to clear
 	 * a full mm and can make some optimizations */
-				fullmm : 1,
+	unsigned int		fullmm : 1,
 	/* we have performed an operation which
 	 * requires a complete flush of the tlb */
 				need_flush_all : 1;
@@ -128,16 +127,54 @@
 		tlb_flush_mmu(tlb);
 }
 
+static inline void __tlb_adjust_range(struct mmu_gather *tlb,
+				      unsigned long address)
+{
+	tlb->start = min(tlb->start, address);
+	tlb->end = max(tlb->end, address + PAGE_SIZE);
+}
+
+static inline void __tlb_reset_range(struct mmu_gather *tlb)
+{
+	tlb->start = TASK_SIZE;
+	tlb->end = 0;
+}
+
+/*
+ * In the case of tlb vma handling, we can optimise these away in the
+ * case where we're doing a full MM flush.  When we're doing a munmap,
+ * the vmas are adjusted to only cover the region to be torn down.
+ */
+#ifndef tlb_start_vma
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#endif
+
+#define __tlb_end_vma(tlb, vma)					\
+	do {							\
+		if (!tlb->fullmm && tlb->end) {			\
+			tlb_flush(tlb);				\
+			__tlb_reset_range(tlb);			\
+		}						\
+	} while (0)
+
+#ifndef tlb_end_vma
+#define tlb_end_vma	__tlb_end_vma
+#endif
+
+#ifndef __tlb_remove_tlb_entry
+#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#endif
+
 /**
  * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
  *
- * Record the fact that pte's were really umapped in ->need_flush, so we can
- * later optimise away the tlb invalidate.   This helps when userspace is
- * unmapping already-unmapped pages, which happens quite a lot.
+ * Record the fact that pte's were really unmapped by updating the range,
+ * so we can later optimise away the tlb invalidate.   This helps when
+ * userspace is unmapping already-unmapped pages, which happens quite a lot.
  */
 #define tlb_remove_tlb_entry(tlb, ptep, address)		\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
@@ -151,27 +188,27 @@
 
 #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)		\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);	\
 	} while (0)
 
 #define pte_free_tlb(tlb, ptep, address)			\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__pte_free_tlb(tlb, ptep, address);		\
 	} while (0)
 
 #ifndef __ARCH_HAS_4LEVEL_HACK
 #define pud_free_tlb(tlb, pudp, address)			\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__pud_free_tlb(tlb, pudp, address);		\
 	} while (0)
 #endif
 
 #define pmd_free_tlb(tlb, pmdp, address)			\
 	do {							\
-		tlb->need_flush = 1;				\
+		__tlb_adjust_range(tlb, address);		\
 		__pmd_free_tlb(tlb, pmdp, address);		\
 	} while (0)