MIPS: Fold the TLB refill at the vmalloc path if possible.

Try to fold the 64-bit TLB refill handler opportunistically at the
beginning of the vmalloc path so as to avoid splitting execution flow in
half and wasting cycles for a branch required at that point then.  Resort
to doing the split if either of the newly created parts would not fit into
its designated slot.

Original-patch-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 678e633..d9a18b2 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -6,7 +6,7 @@
  * Synthesize TLB refill handlers at runtime.
  *
  * Copyright (C) 2004, 2005, 2006, 2008  Thiemo Seufer
- * Copyright (C) 2005, 2007  Maciej W. Rozycki
+ * Copyright (C) 2005, 2007, 2008, 2009  Maciej W. Rozycki
  * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
  *
  * ... and the days got worse and worse and now you see
@@ -19,6 +19,7 @@
  * (Condolences to Napoleon XIV)
  */
 
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -732,36 +733,60 @@
 		uasm_copy_handler(relocs, labels, tlb_handler, p, f);
 		final_len = p - tlb_handler;
 	} else {
-		/*
-		 * Split two instructions before the end.  One for the
-		 * branch and one for the instruction in the delay
-		 * slot.
-		 */
-		u32 *split = tlb_handler + MIPS64_REFILL_INSNS - 2;
+#ifdef MODULE_START
+		const enum label_id ls = label_module_alloc;
+#else
+		const enum label_id ls = label_vmalloc;
+#endif
+		u32 *split;
+		int ov = 0;
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++)
+			;
+		BUG_ON(i == ARRAY_SIZE(labels));
+		split = labels[i].addr;
 
 		/*
-		 * Find the split point.  If the branch would fall in
-		 * a delay slot, we must back up an additional
-		 * instruction so that it is no longer in a delay
-		 * slot.
+		 * See if we have overflown one way or the other.
 		 */
-		if (uasm_insn_has_bdelay(relocs, split - 1))
-			split--;
+		if (split > tlb_handler + MIPS64_REFILL_INSNS ||
+		    split < p - MIPS64_REFILL_INSNS)
+			ov = 1;
 
+		if (ov) {
+			/*
+			 * Split two instructions before the end.  One
+			 * for the branch and one for the instruction
+			 * in the delay slot.
+			 */
+			split = tlb_handler + MIPS64_REFILL_INSNS - 2;
+
+			/*
+			 * If the branch would fall in a delay slot,
+			 * we must back up an additional instruction
+			 * so that it is no longer in a delay slot.
+			 */
+			if (uasm_insn_has_bdelay(relocs, split - 1))
+				split--;
+		}
 		/* Copy first part of the handler. */
 		uasm_copy_handler(relocs, labels, tlb_handler, split, f);
 		f += split - tlb_handler;
 
-		/* Insert branch. */
-		uasm_l_split(&l, final_handler);
-		uasm_il_b(&f, &r, label_split);
-		if (uasm_insn_has_bdelay(relocs, split))
-			uasm_i_nop(&f);
-		else {
-			uasm_copy_handler(relocs, labels, split, split + 1, f);
-			uasm_move_labels(labels, f, f + 1, -1);
-			f++;
-			split++;
+		if (ov) {
+			/* Insert branch. */
+			uasm_l_split(&l, final_handler);
+			uasm_il_b(&f, &r, label_split);
+			if (uasm_insn_has_bdelay(relocs, split))
+				uasm_i_nop(&f);
+			else {
+				uasm_copy_handler(relocs, labels,
+						  split, split + 1, f);
+				uasm_move_labels(labels, f, f + 1, -1);
+				f++;
+				split++;
+			}
 		}
 
 		/* Copy the rest of the handler. */