[SPARC64]: Add explicit register args to trap state loading macros.

Besides making the code cleaner, this allows a simplification in
the TSB miss handling path.
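
For example, TRAP_LOAD_THREAD_REG used to hard-code its registers:

	TRAP_LOAD_THREAD_REG		! clobbers %g1, loads %g6

while each call site now names the destination and temporary
registers explicitly:

	TRAP_LOAD_THREAD_REG(%g6, %g1)	! DEST=%g6, TMP=%g1

In particular, tsb_miss_page_table_walk can point TRAP_LOAD_PGD_PHYS
at %g7 with %g5 as scratch, instead of shuffling %g1 and %g6 into and
out of spare registers around the macro invocation.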

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index b3511ff..4ca3ea0 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -50,7 +50,7 @@
 	add		%g0, %g0, %g0
 	ba,a,pt		%xcc, rtrap_clr_l6
 
-1:	TRAP_LOAD_THREAD_REG
+1:	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	ldub		[%g6 + TI_FPSAVED], %g5
 	wr		%g0, FPRS_FEF, %fprs
 	andcc		%g5, FPRS_FEF, %g0
@@ -190,7 +190,7 @@
 	.globl		do_fpother_check_fitos
 	.align		32
 do_fpother_check_fitos:
-	TRAP_LOAD_THREAD_REG
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	sethi		%hi(fp_other_bounce - 4), %g7
 	or		%g7, %lo(fp_other_bounce - 4), %g7
 
@@ -378,7 +378,7 @@
 	sllx		%g2, %g4, %g2
 	sllx		%g4, 2, %g4
 
-	TRAP_LOAD_IRQ_WORK
+	TRAP_LOAD_IRQ_WORK(%g6, %g1)
 
 	lduw		[%g6 + %g4], %g5	/* g5 = irq_work(cpu, pil) */
 	stw		%g5, [%g3 + 0x00]	/* bucket->irq_chain = g5 */
@@ -422,7 +422,7 @@
 
 	.globl		utrap_trap
 utrap_trap:		/* %g3=handler,%g4=level */
-	TRAP_LOAD_THREAD_REG
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	ldx		[%g6 + TI_UTRAPS], %g1
 	brnz,pt		%g1, invoke_utrap
 	 nop
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index d974d18..b5f6bc5 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -31,7 +31,7 @@
 		.globl	etrap, etrap_irq, etraptl1
 etrap:		rdpr	%pil, %g2
 etrap_irq:
-		TRAP_LOAD_THREAD_REG
+		TRAP_LOAD_THREAD_REG(%g6, %g1)
 		rdpr	%tstate, %g1
 		sllx	%g2, 20, %g3
 		andcc	%g1, TSTATE_PRIV, %g0
@@ -100,7 +100,7 @@
 		stx	%i7, [%sp + PTREGS_OFF + PT_V9_I7]
 		wrpr	%g0, ETRAP_PSTATE2, %pstate
 		mov	%l6, %g6
-		LOAD_PER_CPU_BASE(%g4, %g3, %l1)
+		LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1)
 		jmpl	%l2 + 0x4, %g0
 		 ldx	[%g6 + TI_TASK], %g4
 
@@ -124,7 +124,7 @@
 		 *	0x58	TL4's TT
 		 *	0x60	TL
 		 */
-		TRAP_LOAD_THREAD_REG
+		TRAP_LOAD_THREAD_REG(%g6, %g1)
 		sub	%sp, ((4 * 8) * 4) + 8, %g2
 		rdpr	%tl, %g1
 
@@ -179,7 +179,7 @@
 		.align	64
 		.globl	scetrap
 scetrap:
-		TRAP_LOAD_THREAD_REG
+		TRAP_LOAD_THREAD_REG(%g6, %g1)
 		rdpr	%pil, %g2
 		rdpr	%tstate, %g1
 		sllx	%g2, 20, %g3
@@ -250,7 +250,7 @@
 		stx	%i6, [%sp + PTREGS_OFF + PT_V9_I6]
 		mov	%l6, %g6
 		stx	%i7, [%sp + PTREGS_OFF + PT_V9_I7]
-		LOAD_PER_CPU_BASE(%g4, %g3, %l1)
+		LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1)
 		ldx	[%g6 + TI_TASK], %g4
 		done
 
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index 64bc036..61bd45e 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -226,7 +226,7 @@
 		brz,pt			%l3, 1f
 		 nop
 		/* Must do this before thread reg is clobbered below.  */
-		LOAD_PER_CPU_BASE(%i0, %i1, %i2)
+		LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2)
 1:
 		ldx			[%sp + PTREGS_OFF + PT_V9_G6], %g6
 		ldx			[%sp + PTREGS_OFF + PT_V9_G7], %g7
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index ff6a79b..28e38b1 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -36,14 +36,7 @@
 	 nop
 
 tsb_miss_page_table_walk:
-	/* This clobbers %g1 and %g6, preserve them... */
-	mov		%g1, %g5
-	mov		%g6, %g2
-
-	TRAP_LOAD_PGD_PHYS
-
-	mov		%g2, %g6
-	mov		%g5, %g1
+	TRAP_LOAD_PGD_PHYS(%g7, %g5)
 
 	USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
 
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index 320a762..211021a 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -40,7 +40,7 @@
 	 */
 	.globl	fill_fixup, spill_fixup
 fill_fixup:
-	TRAP_LOAD_THREAD_REG
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	rdpr		%tstate, %g1
 	andcc		%g1, TSTATE_PRIV, %g0
 	or		%g4, FAULT_CODE_WINFIXUP, %g4
@@ -86,7 +86,7 @@
 	wrpr		%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov		%o7, %g6
 	ldx		[%g6 + TI_TASK], %g4
-	LOAD_PER_CPU_BASE(%g1, %g2, %g3)
+	LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
 
 	/* This is the same as below, except we handle this a bit special
 	 * since we must preserve %l5 and %l6, see comment above.
@@ -105,7 +105,7 @@
 	 * do not touch %g7 or %g2 so we handle the two cases fine.
 	 */
 spill_fixup:
-	TRAP_LOAD_THREAD_REG
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	ldx		[%g6 + TI_FLAGS], %g1
 	andcc		%g1, _TIF_32BIT, %g0
 	ldub		[%g6 + TI_WSAVED], %g1
@@ -181,7 +181,7 @@
 	wrpr		%g3, %tnpc
 	done
 fill_fixup_mna:
-	TRAP_LOAD_THREAD_REG
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	rdpr		%tstate, %g1
 	andcc		%g1, TSTATE_PRIV, %g0
 	be,pt		%xcc, window_mna_from_user_common
@@ -209,14 +209,14 @@
 	wrpr		%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov		%o7, %g6			! Get current back.
 	ldx		[%g6 + TI_TASK], %g4		! Finish it.
-	LOAD_PER_CPU_BASE(%g1, %g2, %g3)
+	LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
 	call		mem_address_unaligned
 	 add		%sp, PTREGS_OFF, %o0
 
 	b,pt		%xcc, rtrap
 	 nop						! yes, the nop is correct
 spill_fixup_mna:
-	TRAP_LOAD_THREAD_REG
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	ldx		[%g6 + TI_FLAGS], %g1
 	andcc		%g1, _TIF_32BIT, %g0
 	ldub		[%g6 + TI_WSAVED], %g1
@@ -284,7 +284,7 @@
 	wrpr		%g3, %tnpc
 	done
 fill_fixup_dax:
-	TRAP_LOAD_THREAD_REG
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	rdpr		%tstate, %g1
 	andcc		%g1, TSTATE_PRIV, %g0
 	be,pt		%xcc, window_dax_from_user_common
@@ -312,14 +312,14 @@
 	wrpr		%l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
 	mov		%o7, %g6			! Get current back.
 	ldx		[%g6 + TI_TASK], %g4		! Finish it.
-	LOAD_PER_CPU_BASE(%g1, %g2, %g3)
+	LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
 	call		spitfire_data_access_exception
 	 add		%sp, PTREGS_OFF, %o0
 
 	b,pt		%xcc, rtrap
 	 nop						! yes, the nop is correct
 spill_fixup_dax:
-	TRAP_LOAD_THREAD_REG
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
 	ldx		[%g6 + TI_FLAGS], %g1
 	andcc		%g1, _TIF_32BIT, %g0
 	ldub		[%g6 + TI_WSAVED], %g1
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index da54b4f..c15514f 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -107,67 +107,67 @@
 	lduwa		[REG] ASI_PHYS_BYPASS_EC_E, REG;\
 	.previous;
 
-/* Clobbers %g1, current address space PGD phys address into %g7.  */
-#define TRAP_LOAD_PGD_PHYS			\
-	__GET_CPUID(%g1)			\
-	sethi	%hi(trap_block), %g7;		\
-	sllx	%g1, TRAP_BLOCK_SZ_SHIFT, %g1;	\
-	or	%g7, %lo(trap_block), %g7;	\
-	add	%g7, %g1, %g7;			\
-	ldx	[%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
+/* Clobbers TMP, loads current address space PGD phys address into DEST.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+	__GET_CPUID(TMP)			\
+	sethi	%hi(trap_block), DEST;		\
+	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
+	or	DEST, %lo(trap_block), DEST;	\
+	add	DEST, TMP, DEST;		\
+	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
 
-/* Clobbers %g1, loads local processor's IRQ work area into %g6.  */
-#define TRAP_LOAD_IRQ_WORK			\
-	__GET_CPUID(%g1)			\
-	sethi	%hi(__irq_work), %g6;		\
-	sllx	%g1, 6, %g1;			\
-	or	%g6, %lo(__irq_work), %g6;	\
-	add	%g6, %g1, %g6;
+/* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
+#define TRAP_LOAD_IRQ_WORK(DEST, TMP)		\
+	__GET_CPUID(TMP)			\
+	sethi	%hi(__irq_work), DEST;		\
+	sllx	TMP, 6, TMP;			\
+	or	DEST, %lo(__irq_work), DEST;	\
+	add	DEST, TMP, DEST;
 
-/* Clobbers %g1, loads %g6 with current thread info pointer.  */
-#define TRAP_LOAD_THREAD_REG			\
-	__GET_CPUID(%g1)			\
-	sethi	%hi(trap_block), %g6;		\
-	sllx	%g1, TRAP_BLOCK_SZ_SHIFT, %g1;	\
-	or	%g6, %lo(trap_block), %g6;	\
-	ldx	[%g6 + %g1], %g6;
+/* Clobbers TMP, loads DEST with current thread info pointer.  */
+#define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
+	__GET_CPUID(TMP)			\
+	sethi	%hi(trap_block), DEST;		\
+	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
+	or	DEST, %lo(trap_block), DEST;	\
+	ldx	[DEST + TMP], DEST;
 
-/* Given the current thread info pointer in %g6, load the per-cpu
- * area base of the current processor into %g5.  REG1, REG2, and REG3 are
+/* Given the current thread info pointer in THR, load the per-cpu
+ * area base of the current processor into DEST.  REG1, REG2, and REG3 are
  * clobbered.
  *
- * You absolutely cannot use %g5 as a temporary in this code.  The
+ * You absolutely cannot use DEST as a temporary in this code.  The
  * reason is that traps can happen during execution, and return from
- * trap will load the fully resolved %g5 per-cpu base.  This can corrupt
+ * trap will load the fully resolved DEST per-cpu base.  This can corrupt
  * the calculations done by the macro mid-stream.
  */
-#define LOAD_PER_CPU_BASE(REG1, REG2, REG3)		\
-	ldub	[%g6 + TI_CPU], REG1;			\
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
+	ldub	[THR + TI_CPU], REG1;			\
 	sethi	%hi(__per_cpu_shift), REG3;		\
 	sethi	%hi(__per_cpu_base), REG2;		\
 	ldx	[REG3 + %lo(__per_cpu_shift)], REG3;	\
 	ldx	[REG2 + %lo(__per_cpu_base)], REG2;	\
 	sllx	REG1, REG3, REG3;			\
-	add	REG3, REG2, %g5;
+	add	REG3, REG2, DEST;
 
 #else
 
 /* Uniprocessor versions, we know the cpuid is zero.  */
-#define TRAP_LOAD_PGD_PHYS			\
-	sethi	%hi(trap_block), %g7;		\
-	or	%g7, %lo(trap_block), %g7;	\
-	ldx	[%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+	sethi	%hi(trap_block), DEST;		\
+	or	DEST, %lo(trap_block), DEST;	\
+	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
 
-#define TRAP_LOAD_IRQ_WORK			\
-	sethi	%hi(__irq_work), %g6;		\
-	or	%g6, %lo(__irq_work), %g6;
+#define TRAP_LOAD_IRQ_WORK(DEST, TMP)		\
+	sethi	%hi(__irq_work), DEST;		\
+	or	DEST, %lo(__irq_work), DEST;
 
-#define TRAP_LOAD_THREAD_REG			\
-	sethi	%hi(trap_block), %g6;		\
-	ldx	[%g6 + %lo(trap_block)], %g6;
+#define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
+	sethi	%hi(trap_block), DEST;		\
+	ldx	[DEST + %lo(trap_block)], DEST;
 
-/* No per-cpu areas on uniprocessor, so no need to load %g5.  */
-#define LOAD_PER_CPU_BASE(REG1, REG2, REG3)
+/* No per-cpu areas on uniprocessor, so no need to load DEST.  */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)
 
 #endif /* !(CONFIG_SMP) */