powerpc: Don't use thread struct for saving SRR0/1 on syscall.

CR0 can be saved later, and CTR can also be used for saving.

Keep SRR1 in r9 and stash SRR0 in CTR, this avoids using thread_struct
in memory for that.

Saves 3 cycles (ie 1%) in null_syscall selftest on 8xx.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b94c3bc03bac9431fec2dadb686384c481889422.1580470483.git.christophe.leroy@c-s.fr
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 0e7bf28..4a1faed 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -114,28 +114,23 @@
 	mfspr	r9, SPRN_SRR1
 #ifdef CONFIG_VMAP_STACK
 	mfspr	r11, SPRN_SRR0
-	stw	r11, SRR0(r12)
-	stw	r9, SRR1(r12)
+	mtctr	r11
 #endif
-	mfcr	r10
 	andi.	r11, r9, MSR_PR
 	lwz	r11,TASK_STACK-THREAD(r12)
 	beq-	99f
-	rlwinm	r10,r10,0,4,2	/* Clear SO bit in CR */
 	addi	r11, r11, THREAD_SIZE - INT_FRAME_SIZE
 #ifdef CONFIG_VMAP_STACK
-	li	r9, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
-	mtmsr	r9
+	li	r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+	mtmsr	r10
 	isync
 #endif
 	tovirt_vmstack r12, r12
 	tophys_novmstack r11, r11
-	stw	r10,_CCR(r11)		/* save registers */
 	mflr	r10
 	stw	r10, _LINK(r11)
 #ifdef CONFIG_VMAP_STACK
-	lwz	r10, SRR0(r12)
-	lwz	r9, SRR1(r12)
+	mfctr	r10
 #else
 	mfspr	r10,SPRN_SRR0
 #endif
@@ -143,6 +138,9 @@
 	stw	r1,0(r11)
 	tovirt_novmstack r1, r11	/* set new kernel sp */
 	stw	r10,_NIP(r11)
+	mfcr	r10
+	rlwinm	r10,r10,0,4,2	/* Clear SO bit in CR */
+	stw	r10,_CCR(r11)		/* save registers */
 #ifdef CONFIG_40x
 	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?) */
 #else