Hexagon: add support for new v4+ registers

Add support for a couple new v4+ registers, along with
newer save/restore pt_regs.

Signed-off-by: Richard Kuo <rkuo@codeaurora.org>
diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S
index 425e50c..ffe4a42 100644
--- a/arch/hexagon/kernel/vm_entry.S
+++ b/arch/hexagon/kernel/vm_entry.S
@@ -45,48 +45,88 @@
  * number in the case where we decode a system call (trap0(#1)).
  */
 
+#if CONFIG_HEXAGON_ARCH_VERSION < 4
 #define save_pt_regs()\
-	memd(R0 + #_PT_R3130) = R31:30; \
+ memd(R0 + #_PT_R3130) = R31:30; \
+ { memw(R0 + #_PT_R2928) = R28; \
+   R31 = memw(R0 + #_PT_ER_VMPSP); }\
+ { memw(R0 + #(_PT_R2928 + 4)) = R31; \
+   R31 = ugp; } \
+ { memd(R0 + #_PT_R2726) = R27:26; \
+   R30 = gp ; } \
+ memd(R0 + #_PT_R2524) = R25:24; \
+ memd(R0 + #_PT_R2322) = R23:22; \
+ memd(R0 + #_PT_R2120) = R21:20; \
+ memd(R0 + #_PT_R1918) = R19:18; \
+ memd(R0 + #_PT_R1716) = R17:16; \
+ memd(R0 + #_PT_R1514) = R15:14; \
+ memd(R0 + #_PT_R1312) = R13:12; \
+ { memd(R0 + #_PT_R1110) = R11:10; \
+   R15 = lc0; } \
+ { memd(R0 + #_PT_R0908) = R9:8; \
+   R14 = sa0; } \
+ { memd(R0 + #_PT_R0706) = R7:6; \
+   R13 = lc1; } \
+ { memd(R0 + #_PT_R0504) = R5:4; \
+   R12 = sa1; } \
+ { memd(R0 + #_PT_GPUGP) = R31:30; \
+   R11 = m1; \
+   R2.H = #HI(_THREAD_SIZE); } \
+ { memd(R0 + #_PT_LC0SA0) = R15:14; \
+   R10 = m0; \
+   R2.L = #LO(_THREAD_SIZE); } \
+ { memd(R0 + #_PT_LC1SA1) = R13:12; \
+   R15 = p3:0; \
+   R2 = neg(R2); } \
+ { memd(R0 + #_PT_M1M0) = R11:10; \
+   R14  = usr; \
+   R2 = and(R0,R2); } \
+ { memd(R0 + #_PT_PREDSUSR) =  R15:14; \
+   THREADINFO_REG = R2; } \
+ { r24 = memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS); \
+   memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R0; \
+   R2 = #-1; } \
+ { memw(R0 + #_PT_SYSCALL_NR) = R2; \
+   R30 = #0; }
+#else
+/* V4+ */
+/* the # ## # syntax inserts a literal ## */
+#define save_pt_regs()\
+	{ memd(R0 + #_PT_R3130) = R31:30; \
+		R30 = memw(R0 + #_PT_ER_VMPSP); }\
 	{ memw(R0 + #_PT_R2928) = R28; \
-	  R31 = memw(R0 + #_PT_ER_VMPSP); }\
-	{ memw(R0 + #(_PT_R2928 + 4)) = R31; \
-	  R31 = ugp; } \
-	{ memd(R0 + #_PT_R2726) = R27:26; \
-	  R30 = gp ; } \
-	memd(R0 + #_PT_R2524) = R25:24; \
-	memd(R0 + #_PT_R2322) = R23:22; \
-	memd(R0 + #_PT_R2120) = R21:20; \
-	memd(R0 + #_PT_R1918) = R19:18; \
-	memd(R0 + #_PT_R1716) = R17:16; \
-	memd(R0 + #_PT_R1514) = R15:14; \
-	memd(R0 + #_PT_R1312) = R13:12; \
+		memw(R0 + #(_PT_R2928 + 4)) = R30; }\
+	{ R31:30 = C11:10; \
+		memd(R0 + #_PT_R2726) = R27:26; \
+		memd(R0 + #_PT_R2524) = R25:24; }\
+	{ memd(R0 + #_PT_R2322) = R23:22; \
+		memd(R0 + #_PT_R2120) = R21:20; }\
+	{ memd(R0 + #_PT_R1918) = R19:18; \
+		memd(R0 + #_PT_R1716) = R17:16; }\
+	{ memd(R0 + #_PT_R1514) = R15:14; \
+		memd(R0 + #_PT_R1312) = R13:12; \
+		R17:16 = C13:12; }\
 	{ memd(R0 + #_PT_R1110) = R11:10; \
-	  R15 = lc0; } \
-	{ memd(R0 + #_PT_R0908) = R9:8; \
-	  R14 = sa0; } \
+		memd(R0 + #_PT_R0908) = R9:8; \
+	  R15:14 = C1:0; } \
 	{ memd(R0 + #_PT_R0706) = R7:6; \
-	  R13 = lc1; } \
-	{ memd(R0 + #_PT_R0504) = R5:4; \
-	  R12 = sa1; } \
-	{ memd(R0 + #_PT_UGPGP) = R31:30; \
-	  R11 = m1; \
-	  R2.H = #HI(_THREAD_SIZE); } \
-	{ memd(R0 + #_PT_LC0SA0) = R15:14; \
-	  R10 = m0; \
-	  R2.L = #LO(_THREAD_SIZE); } \
-	{ memd(R0 + #_PT_LC1SA1) = R13:12; \
-	  R15 = p3:0; \
-	  R2 = neg(R2); } \
+		memd(R0 + #_PT_R0504) = R5:4; \
+    R13:12 = C3:2; } \
+	{ memd(R0 + #_PT_GPUGP) = R31:30; \
+		memd(R0 + #_PT_LC0SA0) = R15:14; \
+	  R11:10 = C7:6; }\
+	{	THREADINFO_REG = and(R0, # ## #-_THREAD_SIZE); \
+		memd(R0 + #_PT_LC1SA1) = R13:12; \
+	  R15 = p3:0; }\
 	{ memd(R0 + #_PT_M1M0) = R11:10; \
-	  R14  = usr; \
-	  R2 = and(R0,R2); } \
-	{ memd(R0 + #_PT_PREDSUSR) =  R15:14; \
-	  THREADINFO_REG = R2; } \
+		memw(R0 + #_PT_PREDSUSR + 4) =  R15; }\
 	{ r24 = memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS); \
 	  memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R0; \
 	  R2 = #-1; } \
 	{ memw(R0 + #_PT_SYSCALL_NR) = R2; \
+		memd(R0 + #_PT_CS1CS0) = R17:16; \
 	  R30 = #0; }
+#endif
 
 /*
  * Restore registers and thread_info.regs state. THREADINFO_REG
@@ -94,6 +134,7 @@
  * preserved. Don't restore R29 (SP) until later.
  */
 
+#if CONFIG_HEXAGON_ARCH_VERSION < 4
 #define restore_pt_regs() \
 	{ memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R24; \
 	  R15:14 = memd(R0 + #_PT_PREDSUSR); } \
@@ -121,11 +162,44 @@
 	  R23:22 = memd(R0 + #_PT_R2322); } \
 	{ R25:24 = memd(R0 + #_PT_R2524); \
 	  R27:26 = memd(R0 + #_PT_R2726); } \
-	R31:30 = memd(R0 + #_PT_UGPGP); \
+	R31:30 = memd(R0 + #_PT_GPUGP); \
 	{ R28 = memw(R0 + #_PT_R2928); \
 	  ugp = R31; } \
 	{ R31:30 = memd(R0 + #_PT_R3130); \
 	  gp = R30; }
+#else
+/* V4+ */
+#define restore_pt_regs() \
+	{ memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R24; \
+	  R15:14 = memd(R0 + #_PT_PREDSUSR); } \
+	{ R11:10 = memd(R0 + #_PT_M1M0); \
+		R13:12 = memd(R0 + #_PT_LC1SA1); \
+		p3:0 = R15; } \
+	{ R15:14 = memd(R0 + #_PT_LC0SA0); \
+		R3:2 = memd(R0 + #_PT_R0302); \
+		usr = R14; } \
+	{ R5:4 = memd(R0 + #_PT_R0504); \
+		R7:6 = memd(R0 + #_PT_R0706); \
+		C7:6 = R11:10; }\
+	{ R9:8 = memd(R0 + #_PT_R0908); \
+		R11:10 = memd(R0 + #_PT_R1110); \
+    C3:2 = R13:12; }\
+	{ R13:12 = memd(R0 + #_PT_R1312); \
+	  R15:14 = memd(R0 + #_PT_R1514); \
+		C1:0 = R15:14; }\
+	{ R17:16 = memd(R0 + #_PT_R1716); \
+	  R19:18 = memd(R0 + #_PT_R1918); } \
+	{ R21:20 = memd(R0 + #_PT_R2120); \
+	  R23:22 = memd(R0 + #_PT_R2322); } \
+	{ R25:24 = memd(R0 + #_PT_R2524); \
+	  R27:26 = memd(R0 + #_PT_R2726); } \
+	R31:30 = memd(R0 + #_PT_CS1CS0); \
+	{ C13:12 = R31:30; \
+		R31:30 = memd(R0 + #_PT_GPUGP) ; \
+		R28 = memw(R0 + #_PT_R2928); }\
+	{ C11:10 = R31:30; \
+		R31:30 = memd(R0 + #_PT_R3130); }
+#endif
 
 	/*
 	 * Clears off enough space for the rest of pt_regs; evrec is a part
@@ -139,6 +213,7 @@
  * Need to save off R0, R1, R2, R3 immediately.
  */
 
+#if CONFIG_HEXAGON_ARCH_VERSION < 4
 #define	vm_event_entry(CHandler) \
 	{ \
 		R29 = add(R29, #-(_PT_REGS_SIZE)); \
@@ -158,6 +233,34 @@
 		R1.H = #HI(CHandler); \
 		jump event_dispatch; \
 	}
+#else
+/* V4+ */
+/* turn on I$ prefetch early */
+/* the # ## # syntax inserts a literal ## */
+#define	vm_event_entry(CHandler) \
+	{ \
+		R29 = add(R29, #-(_PT_REGS_SIZE)); \
+		memd(R29 + #(_PT_R0100 + -_PT_REGS_SIZE)) = R1:0; \
+		memd(R29 + #(_PT_R0302 + -_PT_REGS_SIZE)) = R3:2; \
+		R0 = usr; \
+	} \
+	{ \
+		memw(R29 + #_PT_PREDSUSR) = R0; \
+		R0 = setbit(R0, #16); \
+	} \
+	usr = R0; \
+	R1:0 = G1:0; \
+	{ \
+		memd(R29 + #_PT_ER_VMEL) = R1:0; \
+		R1 = # ## #(CHandler); \
+		R3:2 = G3:2; \
+	} \
+	{ \
+		R0 = R29; \
+		memd(R29 + #_PT_ER_VMPSP) = R3:2; \
+		jump event_dispatch; \
+	}
+#endif
 
 .text
 	/*
@@ -184,8 +287,10 @@
 
 	/*  "Nested control path" -- if the previous mode was kernel  */
 	R0 = memw(R29 + #_PT_ER_VMEST);
-	P0 = tstbit(R0, #HVM_VMEST_UM_SFT);
-	if !P0 jump restore_all;
+	{
+		P0 = tstbit(R0, #HVM_VMEST_UM_SFT);
+		if (!P0.new) jump:nt restore_all;
+	}
 	/*
 	 * Returning from system call, normally coming back from user mode
 	 */
@@ -198,11 +303,18 @@
 	 * Coming back from the C-world, our thread info pointer
 	 * should be in the designated register (usually R19)
 	 */
+#if CONFIG_HEXAGON_ARCH_VERSION < 4
 	R1.L = #LO(_TIF_ALLWORK_MASK)
 	{
 		R1.H = #HI(_TIF_ALLWORK_MASK);
 		R0 = memw(THREADINFO_REG + #_THREAD_INFO_FLAGS);
 	}
+#else
+	{
+		R1 = ##_TIF_ALLWORK_MASK;
+		R0 = memw(THREADINFO_REG + #_THREAD_INFO_FLAGS);
+	}
+#endif
 
 	/*
 	 * Compare against the "return to userspace" _TIF_WORK_MASK
@@ -222,10 +334,14 @@
 work_notifysig:
 	/*  this is the part that's kind of fuzzy.  */
 	R1 = and(R0, #(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME));
-	P0 = cmp.eq(R1, #0);
-	if P0 jump restore_all
-	R1 = R0; 	/* unsigned long thread_info_flags */
-	R0 = R29;	/* regs should still be at top of stack  */
+	{
+		P0 = cmp.eq(R1, #0);
+		if P0.new jump:t restore_all;
+	}
+	{
+		R1 = R0; 	/* unsigned long thread_info_flags */
+		R0 = R29;	/* regs should still be at top of stack  */
+	}
 	call do_notify_resume
 
 restore_all:
@@ -235,14 +351,23 @@
 
 	/*  do the setregs here for VM 0.5  */
 	/*  R29 here should already be pointing at pt_regs  */
-	R1:0 = memd(R29 + #_PT_ER_VMEL);
-	R3:2 = memd(R29 + #_PT_ER_VMPSP);
+	{
+		R1:0 = memd(R29 + #_PT_ER_VMEL);
+		R3:2 = memd(R29 + #_PT_ER_VMPSP);
+	}
+#if CONFIG_HEXAGON_ARCH_VERSION < 4
 	trap1(#HVM_TRAP1_VMSETREGS);
+#else
+	G1:0 = R1:0;
+	G3:2 = R3:2;
+#endif
 
 	R0 = R29
 	restore_pt_regs()
-	R1:0 = memd(R29 + #_PT_R0100);
-	R29 = add(R29, #_PT_REGS_SIZE);
+	{
+		R1:0 = memd(R29 + #_PT_R0100);
+		R29 = add(R29, #_PT_REGS_SIZE);
+	}
 	trap1(#HVM_TRAP1_VMRTE)
 	/* Notreached */