xtensa: abstract 'entry' and 'retw' in assembly code

Provide abi_entry, abi_entry_default, abi_ret and abi_ret_default macros
that allocate an aligned stack frame in both the windowed and call0 ABIs.
Provide the XTENSA_SPILL_STACK_RESERVE macro that specifies the stack
frame size required when register spilling is involved.
Replace all uses of 'entry' and 'retw' with the above macros.
This makes most of the xtensa assembly code ready for XEA3 and the
call0 ABI.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
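---
For context, a minimal sketch of how such macros could be defined, e.g.
in arch/xtensa/include/asm/asmmacro.h. The alignment expression, the
XTENSA_FRAME_SIZE_RESERVE name and the concrete reserve values here are
illustrative assumptions, not the header hunk from this series:

	/*
	 * Sketch only: frame allocation helpers for both ABIs.
	 * XTENSA_FRAME_SIZE_RESERVE and the values below are assumed.
	 */
	#define XTENSA_STACK_ALIGNMENT	16

	#if defined(__XTENSA_WINDOWED_ABI__)

	/*
	 * 'entry' rotates the register window and allocates the frame
	 * itself; reserve extra space for the window spill area.
	 */
	#define XTENSA_FRAME_SIZE_RESERVE	16
	#define XTENSA_SPILL_STACK_RESERVE	32

	#define abi_entry(frame_size) \
		entry sp, (XTENSA_FRAME_SIZE_RESERVE + \
			   (((frame_size) + XTENSA_STACK_ALIGNMENT - 1) & \
			    -XTENSA_STACK_ALIGNMENT))
	#define abi_entry_default abi_entry(0)

	/* retw restores the caller's frame regardless of its size */
	#define abi_ret(frame_size) retw
	#define abi_ret_default retw

	#elif defined(__XTENSA_CALL0_ABI__)

	/* call0 has no register windows, so nothing is ever spilled */
	#define XTENSA_SPILL_STACK_RESERVE	0

	#define abi_entry(frame_size) \
		addi sp, sp, -(((frame_size) + XTENSA_STACK_ALIGNMENT - 1) & \
			       -XTENSA_STACK_ALIGNMENT)
	/* a zero-size frame needs no stack adjustment at all */
	#define abi_entry_default

	#define abi_ret(frame_size) \
		addi sp, sp, (((frame_size) + XTENSA_STACK_ALIGNMENT - 1) & \
			      -XTENSA_STACK_ALIGNMENT); \
		ret
	#define abi_ret_default ret

	#endif

With definitions along these lines, abi_entry_default/abi_ret_default
cover leaf functions that need no locals (the 'entry sp, 16' / 'retw'
pairs replaced below), while abi_entry(N)/abi_ret(N) together with
XTENSA_SPILL_STACK_RESERVE let code that spills the register file
request a correctly sized, aligned frame under either ABI.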
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S
index c6e73b1..4cb9ca5 100644
--- a/arch/xtensa/lib/checksum.S
+++ b/arch/xtensa/lib/checksum.S
@@ -43,7 +43,7 @@
 	 * Experiments with Ethernet and SLIP connections show that buf
 	 * is aligned on either a 2-byte or 4-byte boundary.
 	 */
-	entry	sp, 32
+	abi_entry_default
 	extui	a5, a2, 0, 2
 	bnez	a5, 8f		/* branch if 2-byte aligned */
 	/* Fall-through on common case, 4-byte alignment */
@@ -107,7 +107,7 @@
 	ONES_ADD(a4, a6)
 7:
 	mov	a2, a4
-	retw
+	abi_ret_default
 
 	/* uncommon case, buf is 2-byte aligned */
 8:
@@ -195,7 +195,7 @@
 
 ENTRY(csum_partial_copy_generic)
 
-	entry	sp, 32
+	abi_entry_default
 	mov	a12, a3
 	mov	a11, a4
 	or	a10, a2, a3
@@ -316,7 +316,7 @@
 	ONES_ADD(a5, a9)
 8:
 	mov	a2, a5
-	retw
+	abi_ret_default
 
 5:
 	/* Control branch to here when either src or dst is odd.  We
@@ -383,12 +383,12 @@
 	blt	a12, a11, .Leloop
 #endif
 2:
-	retw
+	abi_ret_default
 
 11:
 	movi	a2, -EFAULT
 	s32i	a2, a7, 0	/* dst_err_ptr */
 	movi	a2, 0
-	retw
+	abi_ret_default
 
 .previous
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index efecfd7..582d817 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -79,7 +79,7 @@
 	bne	a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
 #endif /* !XCHAL_HAVE_LOOPS */
 .Lbytecopydone:
-	retw
+	abi_ret_default
 
 /*
  * Destination is unaligned
@@ -112,7 +112,7 @@
 ENTRY(__memcpy)
 WEAK(memcpy)
 
-	entry	sp, 16		# minimal stack frame
+	abi_entry_default
 	# a2/ dst, a3/ src, a4/ len
 	mov	a5, a2		# copy dst so that a2 is return value
 .Lcommon:
@@ -161,7 +161,7 @@
 	bbsi.l	a4, 2, .L3
 	bbsi.l	a4, 1, .L4
 	bbsi.l	a4, 0, .L5
-	retw
+	abi_ret_default
 .L3:
 	# copy 4 bytes
 	l32i	a6, a3,  0
@@ -170,7 +170,7 @@
 	addi	a5, a5,  4
 	bbsi.l	a4, 1, .L4
 	bbsi.l	a4, 0, .L5
-	retw
+	abi_ret_default
 .L4:
 	# copy 2 bytes
 	l16ui	a6, a3,  0
@@ -178,12 +178,12 @@
 	s16i	a6, a5,  0
 	addi	a5, a5,  2
 	bbsi.l	a4, 0, .L5
-	retw
+	abi_ret_default
 .L5:
 	# copy 1 byte
 	l8ui	a6, a3,  0
 	s8i	a6, a5,  0
-	retw
+	abi_ret_default
 
 /*
  * Destination is aligned, Source is unaligned
@@ -255,7 +255,7 @@
 #endif
 	bbsi.l	a4, 1, .L14
 	bbsi.l	a4, 0, .L15
-.Ldone:	retw
+.Ldone:	abi_ret_default
 .L14:
 	# copy 2 bytes
 	l8ui	a6, a3,  0
@@ -265,12 +265,12 @@
 	s8i	a7, a5,  1
 	addi	a5, a5,  2
 	bbsi.l	a4, 0, .L15
-	retw
+	abi_ret_default
 .L15:
 	# copy 1 byte
 	l8ui	a6, a3,  0
 	s8i	a6, a5,  0
-	retw
+	abi_ret_default
 
 ENDPROC(__memcpy)
 
@@ -280,7 +280,7 @@
 
 ENTRY(bcopy)
 
-	entry	sp, 16		# minimal stack frame
+	abi_entry_default
 	# a2=src, a3=dst, a4=len
 	mov	a5, a3
 	mov	a3, a2
@@ -346,7 +346,7 @@
 				       # $a3:src != $a7:src_start
 #endif /* !XCHAL_HAVE_LOOPS */
 .Lbackbytecopydone:
-	retw
+	abi_ret_default
 
 /*
  * Destination is unaligned
@@ -380,7 +380,7 @@
 ENTRY(__memmove)
 WEAK(memmove)
 
-	entry	sp, 16		# minimal stack frame
+	abi_entry_default
 	# a2/ dst, a3/ src, a4/ len
 	mov	a5, a2		# copy dst so that a2 is return value
 .Lmovecommon:
@@ -435,7 +435,7 @@
 	bbsi.l	a4, 2, .Lback3
 	bbsi.l	a4, 1, .Lback4
 	bbsi.l	a4, 0, .Lback5
-	retw
+	abi_ret_default
 .Lback3:
 	# copy 4 bytes
 	addi	a3, a3, -4
@@ -444,7 +444,7 @@
 	s32i	a6, a5,  0
 	bbsi.l	a4, 1, .Lback4
 	bbsi.l	a4, 0, .Lback5
-	retw
+	abi_ret_default
 .Lback4:
 	# copy 2 bytes
 	addi	a3, a3, -2
@@ -452,14 +452,14 @@
 	addi	a5, a5, -2
 	s16i	a6, a5,  0
 	bbsi.l	a4, 0, .Lback5
-	retw
+	abi_ret_default
 .Lback5:
 	# copy 1 byte
 	addi	a3, a3, -1
 	l8ui	a6, a3,  0
 	addi	a5, a5, -1
 	s8i	a6, a5,  0
-	retw
+	abi_ret_default
 
 /*
  * Destination is aligned, Source is unaligned
@@ -531,7 +531,7 @@
 	bbsi.l	a4, 1, .Lback14
 	bbsi.l	a4, 0, .Lback15
 .Lbackdone:
-	retw
+	abi_ret_default
 .Lback14:
 	# copy 2 bytes
 	addi	a3, a3, -2
@@ -541,13 +541,13 @@
 	s8i	a6, a5,  0
 	s8i	a7, a5,  1
 	bbsi.l	a4, 0, .Lback15
-	retw
+	abi_ret_default
 .Lback15:
 	# copy 1 byte
 	addi	a3, a3, -1
 	addi	a5, a5, -1
 	l8ui	a6, a3,  0
 	s8i	a6, a5,  0
-	retw
+	abi_ret_default
 
 ENDPROC(__memmove)
diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S
index 8632eac..59b1524 100644
--- a/arch/xtensa/lib/memset.S
+++ b/arch/xtensa/lib/memset.S
@@ -34,7 +34,7 @@
 ENTRY(__memset)
 WEAK(memset)
 
-	entry	sp, 16		# minimal stack frame
+	abi_entry_default
 	# a2/ dst, a3/ c, a4/ length
 	extui	a3, a3, 0, 8	# mask to just 8 bits
 	slli	a7, a3, 8	# duplicate character in all bytes of word
@@ -48,7 +48,7 @@
 	srli	a7, a4, 4	# number of loop iterations with 16B
 				# per iteration
 	bnez	a4, .Laligned
-	retw
+	abi_ret_default
 
 /*
  * Destination is word-aligned.
@@ -95,7 +95,7 @@
 EX(10f) s8i	a3, a5,  0
 .L5:
 .Lret1:
-	retw
+	abi_ret_default
 
 /*
  * Destination is unaligned
@@ -139,7 +139,7 @@
 	blt	a5, a6, .Lbyteloop
 #endif /* !XCHAL_HAVE_LOOPS */
 .Lbytesetdone:
-	retw
+	abi_ret_default
 
 ENDPROC(__memset)
 
@@ -150,4 +150,4 @@
 
 10:
 	movi	a2, 0
-	retw
+	abi_ret_default
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S
index c4c6c85..4faf46f 100644
--- a/arch/xtensa/lib/strncpy_user.S
+++ b/arch/xtensa/lib/strncpy_user.S
@@ -50,7 +50,7 @@
 .text
 ENTRY(__strncpy_user)
 
-	entry	sp, 16		# minimal stack frame
+	abi_entry_default
 	# a2/ dst, a3/ src, a4/ len
 	mov	a11, a2		# leave dst in return value register
 	beqz	a4, .Lret	# if len is zero
@@ -93,7 +93,7 @@
 	bnez	a4, .Lsrcaligned	# if len is nonzero
 .Lret:
 	sub	a2, a11, a2		# compute strlen
-	retw
+	abi_ret_default
 
 /*
  * dst is word-aligned, src is word-aligned
@@ -148,14 +148,14 @@
 .Lz3:	# byte 3 is zero
 	addi	a11, a11, 3		# advance dst pointer
 	sub	a2, a11, a2		# compute strlen
-	retw
+	abi_ret_default
 .Lz0:	# byte 0 is zero
 #ifdef __XTENSA_EB__
 	movi	a9, 0
 #endif /* __XTENSA_EB__ */
 EX(10f)	s8i	a9, a11, 0
 	sub	a2, a11, a2		# compute strlen
-	retw
+	abi_ret_default
 .Lz1:	# byte 1 is zero
 #ifdef __XTENSA_EB__
 	extui   a9, a9, 16, 16
@@ -163,7 +163,7 @@
 EX(10f)	s16i	a9, a11, 0
 	addi	a11, a11, 1		# advance dst pointer
 	sub	a2, a11, a2		# compute strlen
-	retw
+	abi_ret_default
 .Lz2:	# byte 2 is zero
 #ifdef __XTENSA_EB__
 	extui   a9, a9, 16, 16
@@ -173,7 +173,7 @@
 EX(10f)	s8i	a9, a11, 2
 	addi	a11, a11, 2		# advance dst pointer
 	sub	a2, a11, a2		# compute strlen
-	retw
+	abi_ret_default
 
 	.align	4		# 1 mod 4 alignment for LOOPNEZ
 	.byte	0		# (0 mod 4 alignment for LBEG)
@@ -199,7 +199,7 @@
 
 .Lunalignedend:
 	sub	a2, a11, a2		# compute strlen
-	retw
+	abi_ret_default
 
 ENDPROC(__strncpy_user)
 
@@ -214,4 +214,4 @@
 10:
 11:
 	movi	a2, -EFAULT
-	retw
+	abi_ret_default
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S
index 1f2ca2b..3d391dca 100644
--- a/arch/xtensa/lib/strnlen_user.S
+++ b/arch/xtensa/lib/strnlen_user.S
@@ -45,7 +45,7 @@
 .text
 ENTRY(__strnlen_user)
 
-	entry	sp, 16		# minimal stack frame
+	abi_entry_default
 	# a2/ s, a3/ len
 	addi	a4, a2, -4	# because we overincrement at the end;
 				# we compensate with load offsets of 4
@@ -96,7 +96,7 @@
 	addi	a4, a4, 1	# advance string pointer
 .L101:
 	sub	a2, a4, a2	# compute length
-	retw
+	abi_ret_default
 
 # NOTE that in several places below, we point to the byte just after
 # the zero byte in order to include the NULL terminator in the count.
@@ -106,15 +106,15 @@
 .Lz0:	# byte 0 is zero
 	addi	a4, a4, 1	# point just beyond zero byte
 	sub	a2, a4, a2	# subtract to get length
-	retw
+	abi_ret_default
 .Lz1:	# byte 1 is zero
 	addi	a4, a4, 1+1	# point just beyond zero byte
 	sub	a2, a4, a2	# subtract to get length
-	retw
+	abi_ret_default
 .Lz2:	# byte 2 is zero
 	addi	a4, a4, 2+1	# point just beyond zero byte
 	sub	a2, a4, a2	# subtract to get length
-	retw
+	abi_ret_default
 
 .L1mod2:	# address is odd
 EX(10f)	l8ui	a9, a4, 4		# get byte 0
@@ -130,7 +130,7 @@
 	# byte 3 is zero
 	addi	a4, a4, 3+1	# point just beyond zero byte
 	sub	a2, a4, a2	# subtract to get length
-	retw
+	abi_ret_default
 
 ENDPROC(__strnlen_user)
 
@@ -138,4 +138,4 @@
 	.align	4
 10:
 	movi	a2, 0
-	retw
+	abi_ret_default
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S
index 228607e..a0aa404 100644
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -60,7 +60,7 @@
 	.text
 ENTRY(__xtensa_copy_user)
 
-	entry	sp, 16		# minimal stack frame
+	abi_entry_default
 	# a2/ dst, a3/ src, a4/ len
 	mov	a5, a2		# copy dst so that a2 is return value
 	mov	a11, a4		# preserve original len for error case
@@ -75,7 +75,7 @@
 	__ssa8	a3		# set shift amount from byte offset
 	bnez	a4, .Lsrcunaligned
 	movi	a2, 0		# return success for len==0
-	retw
+	abi_ret_default
 
 /*
  * Destination is unaligned
@@ -127,7 +127,7 @@
 #endif /* !XCHAL_HAVE_LOOPS */
 .Lbytecopydone:
 	movi	a2, 0		# return success for len bytes copied
-	retw
+	abi_ret_default
 
 /*
  * Destination and source are word-aligned.
@@ -187,7 +187,7 @@
 EX(10f)	s8i	a6, a5,  0
 .L5:
 	movi	a2, 0		# return success for len bytes copied
-	retw
+	abi_ret_default
 
 /*
  * Destination is aligned, Source is unaligned
@@ -264,7 +264,7 @@
 EX(10f)	s8i	a6, a5,  0
 .L15:
 	movi	a2, 0		# return success for len bytes copied
-	retw
+	abi_ret_default
 
 ENDPROC(__xtensa_copy_user)
 
@@ -281,4 +281,4 @@
 10:
 	sub	a2, a5, a2	/* a2 <-- bytes copied */
 	sub	a2, a11, a2	/* a2 <-- bytes not copied */
-	retw
+	abi_ret_default