x86: atomic64 assembly improvements

In the "xchg" implementation, %ebx and %ecx don't need to be copied
into %eax and %edx respectively (this is only necessary when desiring
to only read the stored value).
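
A minimal sketch of the resulting loop (register assignments as in
this file: new value in %ecx:%ebx, pointer in %esi; a plain lock
prefix stands in for LOCK_PREFIX). Even if %edx:%eax start out stale,
a failing cmpxchg8b loads the current value into them, so the loop
corrects itself on the first retry:

1:
	lock cmpxchg8b (%esi)	/* try to store %ecx:%ebx; on failure the
				   current value is loaded into %edx:%eax */
	jne 1b			/* retry with the freshly read value */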

In the "add_unless" implementation, swapping the use of %ecx and %esi
for passing arguments allows %esi to become an input only (i.e.
permitting the register to be re-used to address the same object
without reload).
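
A purely hypothetical caller-side sketch (the symbol and values are
made up; the register roles follow the post-patch code: addend in
%edx:%eax, "unless" value in %edi:%ecx, pointer in %esi):

	movl	$counter, %esi	/* %esi := &counter, loaded once
				   (hypothetical symbol) */
	movl	$1, %eax	/* addend, low half */
	xorl	%edx, %edx	/* addend, high half */
	movl	$0, %ecx	/* "unless" value, low half */
	xorl	%edi, %edi	/* "unless" value, high half */
	call	atomic64_add_unless_cx8
	/* %esi still holds &counter and can address the same object
	   again without a reload */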

In "{add,sub}_return", doing the initial read64 through the passed in
%ecx decreases a register dependency.
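
A sketch of the resulting prologue, with illustrative comments
(read64, defined earlier in this file, performs the 64-bit load via a
locked cmpxchg8b through the register it is handed):

	movl	%edx, %edi	/* save the addend's high half */
	movl	%ecx, %ebp	/* keep the pointer for the loop... */
	read64	%ecx		/* ...but address the first read through the
				   incoming %ecx, so it need not wait for
				   the mov that produces %ebp */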

In "inc_not_zero", a branch can be eliminated by or-ing together the
two halves of the current (64-bit) value, and code size can be further
reduced by adjusting the arithmetic slightly.
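
A sketch of the resulting loop head, with illustrative comments (byte
counts are from the standard instruction encodings). Since %ecx is
consumed by the zero test anyway, it has to be rewritten before the
cmpxchg8b; clearing it and folding the high half into the adc is one
byte shorter than the previous movl/adcl $0 pair:

	movl	%eax, %ecx
	orl	%edx, %ecx	/* ZF set iff the 64-bit value is zero */
	jz	3f		/* if so, return; %eax (the low half)
				   already holds the 0 return value */
	movl	%eax, %ebx
	xorl	%ecx, %ecx	/* %ecx held the OR result; clear it */
	addl	$1, %ebx	/* increment the low half... */
	adcl	%edx, %ecx	/* ...and add the high half plus carry;
				   xorl+adcl (4 bytes) replace
				   movl %edx,%ecx + adcl $0,%ecx (5 bytes) */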

v2: Undo the folding of "xchg" and "set".

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4F19A2BC020000780006E0DC@nat28.tlf.novell.com
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 391a083..f5cc9eb 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -55,8 +55,6 @@
 ENTRY(atomic64_xchg_cx8)
 	CFI_STARTPROC
 
-	movl %ebx, %eax
-	movl %ecx, %edx
 1:
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
@@ -78,7 +76,7 @@
 	movl %edx, %edi
 	movl %ecx, %ebp
 
-	read64 %ebp
+	read64 %ecx
 1:
 	movl %eax, %ebx
 	movl %edx, %ecx
@@ -159,23 +157,22 @@
 	SAVE ebx
 /* these just push these two parameters on the stack */
 	SAVE edi
-	SAVE esi
+	SAVE ecx
 
-	movl %ecx, %ebp
-	movl %eax, %esi
+	movl %eax, %ebp
 	movl %edx, %edi
 
-	read64 %ebp
+	read64 %esi
 1:
 	cmpl %eax, 0(%esp)
 	je 4f
 2:
 	movl %eax, %ebx
 	movl %edx, %ecx
-	addl %esi, %ebx
+	addl %ebp, %ebx
 	adcl %edi, %ecx
 	LOCK_PREFIX
-	cmpxchg8b (%ebp)
+	cmpxchg8b (%esi)
 	jne 1b
 
 	movl $1, %eax
@@ -199,13 +196,13 @@
 
 	read64 %esi
 1:
-	testl %eax, %eax
-	je 4f
-2:
+	movl %eax, %ecx
+	orl %edx, %ecx
+	jz 3f
 	movl %eax, %ebx
-	movl %edx, %ecx
+	xorl %ecx, %ecx
 	addl $1, %ebx
-	adcl $0, %ecx
+	adcl %edx, %ecx
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b
@@ -214,9 +211,5 @@
 3:
 	RESTORE ebx
 	ret
-4:
-	testl %edx, %edx
-	jne 2b
-	jmp 3b
 	CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)