x86: atomic64 assembly improvements
In the "xchg" implementation, %ebx and %ecx don't need to be copied
into %eax and %edx respectively (this is only necessary when desiring
to only read the stored value).
In the "add_unless" implementation, swapping the use of %ecx and %esi
for passing arguments allows %esi to become an input only (i.e.
permitting the register to be re-used to address the same object
without reload).
In "{add,sub}_return", doing the initial read64 through the passed in
%ecx decreases a register dependency.
In "inc_not_zero", a branch can be eliminated by or-ing together the
two halves of the current (64-bit) value, and code size can be further
reduced by adjusting the arithmetic slightly.
v2: Undo the folding of "xchg" and "set".
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4F19A2BC020000780006E0DC@nat28.tlf.novell.com
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 391a083..f5cc9eb 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -55,8 +55,6 @@
ENTRY(atomic64_xchg_cx8)
CFI_STARTPROC
- movl %ebx, %eax
- movl %ecx, %edx
1:
LOCK_PREFIX
cmpxchg8b (%esi)
@@ -78,7 +76,7 @@
movl %edx, %edi
movl %ecx, %ebp
- read64 %ebp
+ read64 %ecx
1:
movl %eax, %ebx
movl %edx, %ecx
@@ -159,23 +157,22 @@
SAVE ebx
/* these just push these two parameters on the stack */
SAVE edi
- SAVE esi
+ SAVE ecx
- movl %ecx, %ebp
- movl %eax, %esi
+ movl %eax, %ebp
movl %edx, %edi
- read64 %ebp
+ read64 %esi
1:
cmpl %eax, 0(%esp)
je 4f
2:
movl %eax, %ebx
movl %edx, %ecx
- addl %esi, %ebx
+ addl %ebp, %ebx
adcl %edi, %ecx
LOCK_PREFIX
- cmpxchg8b (%ebp)
+ cmpxchg8b (%esi)
jne 1b
movl $1, %eax
@@ -199,13 +196,13 @@
read64 %esi
1:
- testl %eax, %eax
- je 4f
-2:
+ movl %eax, %ecx
+ orl %edx, %ecx
+ jz 3f
movl %eax, %ebx
- movl %edx, %ecx
+ xorl %ecx, %ecx
addl $1, %ebx
- adcl $0, %ecx
+ adcl %edx, %ecx
LOCK_PREFIX
cmpxchg8b (%esi)
jne 1b
@@ -214,9 +211,5 @@
3:
RESTORE ebx
ret
-4:
- testl %edx, %edx
- jne 2b
- jmp 3b
CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)