ARM: bitops: switch set/clear/change bitops to use ldrex/strex

Switch the set/clear/change bitops to use the word-based exclusive
operations, which are only present in a wider range of ARM architectures
than the byte-based exclusive operations.

Tested record:
- Nicolas Pitre: ext3,rw,le
- Sourav Poddar: nfs,le
- Will Deacon: ext3,rw,le
- Tony Lindgren: ext3+nfs,le

Reviewed-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Tested-by: Sourav Poddar <sourav.poddar@ti.com>
Tested-by: Will Deacon <will.deacon@arm.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index bd00551f..a9d9d15 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,15 +1,15 @@
-
-#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_32v6K)
+#if __LINUX_ARM_ARCH__ >= 6
 	.macro	bitop, instr
 	ands	ip, r1, #3
 	strneb	r1, [ip]		@ assert word-aligned
 	mov	r2, #1
-	and	r3, r0, #7		@ Get bit offset
-	add	r1, r1, r0, lsr #3	@ Get byte offset
+	and	r3, r0, #31		@ Get bit offset
+	mov	r0, r0, lsr #5
+	add	r1, r1, r0, lsl #2	@ Get word offset
 	mov	r3, r2, lsl r3
-1:	ldrexb	r2, [r1]
+1:	ldrex	r2, [r1]
 	\instr	r2, r2, r3
-	strexb	r0, r2, [r1]
+	strex	r0, r2, [r1]
 	cmp	r0, #0
 	bne	1b
 	mov	pc, lr
@@ -18,15 +18,16 @@
 	.macro	testop, instr, store
 	ands	ip, r1, #3
 	strneb	r1, [ip]		@ assert word-aligned
-	and	r3, r0, #7		@ Get bit offset
 	mov	r2, #1
-	add	r1, r1, r0, lsr #3	@ Get byte offset
+	and	r3, r0, #31		@ Get bit offset
+	mov	r0, r0, lsr #5
+	add	r1, r1, r0, lsl #2	@ Get word offset
 	mov	r3, r2, lsl r3		@ create mask
 	smp_dmb
-1:	ldrexb	r2, [r1]
+1:	ldrex	r2, [r1]
 	ands	r0, r2, r3		@ save old value of bit
-	\instr	r2, r2, r3			@ toggle bit
-	strexb	ip, r2, [r1]
+	\instr	r2, r2, r3		@ toggle bit
+	strex	ip, r2, [r1]
 	cmp	ip, #0
 	bne	1b
 	smp_dmb
@@ -38,13 +39,14 @@
 	.macro	bitop, instr
 	ands	ip, r1, #3
 	strneb	r1, [ip]		@ assert word-aligned
-	and	r2, r0, #7
+	and	r2, r0, #31
+	mov	r0, r0, lsr #5
 	mov	r3, #1
 	mov	r3, r3, lsl r2
 	save_and_disable_irqs ip
-	ldrb	r2, [r1, r0, lsr #3]
+	ldr	r2, [r1, r0, lsl #2]
 	\instr	r2, r2, r3
-	strb	r2, [r1, r0, lsr #3]
+	str	r2, [r1, r0, lsl #2]
 	restore_irqs ip
 	mov	pc, lr
 	.endm
@@ -60,11 +62,11 @@
 	.macro	testop, instr, store
 	ands	ip, r1, #3
 	strneb	r1, [ip]		@ assert word-aligned
-	add	r1, r1, r0, lsr #3
-	and	r3, r0, #7
-	mov	r0, #1
+	and	r3, r0, #31
+	mov	r0, r0, lsr #5
 	save_and_disable_irqs ip
-	ldrb	r2, [r1]
+	ldr	r2, [r1, r0, lsl #2]!
+	mov	r0, #1
 	tst	r2, r0, lsl r3
 	\instr	r2, r2, r0, lsl r3
 	\store	r2, [r1]
diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S
index 80f3115..68ed5b6 100644
--- a/arch/arm/lib/changebit.S
+++ b/arch/arm/lib/changebit.S
@@ -12,12 +12,6 @@
 #include "bitops.h"
                 .text
 
-/* Purpose  : Function to change a bit
- * Prototype: int change_bit(int bit, void *addr)
- */
-ENTRY(_change_bit_be)
-		eor	r0, r0, #0x18		@ big endian byte ordering
-ENTRY(_change_bit_le)
+ENTRY(_change_bit)
 	bitop	eor
-ENDPROC(_change_bit_be)
-ENDPROC(_change_bit_le)
+ENDPROC(_change_bit)
diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S
index 1a63e43..4c04c3b 100644
--- a/arch/arm/lib/clearbit.S
+++ b/arch/arm/lib/clearbit.S
@@ -12,13 +12,6 @@
 #include "bitops.h"
                 .text
 
-/*
- * Purpose  : Function to clear a bit
- * Prototype: int clear_bit(int bit, void *addr)
- */
-ENTRY(_clear_bit_be)
-		eor	r0, r0, #0x18		@ big endian byte ordering
-ENTRY(_clear_bit_le)
+ENTRY(_clear_bit)
 	bitop	bic
-ENDPROC(_clear_bit_be)
-ENDPROC(_clear_bit_le)
+ENDPROC(_clear_bit)
diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S
index 1dd7176..bbee5c6 100644
--- a/arch/arm/lib/setbit.S
+++ b/arch/arm/lib/setbit.S
@@ -12,13 +12,6 @@
 #include "bitops.h"
 		.text
 
-/*
- * Purpose  : Function to set a bit
- * Prototype: int set_bit(int bit, void *addr)
- */
-ENTRY(_set_bit_be)
-		eor	r0, r0, #0x18		@ big endian byte ordering
-ENTRY(_set_bit_le)
+ENTRY(_set_bit)
 	bitop	orr
-ENDPROC(_set_bit_be)
-ENDPROC(_set_bit_le)
+ENDPROC(_set_bit)
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
index 5c98dc5..15a4d43 100644
--- a/arch/arm/lib/testchangebit.S
+++ b/arch/arm/lib/testchangebit.S
@@ -12,9 +12,6 @@
 #include "bitops.h"
                 .text
 
-ENTRY(_test_and_change_bit_be)
-		eor	r0, r0, #0x18		@ big endian byte ordering
-ENTRY(_test_and_change_bit_le)
-	testop	eor, strb
-ENDPROC(_test_and_change_bit_be)
-ENDPROC(_test_and_change_bit_le)
+ENTRY(_test_and_change_bit)
+	testop	eor, str
+ENDPROC(_test_and_change_bit)
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 543d709..521b66b 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -12,9 +12,6 @@
 #include "bitops.h"
                 .text
 
-ENTRY(_test_and_clear_bit_be)
-		eor	r0, r0, #0x18		@ big endian byte ordering
-ENTRY(_test_and_clear_bit_le)
-	testop	bicne, strneb
-ENDPROC(_test_and_clear_bit_be)
-ENDPROC(_test_and_clear_bit_le)
+ENTRY(_test_and_clear_bit)
+	testop	bicne, strne
+ENDPROC(_test_and_clear_bit)
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index 0b3f390..1c98cc2 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -12,9 +12,6 @@
 #include "bitops.h"
                 .text
 
-ENTRY(_test_and_set_bit_be)
-		eor	r0, r0, #0x18		@ big endian byte ordering
-ENTRY(_test_and_set_bit_le)
-	testop	orreq, streqb
-ENDPROC(_test_and_set_bit_be)
-ENDPROC(_test_and_set_bit_le)
+ENTRY(_test_and_set_bit)
+	testop	orreq, streq
+ENDPROC(_test_and_set_bit)