blob: d75d18b7bd554d3d389d543d3084e43be29f1d16 [file] [log] [blame]
Anton Blanchard15c2d452015-01-21 12:27:38 +11001/*
2 * Author: Anton Blanchard <anton@au.ibm.com>
3 * Copyright 2015 IBM Corporation.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10#include <asm/ppc_asm.h>
Al Viro9445aa12016-01-13 23:33:46 -050011#include <asm/export.h>
Anton Blanchard15c2d452015-01-21 12:27:38 +110012
/*
 * Register aliases used by memcmp in this file.
 *
 * off8/off16/off24 name r6-r8. The long path of memcmp loads them with the
 * constants 8, 16 and 24 and passes them as the index operand of the LD
 * loads. Note that r6 is also used as a plain scratch register by the
 * alignment test at the top of memcmp, before off8 is initialised.
 */
13#define off8 r6
14#define off16 r7
15#define off24 r8
16
/*
 * Data registers. In the long path rA/rC/rE/rG receive the four doublewords
 * of a 32 byte block read through r3, and rB/rD/rF/rH the matching
 * doublewords read through r4. The short path only uses rA and rB for the
 * two bytes being compared and rC for their difference.
 *
 * rD-rH map to r27-r31, which memcmp stores to the stack before using them
 * and reloads on every exit from the long path.
 */
17#define rA r9
18#define rB r10
19#define rC r11
20#define rD r27
21#define rE r28
22#define rF r29
23#define rG r30
24#define rH r31
25
/*
 * LD is the indexed doubleword load used by the long path: the byte-reversed
 * form (ldbrx) on little-endian builds, the plain form (ldx) otherwise.
 * Presumably this makes the lowest-addressed byte the most significant one
 * in both cases, so that the unsigned doubleword compares (cmpld) in memcmp
 * order the data the same way a byte-by-byte compare would.
 */
26#ifdef __LITTLE_ENDIAN__
27#define LD ldbrx
28#else
29#define LD ldx
30#endif
31
/*
 * memcmp: compare two byte ranges.
 *
 * In:
 *   r3 = address of the first buffer
 *   r4 = address of the second buffer
 *   r5 = number of bytes to compare
 *
 * Out:
 *   r3 = 0 if all compared bytes are equal (or the length is 0).
 *        Short path mismatch: (byte from first buffer) - (byte from second
 *        buffer) at the first differing position.
 *        Long path mismatch: 1 if the first differing doubleword of the
 *        first buffer compares greater (unsigned) than that of the second
 *        buffer, otherwise -1.
 *
 * Registers written: r0, r3-r11, ctr, cr0, cr1, cr6, cr7. The long path also
 * uses r27-r31, saving them below r1 first and restoring them before it
 * returns or falls back to the short loop.
 *
 * Structure:
 *   .Lshort   byte-at-a-time loop, unrolled four times.
 *   .Llong    32 bytes per iteration using eight doubleword loads, taken
 *             only when both pointers are 8 byte aligned and the length is
 *             greater than 31. Any remainder (length modulo 32) is finished
 *             by .Lshort.
 */
32_GLOBAL(memcmp)
33 cmpdi cr1,r5,0 /* cr1: is the length zero? */
34
35 /* Use the short loop unless both strings are 8B aligned */
36 or r6,r3,r4
37 andi. r6,r6,7 /* cr0 eq only if neither pointer has any of its low 3 bits set */
38
39 /* Use the short loop if length is less than 32B */
40 cmpdi cr6,r5,31
41
42 beq cr1,.Lzero /* nothing to compare: return 0 */
43 bne .Lshort /* cr0 from the andi. above: some pointer is unaligned */
44 bgt cr6,.Llong /* aligned and more than 31 bytes; otherwise fall through */
45
/*
 * Byte loop. ctr holds the number of bytes still to compare. Each of the
 * first three unrolled steps ends with bdz, which decrements ctr and leaves
 * with "equal" when it reaches zero. After the fourth step both pointers
 * advance by 4 and bdnz decrements ctr and loops while it is non-zero.
 * Also entered from .Ltail with r5 holding the leftover byte count.
 */
46.Lshort:
47 mtctr r5
48
491: lbz rA,0(r3)
50 lbz rB,0(r4)
51 subf. rC,rB,rA /* rC = rA - rB, also sets cr0 */
52 bne .Lnon_zero
53 bdz .Lzero
54
55 lbz rA,1(r3)
56 lbz rB,1(r4)
57 subf. rC,rB,rA
58 bne .Lnon_zero
59 bdz .Lzero
60
61 lbz rA,2(r3)
62 lbz rB,2(r4)
63 subf. rC,rB,rA
64 bne .Lnon_zero
65 bdz .Lzero
66
67 lbz rA,3(r3)
68 lbz rB,3(r4)
69 subf. rC,rB,rA
70 bne .Lnon_zero
71
72 addi r3,r3,4
73 addi r4,r4,4
74
75 bdnz 1b
76
/* All compared bytes were equal: return 0. */
77.Lzero:
78 li r3,0
79 blr
80
/* Short path mismatch: return the byte difference computed into rC. */
81.Lnon_zero:
82 mr r3,rC
83 blr
84
/*
 * Long path: both pointers are 8 byte aligned and r5 is greater than 31.
 */
85.Llong:
86 li off8,8
87 li off16,16
88 li off24,24
89
/*
 * Save r27-r31 (rD-rH) at negative offsets from r1. r1 itself is not
 * adjusted, so no stack frame is created.
 * NOTE(review): this relies on the 40 bytes below r1 being safe to use
 * without allocating a frame - confirm against the ABI this is built for.
 */
90 std r31,-8(r1)
91 std r30,-16(r1)
92 std r29,-24(r1)
93 std r28,-32(r1)
94 std r27,-40(r1)
95
96 srdi r0,r5,5 /* r0 = length / 32 = number of whole 32 byte blocks */
97 mtctr r0
98 andi. r5,r5,31 /* r5 = length modulo 32, finished later via .Ltail */
99
/* Load all eight doublewords of the first 32 byte block. */
100 LD rA,0,r3
101 LD rB,0,r4
102
103 LD rC,off8,r3
104 LD rD,off8,r4
105
106 LD rE,off16,r3
107 LD rF,off16,r4
108
109 LD rG,off24,r3
110 LD rH,off24,r4
111 cmpld cr0,rA,rB
112
113 addi r3,r3,32
114 addi r4,r4,32
115
116 bdz .Lfirst32 /* only one block: finish its compares there */
117
/*
 * Pipeline fill. Loads for the second block are interleaved with the
 * compares and branches for the first block. Each register pair is reloaded
 * only after its compare for the first block has been issued. When this
 * section ends, cr0 holds the A/B result of the second block, while cr6 and
 * cr7 hold the E/F and G/H results of the first block, not yet tested.
 */
118 LD rA,0,r3
119 LD rB,0,r4
120 cmpld cr1,rC,rD
121
122 LD rC,off8,r3
123 LD rD,off8,r4
124 cmpld cr6,rE,rF
125
126 LD rE,off16,r3
127 LD rF,off16,r4
128 cmpld cr7,rG,rH
129 bne cr0,.LcmpAB
130
131 LD rG,off24,r3
132 LD rH,off24,r4
133 cmpld cr0,rA,rB
134 bne cr1,.LcmpCD
135
136 addi r3,r3,32
137 addi r4,r4,32
138
139 bdz .Lsecond32 /* exactly two blocks: drain the pipeline */
140
141 .balign 16
142
/*
 * Steady state loop, one 32 byte block per iteration. On entry rC-rH hold
 * the remaining doublewords of the current block, cr0 holds its A/B result,
 * and cr6/cr7 hold the untested E/F and G/H results of the block before it.
 * Each group below loads one pair of the next block, issues a compare for
 * the current block and tests a result that was produced earlier.
 * The instruction order matters: a pair must be compared before the load
 * that overwrites it, and a cr field must be tested before it is rewritten.
 */
1431: LD rA,0,r3
144 LD rB,0,r4
145 cmpld cr1,rC,rD
146 bne cr6,.LcmpEF
147
148 LD rC,off8,r3
149 LD rD,off8,r4
150 cmpld cr6,rE,rF
151 bne cr7,.LcmpGH
152
153 LD rE,off16,r3
154 LD rF,off16,r4
155 cmpld cr7,rG,rH
156 bne cr0,.LcmpAB
157
158 LD rG,off24,r3
159 LD rH,off24,r4
160 cmpld cr0,rA,rB
161 bne cr1,.LcmpCD
162
163 addi r3,r3,32
164 addi r4,r4,32
165
166 bdnz 1b
167
/*
 * Pipeline drain. No further loads: test the E/F and G/H results left over
 * from the previous block, then compare and test what remains of the final
 * block (its A/B result is already in cr0).
 */
168.Lsecond32:
169 cmpld cr1,rC,rD
170 bne cr6,.LcmpEF
171
172 cmpld cr6,rE,rF
173 bne cr7,.LcmpGH
174
175 cmpld cr7,rG,rH
176 bne cr0,.LcmpAB
177
178 bne cr1,.LcmpCD
179 bne cr6,.LcmpEF
180 bne cr7,.LcmpGH
181
/*
 * All whole 32 byte blocks matched. Restore r27-r31, then either return 0
 * or let the byte loop compare the r5 leftover bytes. r3 and r4 already
 * point just past the last whole block.
 */
182.Ltail:
183 ld r31,-8(r1)
184 ld r30,-16(r1)
185 ld r29,-24(r1)
186 ld r28,-32(r1)
187 ld r27,-40(r1)
188
189 cmpdi r5,0
190 beq .Lzero
191 b .Lshort
192
/*
 * Single block case: A/B was already compared into cr0, so do the other
 * three compares and test all four results in address order.
 */
193.Lfirst32:
194 cmpld cr1,rC,rD
195 cmpld cr6,rE,rF
196 cmpld cr7,rG,rH
197
198 bne cr0,.LcmpAB
199 bne cr1,.LcmpCD
200 bne cr6,.LcmpEF
201 bne cr7,.LcmpGH
202
203 b .Ltail
204
/*
 * Mismatch exits for the long path, one per cr field. Each is reached with
 * its cr field still holding the cmpld result of the differing pair, and
 * returns 1 if the doubleword from the first buffer was greater, else -1.
 */
205.LcmpAB:
206 li r3,1
207 bgt cr0,.Lout
208 li r3,-1
209 b .Lout
210
211.LcmpCD:
212 li r3,1
213 bgt cr1,.Lout
214 li r3,-1
215 b .Lout
216
217.LcmpEF:
218 li r3,1
219 bgt cr6,.Lout
220 li r3,-1
221 b .Lout
222
223.LcmpGH:
224 li r3,1
225 bgt cr7,.Lout
226 li r3,-1 /* falls through to .Lout */
227
/* Common long path exit: restore r27-r31 and return the value in r3. */
228.Lout:
229 ld r31,-8(r1)
230 ld r30,-16(r1)
231 ld r29,-24(r1)
232 ld r28,-32(r1)
233 ld r27,-40(r1)
234 blr
Al Viro9445aa12016-01-13 23:33:46 -0500235EXPORT_SYMBOL(memcmp)