blob: 59026029d017d33ef784f4e80ece4593cc0cea0e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3 *
4 * Author: Nicolas Pitre <nico@cam.org>
5 * - contributed to gcc-3.4 on Sep 30, 2003
6 * - adapted for the Linux kernel on Oct 2, 2003
7 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11This file is free software; you can redistribute it and/or modify it
12under the terms of the GNU General Public License as published by the
13Free Software Foundation; either version 2, or (at your option) any
14later version.
15
16In addition to the permissions in the GNU General Public License, the
17Free Software Foundation gives you unlimited permission to link the
18compiled version of this file into combinations with other programs,
19and to distribute those combinations without any restriction coming
20from the use of this file. (The General Public License restrictions
21do apply in other respects; for example, they cover modification of
22the file, and distribution when not linked into a combine
23executable.)
24
25This file is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of
27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28General Public License for more details.
29
30You should have received a copy of the GNU General Public License
31along with this program; see the file COPYING. If not, write to
32the Free Software Foundation, 59 Temple Place - Suite 330,
33Boston, MA 02111-1307, USA. */
34
35
36#include <linux/linkage.h>
37#include <asm/assembler.h>
38
39
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.  On exit \result holds the
 * quotient.  \dividend, \divisor and \curbit are all overwritten; do not
 * treat \dividend as a remainder afterwards, because the unrolled loop
 * keeps subtracting shifted divisors even at positions where \curbit has
 * already been shifted out to zero.
 *
 * Both users in this file (__udivsi3 and __divsi3) branch away before
 * expanding the macro when the divisor is 0, when the dividend is not
 * greater than the divisor, and when the divisor is a power of two.
 */
40.macro ARM_DIV_BODY dividend, divisor, result, curbit
41
42#if __LINUX_ARM_ARCH__ >= 5
43
 /*
  * Line the divisor up under the dividend: shift it, and the quotient
  * bit held in curbit, left by clz(divisor) - clz(dividend) positions.
  * result is used as scratch for the shift count before being cleared.
  */
44 clz \curbit, \divisor
45 clz \result, \dividend
46 sub \result, \curbit, \result
47 mov \curbit, #1
48 mov \divisor, \divisor, lsl \result
49 mov \curbit, \curbit, lsl \result
50 mov \result, #0
51
52#else
53
54 @ Initially shift the divisor left 3 bits if possible,
55 @ set curbit accordingly. This allows for curbit to be located
56 @ at the left end of each 4 bit nibbles in the division loop
57 @ to save one loop in most cases.
58 tst \divisor, #0xe0000000
59 moveq \divisor, \divisor, lsl #3
60 moveq \curbit, #8
61 movne \curbit, #1
62
63 @ Unless the divisor is very big, shift it up in multiples of
64 @ four bits, since this is the amount of unwinding in the main
65 @ division loop. Continue shifting until the divisor is
66 @ larger than the dividend.
671: cmp \divisor, #0x10000000
68 cmplo \divisor, \dividend
69 movlo \divisor, \divisor, lsl #4
70 movlo \curbit, \curbit, lsl #4
71 blo 1b
72
73 @ For very big divisors, we must shift it a bit at a time, or
74 @ we will be in danger of overflowing.
751: cmp \divisor, #0x80000000
76 cmplo \divisor, \dividend
77 movlo \divisor, \divisor, lsl #1
78 movlo \curbit, \curbit, lsl #1
79 blo 1b
80
 /* Start with an empty quotient; the loop below ORs bits into it. */
81 mov \result, #0
82
83#endif
84
 /*
  * Each pass tries the divisor at four successive bit positions
  * (shifted right by 0, 1, 2 and 3) and, for every subtraction that
  * succeeds, ORs the matching shifted curbit into the quotient.
  */
85 @ Division loop
861: cmp \dividend, \divisor
87 subhs \dividend, \dividend, \divisor
88 orrhs \result, \result, \curbit
89 cmp \dividend, \divisor, lsr #1
90 subhs \dividend, \dividend, \divisor, lsr #1
91 orrhs \result, \result, \curbit, lsr #1
92 cmp \dividend, \divisor, lsr #2
93 subhs \dividend, \dividend, \divisor, lsr #2
94 orrhs \result, \result, \curbit, lsr #2
95 cmp \dividend, \divisor, lsr #3
96 subhs \dividend, \dividend, \divisor, lsr #3
97 orrhs \result, \result, \curbit, lsr #3
 /*
  * The loop ends when the dividend has reached zero, or when the
  * flag-setting shift of curbit (movnes) produces zero.  The divisor
  * shift and the branch back only execute while both are non-zero.
  */
98 cmp \dividend, #0 @ Early termination?
99 movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
100 movne \divisor, \divisor, lsr #4
101 bne 1b
102
103.endm
104
105
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Sets \order to log2(\divisor) for a power-of-two \divisor, so that a
 * division can be done as a right shift by \order.  The users in this
 * file expand it only after "tst r1, r2" (with r2 = r1 - 1) has shown the
 * divisor to be a power of two.
 *
 * The pre-ARMv5 path shifts \divisor right while searching, so \divisor
 * is not preserved there; the clz path leaves it untouched.
 */
106.macro ARM_DIV2_ORDER divisor, order
107
108#if __LINUX_ARM_ARCH__ >= 5
109
 /* order = 31 - clz(divisor), i.e. the index of the highest set bit. */
110 clz \order, \divisor
111 rsb \order, \order, #31
112
113#else
114
 /*
  * Binary search for the set bit: whenever the value is at least
  * 2^16, 2^8 or 2^4, shift it down by that amount and add the same
  * amount to the running order.
  */
115 cmp \divisor, #(1 << 16)
116 movhs \divisor, \divisor, lsr #16
117 movhs \order, #16
118 movlo \order, #0
119
120 cmp \divisor, #(1 << 8)
121 movhs \divisor, \divisor, lsr #8
122 addhs \order, \order, #8
123
124 cmp \divisor, #(1 << 4)
125 movhs \divisor, \divisor, lsr #4
126 addhs \order, \order, #4
127
 /*
  * The value is now below 16.  Above 4 it can only be 8 for a power
  * of two, which contributes 3; otherwise it is 1, 2 or 4 and the
  * value shifted right by one (0, 1 or 2) is the contribution.
  */
128 cmp \divisor, #(1 << 2)
129 addhi \order, \order, #3
130 addls \order, \order, \divisor, lsr #1
131
132#endif
133
134.endm
135
136
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core: subtracts shifted copies of \divisor from
 * \dividend, leaving the remainder in \dividend.
 *
 * \order first counts how many bit positions \divisor was shifted left;
 * at most order + 1 compare/subtract steps are then performed, four at a
 * time in the unrolled loop and the last one to three individually.
 * \divisor and \order are overwritten.  \spare is only written on the
 * ARMv5+ (clz) path.
 */
137.macro ARM_MOD_BODY dividend, divisor, order, spare
138
139#if __LINUX_ARM_ARCH__ >= 5
140
 /* Shift the divisor left by clz(divisor) - clz(dividend) positions. */
141 clz \order, \divisor
142 clz \spare, \dividend
143 sub \order, \order, \spare
144 mov \divisor, \divisor, lsl \order
145
146#else
147
148 mov \order, #0
149
150 @ Unless the divisor is very big, shift it up in multiples of
151 @ four bits, since this is the amount of unwinding in the main
152 @ division loop. Continue shifting until the divisor is
153 @ larger than the dividend.
1541: cmp \divisor, #0x10000000
155 cmplo \divisor, \dividend
156 movlo \divisor, \divisor, lsl #4
157 addlo \order, \order, #4
158 blo 1b
159
160 @ For very big divisors, we must shift it a bit at a time, or
161 @ we will be in danger of overflowing.
1621: cmp \divisor, #0x80000000
163 cmplo \divisor, \dividend
164 movlo \divisor, \divisor, lsl #1
165 addlo \order, \order, #1
166 blo 1b
167
168#endif
169
170 @ Perform all needed subtractions to keep only the remainder.
171 @ Do comparisons in batch of 4 first.
 /*
  * order + 1 steps are needed, so order - 3 is negative exactly when
  * fewer than four steps are needed; in that case skip the batch loop.
  */
172 subs \order, \order, #3 @ yes, 3 is intended here
173 blt 2f
174
1751: cmp \dividend, \divisor
176 subhs \dividend, \dividend, \divisor
177 cmp \dividend, \divisor, lsr #1
178 subhs \dividend, \dividend, \divisor, lsr #1
179 cmp \dividend, \divisor, lsr #2
180 subhs \dividend, \dividend, \divisor, lsr #2
181 cmp \dividend, \divisor, lsr #3
182 subhs \dividend, \dividend, \divisor, lsr #3
 /*
  * The plain mov does not touch the flags set by the cmp.  The order
  * counter is only decremented, and the loop only repeated, when the
  * cmp found the dividend to be at least 1 and the decremented order
  * is still non-negative (at least four more steps to do).
  */
183 cmp \dividend, #1
184 mov \divisor, \divisor, lsr #4
185 subges \order, \order, #4
186 bge 1b
187
 /*
  * Finished if the low two bits of order are clear (no left-over
  * steps) or if the dividend is already zero.
  */
188 tst \order, #3
189 teqne \dividend, #0
190 beq 5f
191
192 @ Either 1, 2 or 3 comparison/subtractions are left.
 /*
  * cmn compares order with -2: below it, one step is left (4f); equal,
  * two steps (3f); otherwise fall through and perform all three.
  */
1932: cmn \order, #2
194 blt 4f
195 beq 3f
196 cmp \dividend, \divisor
197 subhs \dividend, \dividend, \divisor
198 mov \divisor, \divisor, lsr #1
1993: cmp \dividend, \divisor
200 subhs \dividend, \dividend, \divisor
201 mov \divisor, \divisor, lsr #1
2024: cmp \dividend, \divisor
203 subhs \dividend, \dividend, \divisor
2045:
205.endm
206
207
/*
 * __udivsi3: unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3 and the condition flags may be overwritten.
 *
 * A zero divisor branches to Ldiv0.  Division by 1, a dividend that is
 * not greater than the divisor, and power-of-two divisors are handled by
 * short paths; everything else goes through ARM_DIV_BODY.
 */
208ENTRY(__udivsi3)
209
 /*
  * r2 = divisor - 1.  Z is set when the divisor is 1 (r0 is already the
  * answer), carry is clear when the subtraction borrowed (divisor 0).
  */
210 subs r2, r1, #1
211 moveq pc, lr
212 bcc Ldiv0
 /* dividend <= divisor: the quotient is 1 or 0, see label 11. */
213 cmp r0, r1
214 bls 11f
 /* divisor & (divisor - 1) == 0: power of two, see label 12. */
215 tst r1, r2
216 beq 12f
217
218 ARM_DIV_BODY r0, r1, r2, r3
219
220 mov r0, r2
221 mov pc, lr
222
 /* Flags still come from "cmp r0, r1": equal gives 1, lower gives 0. */
22311: moveq r0, #1
224 movne r0, #0
225 mov pc, lr
226
 /* Power-of-two divisor: r2 = log2(divisor), then shift the dividend. */
22712: ARM_DIV2_ORDER r1, r2
228
229 mov r0, r0, lsr r2
230 mov pc, lr
231
232
/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder
 * Uses: r1, r2, r3 and the condition flags may be overwritten.
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence below
 * settles every easy case without branching:
 *   - divisor == 1 or dividend == divisor: result 0
 *   - dividend <  divisor: r0 is returned unchanged
 *   - power-of-two divisor: result is dividend & (divisor - 1)
 * Only a dividend above a non-power-of-two divisor reaches ARM_MOD_BODY.
 */
233ENTRY(__umodsi3)
234
235 subs r2, r1, #1 @ compare divisor with 1
236 bcc Ldiv0
237 cmpne r0, r1 @ compare dividend with divisor
238 moveq r0, #0
 /*
  * tst is only executed for dividend > divisor and, with a plain
  * register operand, changes Z but not C.  Afterwards "ls" therefore
  * means power-of-two divisor on this path, and dividend <= divisor
  * (or divisor == 1) when the tst was skipped.
  */
239 tsthi r1, r2 @ see if divisor is power of 2
240 andeq r0, r0, r2
241 movls pc, lr
242
243 ARM_MOD_BODY r0, r1, r2, r3
244
245 mov pc, lr
246
247
/*
 * __divsi3: signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3, ip and the condition flags may be overwritten.
 *
 * The magnitudes are divided as unsigned values.  ip = r0 ^ r1 keeps the
 * sign of the result in its top bit, and the quotient is negated at the
 * end when that bit is set.  A zero divisor branches to Ldiv0.
 */
248ENTRY(__divsi3)
249
 /*
  * eor (no "s") and beq leave the flags from the cmp intact, so the
  * rsbmi below still sees the sign of the divisor.
  */
250 cmp r1, #0
251 eor ip, r0, r1 @ save the sign of the result.
252 beq Ldiv0
253 rsbmi r1, r1, #0 @ loops below use unsigned.
254 subs r2, r1, #1 @ division by 1 or -1 ?
255 beq 10f
 /* r3 = magnitude of the dividend; r0 keeps the original value. */
256 movs r3, r0
257 rsbmi r3, r0, #0 @ positive dividend value
258 cmp r3, r1
259 bls 11f
260 tst r1, r2 @ divisor is power of 2 ?
261 beq 12f
262
263 ARM_DIV_BODY r3, r1, r0, r2
264
 /* Negate the unsigned quotient when the operand signs differed. */
265 cmp ip, #0
266 rsbmi r0, r0, #0
267 mov pc, lr
268
 /*
  * Divisor was 1 or -1.  ip ^ r0 gives back the original divisor
  * (ip = r0 ^ r1), so N is set, and r0 negated, only for -1.
  */
26910: teq ip, r0 @ same sign ?
270 rsbmi r0, r0, #0
271 mov pc, lr
272
 /*
  * Flags come from "cmp r3, r1".  Smaller magnitude gives 0; equal
  * magnitudes give (ip asr 31) | 1, which is 1 or -1 by result sign.
  */
27311: movlo r0, #0
274 moveq r0, ip, asr #31
275 orreq r0, r0, #1
276 mov pc, lr
277
 /*
  * Power-of-two divisor: shift the dividend magnitude right by
  * log2(divisor) and negate when the result sign bit in ip is set.
  * The mov does not change the flags set by the cmp.
  */
27812: ARM_DIV2_ORDER r1, r2
279
280 cmp ip, #0
281 mov r0, r3, lsr r2
282 rsbmi r0, r0, #0
283 mov pc, lr
284
285
/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder, carrying the sign of the dividend
 * Uses: r1, r2, r3, ip and the condition flags may be overwritten.
 *
 * Both operands are replaced by their magnitudes, the remainder is
 * computed as in the unsigned case, and the result is negated when the
 * original dividend (saved in ip) was negative.  A zero divisor branches
 * to Ldiv0.
 */
286ENTRY(__modsi3)
287
 /* beq leaves the flags alone, so rsbmi still tests the divisor sign. */
288 cmp r1, #0
289 beq Ldiv0
290 rsbmi r1, r1, #0 @ loops below use unsigned.
291 movs ip, r0 @ preserve sign of dividend
292 rsbmi r0, r0, #0 @ if negative make positive
 /*
  * Conditional sequence for the easy cases:
  *   - divisor == 1 or equal magnitudes: result 0
  *   - dividend magnitude below divisor: r0 is kept as it is
  *   - power-of-two divisor: result is r0 & (divisor - 1)
  * All of them skip ARM_MOD_BODY through the bls.
  */
293 subs r2, r1, #1 @ compare divisor with 1
294 cmpne r0, r1 @ compare dividend with divisor
295 moveq r0, #0
296 tsthi r1, r2 @ see if divisor is power of 2
297 andeq r0, r0, r2
298 bls 10f
299
300 ARM_MOD_BODY r0, r1, r2, r3
301
 /* Give the remainder the sign of the original dividend. */
30210: cmp ip, #0
303 rsbmi r0, r0, #0
304 mov pc, lr
305
306
/*
 * Ldiv0: shared divide-by-zero tail for the four division entry points
 * above, which branch here with lr still holding their caller's return
 * address.
 *
 * Calls __div0 (defined outside this file), then returns 0 in r0 to the
 * original caller.
 *
 * lr is pushed into an 8-byte slot rather than a 4-byte one so that sp
 * keeps 8-byte alignment across the call, as the ARM procedure call
 * standard requires at call boundaries.  The entry points never adjust
 * sp themselves, so sp here is whatever their caller provided.
 */
Ldiv0:

	str	lr, [sp, #-8]!		@ save return address, keep sp 8-byte aligned
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ release the slot and return to the caller
313
314