blob: 0f64fcee4ccd1c7b1482919db040d99968d22b7e [file] [log] [blame]
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +03001/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2 * Note: I added some stuff for use with gnupg
3 *
4 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
5 * 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
6 *
7 * This file is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Library General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version.
11 *
12 * This file is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15 * License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this file; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 * MA 02111-1307, USA. */
21
Christoph Hellwiga1164a32015-08-28 09:27:15 +020022#include <linux/count_zeros.h>
David Howellsaacf29b2012-09-13 13:09:33 +010023
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +030024/* You have to define the following before including this file:
25 *
26 * UWtype -- An unsigned type, default type for operations (typically a "word")
27 * UHWtype -- An unsigned type, at least half the size of UWtype.
28 * UDWtype -- An unsigned type, at least twice as large as UWtype.
29 * W_TYPE_SIZE -- size in bits of UWtype
30 *
31 * SItype, USItype -- Signed and unsigned 32 bit types.
32 * DItype, UDItype -- Signed and unsigned 64 bit types.
33 *
34 * On a 32 bit machine UWtype should typically be USItype;
35 * on a 64 bit machine, UWtype should typically be UDItype.
36*/
37
/* Half-word helpers: __BITS4 is a quarter of the word width in bits, __ll_B
 * is 2^(W_TYPE_SIZE/2) as a UWtype, and __ll_lowpart()/__ll_highpart() yield
 * the low and high halves of a word after converting it to UWtype. */
38#define __BITS4 (W_TYPE_SIZE / 4)
39#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
40#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
41#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
42
43/* This is used to make sure no undesirable sharing between different libraries
44 that use this file takes place. */
/* Default: __MPN(x) token-pastes a "__" prefix onto x; an includer may
 * pre-define __MPN to choose a different decoration. */
45#ifndef __MPN
46#define __MPN(x) __##x
47#endif
48
49/* Define auxiliary asm macros.
50 *
51 * 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
52 * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
53 * word product in HIGH_PROD and LOW_PROD.
54 *
55 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
56 * UDWtype product. This is just a variant of umul_ppmm.
57
58 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
59 * denominator) divides a UDWtype, composed by the UWtype integers
60 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
61 * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
62 * than DENOMINATOR for correct operation. If the implementation also
63 * requires the most significant bit of DENOMINATOR to be 1, the pre-processor
64 * symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
65 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
66 * denominator). Like udiv_qrnnd but the numbers are signed. The quotient
67 * is rounded towards 0.
68 *
69 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
70 * msb to the first non-zero bit in the UWtype X. This is the number of
71 * steps X needs to be shifted left to set the msb. Undefined for X == 0,
72 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
73 *
74 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
75 * from the least significant end.
76 *
77 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
78 * high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
79 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
80 * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
81 * (i.e. carry out) is not stored anywhere, and is lost.
82 *
83 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
84 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
85 * composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
86 * LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
87 * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
88 * and is lost.
89 *
90 * If any of these macros are left undefined for a particular CPU,
91 * C macros are used. */
92
93/* The CPUs come in alphabetical order below.
94 *
95 * Please add support for more CPUs here, or improve the current support
96 * for the CPUs below! */
97
98#if defined(__GNUC__) && !defined(NO_ASM)
99
100/* We sometimes need to clobber "cc" with gcc2, but that would not be
101 understood by gcc1. Use cpp to avoid major code duplication. */
/* __CLOBBER_CC opens a clobber list holding only "cc"; __AND_CLOBBER_CC
 * appends "cc" to a clobber list that already has entries. Both expand to
 * nothing when __GNUC__ < 2. */
102#if __GNUC__ < 2
103#define __CLOBBER_CC
104#define __AND_CLOBBER_CC
105#else /* __GNUC__ >= 2 */
106#define __CLOBBER_CC : "cc"
107#define __AND_CLOBBER_CC , "cc"
108#endif /* __GNUC__ < 2 */
109
110/***************************************
111 ************** A29K *****************
112 ***************************************/
113#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
114#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
115 __asm__ ("add %1,%4,%5\n" \
116 "addc %0,%2,%3" \
117 : "=r" ((USItype)(sh)), \
118 "=&r" ((USItype)(sl)) \
119 : "%r" ((USItype)(ah)), \
120 "rI" ((USItype)(bh)), \
121 "%r" ((USItype)(al)), \
122 "rI" ((USItype)(bl)))
123#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
124 __asm__ ("sub %1,%4,%5\n" \
125 "subc %0,%2,%3" \
126 : "=r" ((USItype)(sh)), \
127 "=&r" ((USItype)(sl)) \
128 : "r" ((USItype)(ah)), \
129 "rI" ((USItype)(bh)), \
130 "r" ((USItype)(al)), \
131 "rI" ((USItype)(bl)))
132#define umul_ppmm(xh, xl, m0, m1) \
133do { \
134 USItype __m0 = (m0), __m1 = (m1); \
135 __asm__ ("multiplu %0,%1,%2" \
136 : "=r" ((USItype)(xl)) \
137 : "r" (__m0), \
138 "r" (__m1)); \
139 __asm__ ("multmu %0,%1,%2" \
140 : "=r" ((USItype)(xh)) \
141 : "r" (__m0), \
142 "r" (__m1)); \
143} while (0)
144#define udiv_qrnnd(q, r, n1, n0, d) \
145 __asm__ ("dividu %0,%3,%4" \
146 : "=r" ((USItype)(q)), \
147 "=q" ((USItype)(r)) \
148 : "1" ((USItype)(n1)), \
149 "r" ((USItype)(n0)), \
150 "r" ((USItype)(d)))
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300151#endif /* __a29k__ */
152
153#if defined(__alpha) && W_TYPE_SIZE == 64
/* Alpha: the high product word comes from the __builtin_alpha_umulh builtin
 * and the low word from an ordinary wrapping C multiply. m0 and m1 are copied
 * into locals first, so each argument is evaluated once. */
Richard Hendersona5c6eae2013-07-10 11:05:59 -0700154#define umul_ppmm(ph, pl, m0, m1) \
155do { \
156 UDItype __m0 = (m0), __m1 = (m1); \
157 (ph) = __builtin_alpha_umulh(__m0, __m1); \
158 (pl) = __m0 * __m1; \
159} while (0)
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300160#define UMUL_TIME 46
161#ifndef LONGLONG_STANDALONE
162#define udiv_qrnnd(q, r, n1, n0, d) \
163do { UDItype __r; \
164 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
165 (r) = __r; \
166} while (0)
Richard Hendersona5c6eae2013-07-10 11:05:59 -0700167extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300168#define UDIV_TIME 220
169#endif /* LONGLONG_STANDALONE */
170#endif /* __alpha */
171
172/***************************************
173 ************** ARM ******************
174 ***************************************/
175#if defined(__arm__) && W_TYPE_SIZE == 32
176#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
177 __asm__ ("adds %1, %4, %5\n" \
178 "adc %0, %2, %3" \
179 : "=r" ((USItype)(sh)), \
180 "=&r" ((USItype)(sl)) \
181 : "%r" ((USItype)(ah)), \
182 "rI" ((USItype)(bh)), \
183 "%r" ((USItype)(al)), \
184 "rI" ((USItype)(bl)))
185#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
186 __asm__ ("subs %1, %4, %5\n" \
187 "sbc %0, %2, %3" \
188 : "=r" ((USItype)(sh)), \
189 "=&r" ((USItype)(sl)) \
190 : "r" ((USItype)(ah)), \
191 "rI" ((USItype)(bh)), \
192 "r" ((USItype)(al)), \
193 "rI" ((USItype)(bl)))
194#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
195#define umul_ppmm(xh, xl, a, b) \
196 __asm__ ("%@ Inlined umul_ppmm\n" \
197 "mov %|r0, %2, lsr #16 @ AAAA\n" \
198 "mov %|r2, %3, lsr #16 @ BBBB\n" \
199 "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \
200 "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \
201 "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \
202 "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \
203 "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \
204 "mul %0, %|r0, %0 @ AAAA * bbbb\n" \
205 "adds %|r0, %1, %0 @ central sum\n" \
206 "addcs %|r2, %|r2, #65536\n" \
207 "adds %1, %|r1, %|r0, lsl #16\n" \
208 "adc %0, %|r2, %|r0, lsr #16" \
209 : "=&r" ((USItype)(xh)), \
210 "=r" ((USItype)(xl)) \
211 : "r" ((USItype)(a)), \
212 "r" ((USItype)(b)) \
213 : "r0", "r1", "r2")
214#else
215#define umul_ppmm(xh, xl, a, b) \
216 __asm__ ("%@ Inlined umul_ppmm\n" \
217 "umull %r1, %r0, %r2, %r3" \
218 : "=&r" ((USItype)(xh)), \
Arnd Bergmannc5d55242016-02-26 13:46:26 +0100219 "=&r" ((USItype)(xl)) \
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300220 : "r" ((USItype)(a)), \
221 "r" ((USItype)(b)) \
222 : "r0", "r1")
223#endif
224#define UMUL_TIME 20
225#define UDIV_TIME 100
226#endif /* __arm__ */
227
228/***************************************
229 ************** CLIPPER **************
230 ***************************************/
231#if defined(__clipper__) && W_TYPE_SIZE == 32
232#define umul_ppmm(w1, w0, u, v) \
233 ({union {UDItype __ll; \
234 struct {USItype __l, __h; } __i; \
235 } __xx; \
236 __asm__ ("mulwux %2,%0" \
237 : "=r" (__xx.__ll) \
238 : "%0" ((USItype)(u)), \
239 "r" ((USItype)(v))); \
240 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
241#define smul_ppmm(w1, w0, u, v) \
242 ({union {DItype __ll; \
243 struct {SItype __l, __h; } __i; \
244 } __xx; \
245 __asm__ ("mulwx %2,%0" \
246 : "=r" (__xx.__ll) \
247 : "%0" ((SItype)(u)), \
248 "r" ((SItype)(v))); \
249 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
250#define __umulsidi3(u, v) \
251 ({UDItype __w; \
252 __asm__ ("mulwux %2,%0" \
253 : "=r" (__w) \
254 : "%0" ((USItype)(u)), \
255 "r" ((USItype)(v))); \
256 __w; })
257#endif /* __clipper__ */
258
259/***************************************
260 ************** GMICRO ***************
261 ***************************************/
262#if defined(__gmicro__) && W_TYPE_SIZE == 32
263#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
264 __asm__ ("add.w %5,%1\n" \
265 "addx %3,%0" \
266 : "=g" ((USItype)(sh)), \
267 "=&g" ((USItype)(sl)) \
268 : "%0" ((USItype)(ah)), \
269 "g" ((USItype)(bh)), \
270 "%1" ((USItype)(al)), \
271 "g" ((USItype)(bl)))
272#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
273 __asm__ ("sub.w %5,%1\n" \
274 "subx %3,%0" \
275 : "=g" ((USItype)(sh)), \
276 "=&g" ((USItype)(sl)) \
277 : "0" ((USItype)(ah)), \
278 "g" ((USItype)(bh)), \
279 "1" ((USItype)(al)), \
280 "g" ((USItype)(bl)))
281#define umul_ppmm(ph, pl, m0, m1) \
282 __asm__ ("mulx %3,%0,%1" \
283 : "=g" ((USItype)(ph)), \
284 "=r" ((USItype)(pl)) \
285 : "%0" ((USItype)(m0)), \
286 "g" ((USItype)(m1)))
287#define udiv_qrnnd(q, r, nh, nl, d) \
288 __asm__ ("divx %4,%0,%1" \
289 : "=g" ((USItype)(q)), \
290 "=r" ((USItype)(r)) \
291 : "1" ((USItype)(nh)), \
292 "0" ((USItype)(nl)), \
293 "g" ((USItype)(d)))
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300294#endif
295
296/***************************************
297 ************** HPPA *****************
298 ***************************************/
299#if defined(__hppa) && W_TYPE_SIZE == 32
300#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
301 __asm__ ("add %4,%5,%1\n" \
302 "addc %2,%3,%0" \
303 : "=r" ((USItype)(sh)), \
304 "=&r" ((USItype)(sl)) \
305 : "%rM" ((USItype)(ah)), \
306 "rM" ((USItype)(bh)), \
307 "%rM" ((USItype)(al)), \
308 "rM" ((USItype)(bl)))
309#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
310 __asm__ ("sub %4,%5,%1\n" \
311 "subb %2,%3,%0" \
312 : "=r" ((USItype)(sh)), \
313 "=&r" ((USItype)(sl)) \
314 : "rM" ((USItype)(ah)), \
315 "rM" ((USItype)(bh)), \
316 "rM" ((USItype)(al)), \
317 "rM" ((USItype)(bl)))
Helge Deller70ef5572013-05-05 22:08:22 +0000318#if 0 && defined(_PA_RISC1_1)
319/* xmpyu uses floating point register which is not allowed in Linux kernel. */
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300320#define umul_ppmm(wh, wl, u, v) \
321do { \
322 union {UDItype __ll; \
323 struct {USItype __h, __l; } __i; \
324 } __xx; \
325 __asm__ ("xmpyu %1,%2,%0" \
326 : "=*f" (__xx.__ll) \
327 : "*f" ((USItype)(u)), \
328 "*f" ((USItype)(v))); \
329 (wh) = __xx.__i.__h; \
330 (wl) = __xx.__i.__l; \
331} while (0)
332#define UMUL_TIME 8
333#define UDIV_TIME 60
334#else
335#define UMUL_TIME 40
336#define UDIV_TIME 80
337#endif
Helge Deller70ef5572013-05-05 22:08:22 +0000338#if 0 /* #ifndef LONGLONG_STANDALONE */
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300339#define udiv_qrnnd(q, r, n1, n0, d) \
340do { USItype __r; \
341 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
342 (r) = __r; \
343} while (0)
344extern USItype __udiv_qrnnd();
345#endif /* LONGLONG_STANDALONE */
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300346#endif /* hppa */
347
348/***************************************
349 ************** I370 *****************
350 ***************************************/
351#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
/* i370 unsigned multiply built on "mr". After the instruction, the high word
 * is adjusted by adding __m1 when the sign-shift mask of __m0 is set and
 * __m0 when the sign-shift mask of __m1 is set -- presumably the usual
 * signed-to-unsigned high-word correction, since smul_ppmm below uses the
 * same "mr" without it. m0 and m1 are copied to locals and so are evaluated
 * only once. */
352#define umul_ppmm(xh, xl, m0, m1) \
353do { \
354 union {UDItype __ll; \
355 struct {USItype __h, __l; } __i; \
356 } __xx; \
357 USItype __m0 = (m0), __m1 = (m1); \
358 __asm__ ("mr %0,%3" \
359 : "=r" (__xx.__i.__h), \
360 "=r" (__xx.__i.__l) \
361 : "%1" (__m0), \
362 "r" (__m1)); \
363 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
364 (xh) += ((((SItype) __m0 >> 31) & __m1) \
365 + (((SItype) __m1 >> 31) & __m0)); \
366} while (0)
367#define smul_ppmm(xh, xl, m0, m1) \
368do { \
369 union {DItype __ll; \
370 struct {USItype __h, __l; } __i; \
371 } __xx; \
372 __asm__ ("mr %0,%3" \
373 : "=r" (__xx.__i.__h), \
374 "=r" (__xx.__i.__l) \
375 : "%1" (m0), \
376 "r" (m1)); \
377 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
378} while (0)
379#define sdiv_qrnnd(q, r, n1, n0, d) \
380do { \
381 union {DItype __ll; \
382 struct {USItype __h, __l; } __i; \
383 } __xx; \
384 __xx.__i.__h = n1; __xx.__i.__l = n0; \
385 __asm__ ("dr %0,%2" \
386 : "=r" (__xx.__ll) \
387 : "0" (__xx.__ll), "r" (d)); \
388 (q) = __xx.__i.__l; (r) = __xx.__i.__h; \
389} while (0)
390#endif
391
392/***************************************
393 ************** I386 *****************
394 ***************************************/
/* NOTE(review): with __i386__ undefined here, the test below can only
 * succeed when __i486__ is defined; this looks like a deliberate way of
 * disabling the x86 asm on plain i386 builds -- confirm before removing. */
395#undef __i386__
396#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
397#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
398 __asm__ ("addl %5,%1\n" \
399 "adcl %3,%0" \
400 : "=r" ((USItype)(sh)), \
401 "=&r" ((USItype)(sl)) \
402 : "%0" ((USItype)(ah)), \
403 "g" ((USItype)(bh)), \
404 "%1" ((USItype)(al)), \
405 "g" ((USItype)(bl)))
406#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
407 __asm__ ("subl %5,%1\n" \
408 "sbbl %3,%0" \
409 : "=r" ((USItype)(sh)), \
410 "=&r" ((USItype)(sl)) \
411 : "0" ((USItype)(ah)), \
412 "g" ((USItype)(bh)), \
413 "1" ((USItype)(al)), \
414 "g" ((USItype)(bl)))
/* x86 "mull": u is tied to the "a" register (operand 0), which also returns
 * the low word w0; the high word w1 comes back in the "d" register. The "%"
 * on the u operand marks u and v as commutable for the compiler. */
415#define umul_ppmm(w1, w0, u, v) \
416 __asm__ ("mull %3" \
417 : "=a" ((USItype)(w0)), \
418 "=d" ((USItype)(w1)) \
419 : "%0" ((USItype)(u)), \
420 "rm" ((USItype)(v)))
/* x86 "divl": the dividend is passed as n1 in the "d" register and n0 in the
 * "a" register (matching constraints "1" and "0"); the quotient is returned
 * in "a" and the remainder in "d". The divisor d may be a register or a
 * memory operand ("rm"). */
421#define udiv_qrnnd(q, r, n1, n0, d) \
422 __asm__ ("divl %4" \
423 : "=a" ((USItype)(q)), \
424 "=d" ((USItype)(r)) \
425 : "0" ((USItype)(n0)), \
426 "1" ((USItype)(n1)), \
427 "rm" ((USItype)(d)))
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300428#ifndef UMUL_TIME
429#define UMUL_TIME 40
430#endif
431#ifndef UDIV_TIME
432#define UDIV_TIME 40
433#endif
434#endif /* 80x86 */
435
436/***************************************
437 ************** I860 *****************
438 ***************************************/
439#if defined(__i860__) && W_TYPE_SIZE == 32
/*
 * rshift_rhlc(r, h, l, c) -- i860 two-instruction shift sequence.
 *
 * r       output operand (%0), written by "shrd".
 * h, l    register inputs (%1, %2) consumed by "shrd".
 * c       shift count (%3), register or constant ("rn"), loaded by the
 *         leading "shr %3,r0,r0".
 *
 * Presumably this shifts the double word h:l right by c and yields the low
 * result word in r -- confirm against the i860 manual.
 *
 * The ':' that starts the output operand list was missing, so the "=r"
 * string was concatenated onto the asm template and the statement could not
 * be parsed.
 */
#define rshift_rhlc(r, h, l, c) \
	__asm__ ("shr %3,r0,r0\n" \
		 "shrd %1,%2,%0" \
		 : "=r" (r) \
		 : "r" (h), "r" (l), "rn" (c))
444#endif /* i860 */
445
446/***************************************
447 ************** I960 *****************
448 ***************************************/
449#if defined(__i960__) && W_TYPE_SIZE == 32
450#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
451 __asm__ ("cmpo 1,0\n" \
452 "addc %5,%4,%1\n" \
453 "addc %3,%2,%0" \
454 : "=r" ((USItype)(sh)), \
455 "=&r" ((USItype)(sl)) \
456 : "%dI" ((USItype)(ah)), \
457 "dI" ((USItype)(bh)), \
458 "%dI" ((USItype)(al)), \
459 "dI" ((USItype)(bl)))
460#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
461 __asm__ ("cmpo 0,0\n" \
462 "subc %5,%4,%1\n" \
463 "subc %3,%2,%0" \
464 : "=r" ((USItype)(sh)), \
465 "=&r" ((USItype)(sl)) \
466 : "dI" ((USItype)(ah)), \
467 "dI" ((USItype)(bh)), \
468 "dI" ((USItype)(al)), \
469 "dI" ((USItype)(bl)))
470#define umul_ppmm(w1, w0, u, v) \
471 ({union {UDItype __ll; \
472 struct {USItype __l, __h; } __i; \
473 } __xx; \
474 __asm__ ("emul %2,%1,%0" \
475 : "=d" (__xx.__ll) \
476 : "%dI" ((USItype)(u)), \
477 "dI" ((USItype)(v))); \
478 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
479#define __umulsidi3(u, v) \
480 ({UDItype __w; \
481 __asm__ ("emul %2,%1,%0" \
482 : "=d" (__w) \
483 : "%dI" ((USItype)(u)), \
484 "dI" ((USItype)(v))); \
485 __w; })
/*
 * udiv_qrnnd(q, r, nh, nl, d) -- i960 double-word by single-word divide.
 *
 * nh, nl  high and low words of the numerator, packed into __nn.
 * d       denominator.
 * q, r    receive the high and low words of the asm result __rq
 *         respectively.
 *
 * The result union __rq was used but never declared (only __nn was), so any
 * expansion of this macro failed to compile; it is now declared next to
 * __nn.
 *
 * NOTE(review): "%d" and "%n" in the template are not numbered operand
 * references; they are left untouched here -- confirm against the i960
 * backend before relying on this macro.
 */
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
	union {UDItype __ll; \
	       struct {USItype __l, __h; } __i; \
	} __nn, __rq; \
	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
	__asm__ ("ediv %d,%n,%0" \
		 : "=d" (__rq.__ll) \
		 : "dI" (__nn.__ll), \
		   "dI" ((USItype)(d))); \
	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
} while (0)
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300498#if defined(__i960mx) /* what is the proper symbol to test??? */
/*
 * rshift_rhlc(r, h, l, c) -- i960mx "shre" on the double word h:l.
 *
 * h, l    high and low words, packed into the union __nn.
 * c       shift count, register or constant ("dI").
 * r       output operand written by the instruction.
 *
 * The body opened with "do {" but ended with a bare "}", so every use
 * expanded to an unterminated do-statement. The closing "while (0)" is
 * added so the macro behaves as a single statement like its siblings.
 */
#define rshift_rhlc(r, h, l, c) \
do { \
	union {UDItype __ll; \
	       struct {USItype __l, __h; } __i; \
	} __nn; \
	__nn.__i.__h = (h); __nn.__i.__l = (l); \
	__asm__ ("shre %2,%1,%0" \
		 : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
} while (0)
508#endif /* i960mx */
509#endif /* i960 */
510
511/***************************************
512 ************** 68000 ****************
513 ***************************************/
514#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
515#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
516 __asm__ ("add%.l %5,%1\n" \
517 "addx%.l %3,%0" \
518 : "=d" ((USItype)(sh)), \
519 "=&d" ((USItype)(sl)) \
520 : "%0" ((USItype)(ah)), \
521 "d" ((USItype)(bh)), \
522 "%1" ((USItype)(al)), \
523 "g" ((USItype)(bl)))
524#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
525 __asm__ ("sub%.l %5,%1\n" \
526 "subx%.l %3,%0" \
527 : "=d" ((USItype)(sh)), \
528 "=&d" ((USItype)(sl)) \
529 : "0" ((USItype)(ah)), \
530 "d" ((USItype)(bh)), \
531 "1" ((USItype)(al)), \
532 "g" ((USItype)(bl)))
533#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
534#define umul_ppmm(w1, w0, u, v) \
535 __asm__ ("mulu%.l %3,%1:%0" \
536 : "=d" ((USItype)(w0)), \
537 "=d" ((USItype)(w1)) \
538 : "%0" ((USItype)(u)), \
539 "dmi" ((USItype)(v)))
540#define UMUL_TIME 45
541#define udiv_qrnnd(q, r, n1, n0, d) \
542 __asm__ ("divu%.l %4,%1:%0" \
543 : "=d" ((USItype)(q)), \
544 "=d" ((USItype)(r)) \
545 : "0" ((USItype)(n0)), \
546 "1" ((USItype)(n1)), \
547 "dmi" ((USItype)(d)))
548#define UDIV_TIME 90
549#define sdiv_qrnnd(q, r, n1, n0, d) \
550 __asm__ ("divs%.l %4,%1:%0" \
551 : "=d" ((USItype)(q)), \
552 "=d" ((USItype)(r)) \
553 : "0" ((USItype)(n0)), \
554 "1" ((USItype)(n1)), \
555 "dmi" ((USItype)(d)))
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300556#else /* not mc68020 */
557#define umul_ppmm(xh, xl, a, b) \
558do { USItype __umul_tmp1, __umul_tmp2; \
559 __asm__ ("| Inlined umul_ppmm\n" \
560 "move%.l %5,%3\n" \
561 "move%.l %2,%0\n" \
562 "move%.w %3,%1\n" \
563 "swap %3\n" \
564 "swap %0\n" \
565 "mulu %2,%1\n" \
566 "mulu %3,%0\n" \
567 "mulu %2,%3\n" \
568 "swap %2\n" \
569 "mulu %5,%2\n" \
570 "add%.l %3,%2\n" \
571 "jcc 1f\n" \
572 "add%.l %#0x10000,%0\n" \
573 "1: move%.l %2,%3\n" \
574 "clr%.w %2\n" \
575 "swap %2\n" \
576 "swap %3\n" \
577 "clr%.w %3\n" \
578 "add%.l %3,%1\n" \
579 "addx%.l %2,%0\n" \
580 "| End inlined umul_ppmm" \
581 : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
582 "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
583 : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
584} while (0)
585#define UMUL_TIME 100
586#define UDIV_TIME 400
587#endif /* not mc68020 */
588#endif /* mc68000 */
589
590/***************************************
591 ************** 88000 ****************
592 ***************************************/
593#if defined(__m88000__) && W_TYPE_SIZE == 32
594#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
595 __asm__ ("addu.co %1,%r4,%r5\n" \
596 "addu.ci %0,%r2,%r3" \
597 : "=r" ((USItype)(sh)), \
598 "=&r" ((USItype)(sl)) \
599 : "%rJ" ((USItype)(ah)), \
600 "rJ" ((USItype)(bh)), \
601 "%rJ" ((USItype)(al)), \
602 "rJ" ((USItype)(bl)))
603#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
604 __asm__ ("subu.co %1,%r4,%r5\n" \
605 "subu.ci %0,%r2,%r3" \
606 : "=r" ((USItype)(sh)), \
607 "=&r" ((USItype)(sl)) \
608 : "rJ" ((USItype)(ah)), \
609 "rJ" ((USItype)(bh)), \
610 "rJ" ((USItype)(al)), \
611 "rJ" ((USItype)(bl)))
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300612#if defined(__m88110__)
613#define umul_ppmm(wh, wl, u, v) \
614do { \
615 union {UDItype __ll; \
616 struct {USItype __h, __l; } __i; \
617 } __x; \
618 __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
619 (wh) = __x.__i.__h; \
620 (wl) = __x.__i.__l; \
621} while (0)
/*
 * udiv_qrnnd(q, r, n1, n0, d) -- m88110 divide using "divu.d".
 *
 * n1, n0  high and low words of the numerator, packed into __x.
 * d       denominator.
 * q       receives the low word of the asm result.
 * r       is reconstructed in C as n0 - q * d (wrapping arithmetic).
 *
 * The quotient was read as __q.__l, but the union has no such member: the
 * words live in the nested struct __i. Both reads now use __q.__i.__l.
 *
 * Note that n0 and d are each evaluated twice, so arguments must be free of
 * side effects.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	({union {UDItype __ll; \
		 struct {USItype __h, __l; } __i; \
	} __x, __q; \
	__x.__i.__h = (n1); __x.__i.__l = (n0); \
	__asm__ ("divu.d %0,%1,%2" \
		 : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
	(r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
630#define UMUL_TIME 5
631#define UDIV_TIME 25
632#else
633#define UMUL_TIME 17
634#define UDIV_TIME 150
635#endif /* __m88110__ */
636#endif /* __m88000__ */
637
638/***************************************
639 ************** MIPS *****************
640 ***************************************/
641#if defined(__mips__) && W_TYPE_SIZE == 32
Jaedon Shin36f58112015-06-12 18:04:14 +0900642#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
/*
 * umul_ppmm(w1, w0, u, v) -- 32x32 -> 64 bit unsigned multiply in plain C.
 *
 * u, v    multiplicands; each is narrowed to USItype first, exactly as the
 *         asm variants of this macro do, so a signed or wider argument
 *         cannot sign-extend or leak extra bits into the 64-bit product.
 * w1      receives the high 32 bits of the product.
 * w0      receives the low 32 bits of the product.
 *
 * The output arguments are parenthesized for consistency with the other
 * macros in this file. Each argument is evaluated exactly once.
 */
#define umul_ppmm(w1, w0, u, v) \
do { \
	UDItype __ll = (UDItype)(USItype)(u) * (USItype)(v); \
	(w1) = __ll >> 32; \
	(w0) = __ll; \
} while (0)
649#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300650#define umul_ppmm(w1, w0, u, v) \
651 __asm__ ("multu %2,%3" \
652 : "=l" ((USItype)(w0)), \
653 "=h" ((USItype)(w1)) \
654 : "d" ((USItype)(u)), \
655 "d" ((USItype)(v)))
656#else
657#define umul_ppmm(w1, w0, u, v) \
658 __asm__ ("multu %2,%3\n" \
659 "mflo %0\n" \
660 "mfhi %1" \
661 : "=d" ((USItype)(w0)), \
662 "=d" ((USItype)(w1)) \
663 : "d" ((USItype)(u)), \
664 "d" ((USItype)(v)))
665#endif
666#define UMUL_TIME 10
667#define UDIV_TIME 100
668#endif /* __mips__ */
669
670/***************************************
671 ************** MIPS/64 **************
672 ***************************************/
673#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
James Hogand9868db2017-12-05 23:31:35 +0000674#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
675/*
676 * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
677 * code below, so we special case MIPS64r6 until the compiler can do better.
678 */
/* Two separate asm statements: "dmulu" produces w0 and "dmuhu" produces w1.
 * u and v appear as operands of both statements, so each argument expression
 * is evaluated twice. */
679#define umul_ppmm(w1, w0, u, v) \
680do { \
681 __asm__ ("dmulu %0,%1,%2" \
682 : "=d" ((UDItype)(w0)) \
683 : "d" ((UDItype)(u)), \
684 "d" ((UDItype)(v))); \
685 __asm__ ("dmuhu %0,%1,%2" \
686 : "=d" ((UDItype)(w1)) \
687 : "d" ((UDItype)(u)), \
688 "d" ((UDItype)(v))); \
689} while (0)
690#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
/*
 * umul_ppmm(w1, w0, u, v) -- 64x64 -> 128 bit unsigned multiply in plain C,
 * using a local 128-bit type declared with GCC's mode(TI) attribute.
 *
 * u, v    multiplicands; each is narrowed to UDItype first, exactly as the
 *         asm variants of this macro do, so a signed argument cannot
 *         sign-extend into the upper half of the 128-bit product.
 * w1      receives the high 64 bits of the product.
 * w0      receives the low 64 bits of the product.
 *
 * The output arguments are parenthesized for consistency with the other
 * macros in this file. Each argument is evaluated exactly once.
 */
#define umul_ppmm(w1, w0, u, v) \
do { \
	typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
	__ll_UTItype __ll = (__ll_UTItype)(UDItype)(u) * (UDItype)(v); \
	(w1) = __ll >> 64; \
	(w0) = __ll; \
} while (0)
698#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300699#define umul_ppmm(w1, w0, u, v) \
700 __asm__ ("dmultu %2,%3" \
701 : "=l" ((UDItype)(w0)), \
702 "=h" ((UDItype)(w1)) \
703 : "d" ((UDItype)(u)), \
704 "d" ((UDItype)(v)))
705#else
706#define umul_ppmm(w1, w0, u, v) \
707 __asm__ ("dmultu %2,%3\n" \
708 "mflo %0\n" \
709 "mfhi %1" \
710 : "=d" ((UDItype)(w0)), \
711 "=d" ((UDItype)(w1)) \
712 : "d" ((UDItype)(u)), \
713 "d" ((UDItype)(v)))
714#endif
715#define UMUL_TIME 20
716#define UDIV_TIME 140
717#endif /* __mips__ */
718
719/***************************************
720 ************** 32000 ****************
721 ***************************************/
722#if defined(__ns32000__) && W_TYPE_SIZE == 32
723#define umul_ppmm(w1, w0, u, v) \
724 ({union {UDItype __ll; \
725 struct {USItype __l, __h; } __i; \
726 } __xx; \
727 __asm__ ("meid %2,%0" \
728 : "=g" (__xx.__ll) \
729 : "%0" ((USItype)(u)), \
730 "g" ((USItype)(v))); \
731 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
732#define __umulsidi3(u, v) \
733 ({UDItype __w; \
734 __asm__ ("meid %2,%0" \
735 : "=g" (__w) \
736 : "%0" ((USItype)(u)), \
737 "g" ((USItype)(v))); \
738 __w; })
739#define udiv_qrnnd(q, r, n1, n0, d) \
740 ({union {UDItype __ll; \
741 struct {USItype __l, __h; } __i; \
742 } __xx; \
743 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
744 __asm__ ("deid %2,%0" \
745 : "=g" (__xx.__ll) \
746 : "0" (__xx.__ll), \
747 "g" ((USItype)(d))); \
748 (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300749#endif /* __ns32000__ */
750
751/***************************************
752 ************** PPC ******************
753 ***************************************/
754#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
755#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
756do { \
757 if (__builtin_constant_p(bh) && (bh) == 0) \
758 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
759 : "=r" ((USItype)(sh)), \
760 "=&r" ((USItype)(sl)) \
761 : "%r" ((USItype)(ah)), \
762 "%r" ((USItype)(al)), \
763 "rI" ((USItype)(bl))); \
764 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
765 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
766 : "=r" ((USItype)(sh)), \
767 "=&r" ((USItype)(sl)) \
768 : "%r" ((USItype)(ah)), \
769 "%r" ((USItype)(al)), \
770 "rI" ((USItype)(bl))); \
771 else \
772 __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
773 : "=r" ((USItype)(sh)), \
774 "=&r" ((USItype)(sl)) \
775 : "%r" ((USItype)(ah)), \
776 "r" ((USItype)(bh)), \
777 "%r" ((USItype)(al)), \
778 "rI" ((USItype)(bl))); \
779} while (0)
780#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
781do { \
782 if (__builtin_constant_p(ah) && (ah) == 0) \
783 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
784 : "=r" ((USItype)(sh)), \
785 "=&r" ((USItype)(sl)) \
786 : "r" ((USItype)(bh)), \
787 "rI" ((USItype)(al)), \
788 "r" ((USItype)(bl))); \
789 else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
790 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
791 : "=r" ((USItype)(sh)), \
792 "=&r" ((USItype)(sl)) \
793 : "r" ((USItype)(bh)), \
794 "rI" ((USItype)(al)), \
795 "r" ((USItype)(bl))); \
796 else if (__builtin_constant_p(bh) && (bh) == 0) \
797 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
798 : "=r" ((USItype)(sh)), \
799 "=&r" ((USItype)(sl)) \
800 : "r" ((USItype)(ah)), \
801 "rI" ((USItype)(al)), \
802 "r" ((USItype)(bl))); \
803 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
804 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
805 : "=r" ((USItype)(sh)), \
806 "=&r" ((USItype)(sl)) \
807 : "r" ((USItype)(ah)), \
808 "rI" ((USItype)(al)), \
809 "r" ((USItype)(bl))); \
810 else \
811 __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
812 : "=r" ((USItype)(sh)), \
813 "=&r" ((USItype)(sl)) \
814 : "r" ((USItype)(ah)), \
815 "r" ((USItype)(bh)), \
816 "rI" ((USItype)(al)), \
817 "r" ((USItype)(bl))); \
818} while (0)
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300819#if defined(_ARCH_PPC)
/* PowerPC: "mulhwu" supplies ph from the two locals; pl is an ordinary
 * wrapping C multiply of the same locals, so m0 and m1 are evaluated once.
 * NOTE(review): ph is used without parentheses in the output operand, unlike
 * the sibling macros in this file. */
820#define umul_ppmm(ph, pl, m0, m1) \
821do { \
822 USItype __m0 = (m0), __m1 = (m1); \
823 __asm__ ("mulhwu %0,%1,%2" \
824 : "=r" ((USItype) ph) \
825 : "%r" (__m0), \
826 "r" (__m1)); \
827 (pl) = __m0 * __m1; \
828} while (0)
829#define UMUL_TIME 15
830#define smul_ppmm(ph, pl, m0, m1) \
831do { \
832 SItype __m0 = (m0), __m1 = (m1); \
833 __asm__ ("mulhw %0,%1,%2" \
834 : "=r" ((SItype) ph) \
835 : "%r" (__m0), \
836 "r" (__m1)); \
837 (pl) = __m0 * __m1; \
838} while (0)
839#define SMUL_TIME 14
840#define UDIV_TIME 120
841#else
842#define umul_ppmm(xh, xl, m0, m1) \
843do { \
844 USItype __m0 = (m0), __m1 = (m1); \
845 __asm__ ("mul %0,%2,%3" \
846 : "=r" ((USItype)(xh)), \
847 "=q" ((USItype)(xl)) \
848 : "r" (__m0), \
849 "r" (__m1)); \
850 (xh) += ((((SItype) __m0 >> 31) & __m1) \
851 + (((SItype) __m1 >> 31) & __m0)); \
852} while (0)
853#define UMUL_TIME 8
854#define smul_ppmm(xh, xl, m0, m1) \
855 __asm__ ("mul %0,%2,%3" \
856 : "=r" ((SItype)(xh)), \
857 "=q" ((SItype)(xl)) \
858 : "r" (m0), \
859 "r" (m1))
860#define SMUL_TIME 4
861#define sdiv_qrnnd(q, r, nh, nl, d) \
862 __asm__ ("div %0,%2,%4" \
863 : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
864 : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
865#define UDIV_TIME 100
866#endif
867#endif /* Power architecture variants. */
868
869/***************************************
870 ************** PYR ******************
871 ***************************************/
872#if defined(__pyr__) && W_TYPE_SIZE == 32
873#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
874 __asm__ ("addw %5,%1\n" \
875 "addwc %3,%0" \
876 : "=r" ((USItype)(sh)), \
877 "=&r" ((USItype)(sl)) \
878 : "%0" ((USItype)(ah)), \
879 "g" ((USItype)(bh)), \
880 "%1" ((USItype)(al)), \
881 "g" ((USItype)(bl)))
882#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
883 __asm__ ("subw %5,%1\n" \
884 "subwb %3,%0" \
885 : "=r" ((USItype)(sh)), \
886 "=&r" ((USItype)(sl)) \
887 : "0" ((USItype)(ah)), \
888 "g" ((USItype)(bh)), \
889 "1" ((USItype)(al)), \
890 "g" ((USItype)(bl)))
891 /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
892#define umul_ppmm(w1, w0, u, v) \
893 ({union {UDItype __ll; \
894 struct {USItype __h, __l; } __i; \
895 } __xx; \
896 __asm__ ("movw %1,%R0\n" \
897 "uemul %2,%0" \
898 : "=&r" (__xx.__ll) \
899 : "g" ((USItype) (u)), \
900 "g" ((USItype)(v))); \
901 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
902#endif /* __pyr__ */
903
904/***************************************
905 ************** RT/ROMP **************
906 ***************************************/
907#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
908#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
909 __asm__ ("a %1,%5\n" \
910 "ae %0,%3" \
911 : "=r" ((USItype)(sh)), \
912 "=&r" ((USItype)(sl)) \
913 : "%0" ((USItype)(ah)), \
914 "r" ((USItype)(bh)), \
915 "%1" ((USItype)(al)), \
916 "r" ((USItype)(bl)))
917#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
918 __asm__ ("s %1,%5\n" \
919 "se %0,%3" \
920 : "=r" ((USItype)(sh)), \
921 "=&r" ((USItype)(sl)) \
922 : "0" ((USItype)(ah)), \
923 "r" ((USItype)(bh)), \
924 "1" ((USItype)(al)), \
925 "r" ((USItype)(bl)))
/*
 * umul_ppmm(ph, pl, m0, m1): (ph,pl) = m0 * m1, unsigned.
 *
 * The asm clears r2, loads one factor into r10 and issues the "m" step
 * instruction sixteen times against the other factor.  The high word is
 * then copied out of r2 and the low word is read back from r10.  r2 is
 * listed as clobbered.
 *
 * The C statement after the asm patches the high word: for each factor
 * whose top bit is set, the other factor is added.  NOTE(review): this is
 * the usual signed-to-unsigned product correction, which suggests the
 * multiply steps are signed -- confirm against the ROMP manual.
 */
#define umul_ppmm(ph, pl, m0, m1) \
	do { \
		USItype __m0 = (m0), __m1 = (m1); \
		__asm__ ("s r2,r2\n" \
			 "mts r10,%2\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n"	/* 4 */ \
			 "m r2,%3\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n"	/* 8 */ \
			 "m r2,%3\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n"	/* 12 */ \
			 "m r2,%3\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n" \
			 "m r2,%3\n"	/* 16 */ \
			 "cas %0,r2,r0\n" \
			 "mfs r10,%1" \
			 : "=r" ((USItype)(ph)), \
			   "=r" ((USItype)(pl)) \
			 : "%r" (__m0), \
			   "r" (__m1) \
			 : "r2"); \
		(ph) += ((((SItype) __m0 >> 31) & __m1) \
			 + (((SItype) __m1 >> 31) & __m0)); \
	} while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +0300960#endif /* RT/ROMP */
961
962/***************************************
963 ************** SH2 ******************
964 ***************************************/
965#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
966 && W_TYPE_SIZE == 32
/*
 * umul_ppmm(w1, w0, u, v): unsigned multiply of u by v; w1 receives the
 * high word and w0 the low word.
 *
 * dmulu.l multiplies the two register operands; the two "sts"
 * instructions then copy macl into w0 and mach into w1.  Both mac
 * registers are declared as clobbered.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("dmulu.l %2,%3\n" \
		 "sts macl,%1\n" \
		 "sts mach,%0" \
		 : "=r" ((USItype)(w1)), \
		   "=r" ((USItype)(w0)) \
		 : "r" ((USItype)(u)), \
		   "r" ((USItype)(v)) \
		 : "macl", "mach")
#define UMUL_TIME 5
978#endif
979
980/***************************************
981 ************** SPARC ****************
982 ***************************************/
983#if defined(__sparc__) && W_TYPE_SIZE == 32
/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition,
 * (sh,sl) = (ah,al) + (bh,bl).
 *
 * addcc forms the low word and sets the condition codes; addx forms the
 * high word.  The "J"/"I" constraints allow certain constants as
 * operands, and the first operand of each pair is marked commutative.
 * The condition codes are declared clobbered via __CLOBBER_CC.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addcc %r4,%5,%1\n" \
		 "addx %r2,%3,%0" \
		 : "=r" ((USItype)(sh)), \
		   "=&r" ((USItype)(sl)) \
		 : "%rJ" ((USItype)(ah)), \
		   "rI" ((USItype)(bh)), \
		   "%rJ" ((USItype)(al)), \
		   "rI" ((USItype)(bl)) \
		 __CLOBBER_CC)
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction,
 * (sh,sl) = (ah,al) - (bh,bl).
 *
 * subcc forms the low word and sets the condition codes; subx forms the
 * high word.  Same operand constraints as the addition, except that no
 * operand is marked commutative.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subcc %r4,%5,%1\n" \
		 "subx %r2,%3,%0" \
		 : "=r" ((USItype)(sh)), \
		   "=&r" ((USItype)(sl)) \
		 : "rJ" ((USItype)(ah)), \
		   "rI" ((USItype)(bh)), \
		   "rJ" ((USItype)(al)), \
		   "rI" ((USItype)(bl)) \
		 __CLOBBER_CC)
1004#if defined(__sparc_v8__)
/*
 * umul_ppmm(w1, w0, u, v) for SPARC v8: umul writes the low word of the
 * product to w0, and the high word is then read from %y into w1.
 *
 * Both factors are required to be in registers.  Immediates are
 * deliberately not matched because 1) they are rarely useful here, and
 * 2) the 'I' constraint treats the range as a 13 bit signed interval,
 * whereas what would be needed is a 13 bit interval, sign extended to
 * 32 bits, but interpreted as unsigned.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
		 : "=r" ((USItype)(w1)), \
		   "=r" ((USItype)(w0)) \
		 : "r" ((USItype)(u)), \
		   "r" ((USItype)(v)))
#define UMUL_TIME 5
/*
 * udiv_qrnnd(q, r, n1, n0, d) for SPARC v8: divide the two-word value
 * (n1,n0) by d, storing the quotient in q and the remainder in r.
 *
 * n1 is moved into %y, three nops follow, and udiv then produces the
 * quotient from n0 and d.  The remainder is recomputed in C as
 * n0 - q * d.
 *
 * Not provided when SUPERSPARC is defined: SuperSPARC's udiv only
 * handles 53 bit dividends.
 */
#if !defined(SUPERSPARC)
#define udiv_qrnnd(q, r, n1, n0, d) \
	do { \
		USItype __q; \
		__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
			 : "=r" ((USItype)(__q)) \
			 : "r" ((USItype)(n1)), \
			   "r" ((USItype)(n0)), \
			   "r" ((USItype)(d))); \
		(r) = (n0) - __q * (d); \
		(q) = __q; \
	} while (0)
#define UDIV_TIME 25
#endif /* SUPERSPARC */
1030#else /* ! __sparc_v8__ */
1031#if defined(__sparclite__)
/*
 * sparclite has a hardware multiply but no hardware divide.  It also has
 * two additional instructions: scan (ffs from high bit) and divscc.
 *
 * umul_ppmm(w1, w0, u, v): umul writes the low word of the product to
 * w0, and the high word is then read from %y into w1.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
		 : "=r" ((USItype)(w1)), \
		   "=r" ((USItype)(w0)) \
		 : "r" ((USItype)(u)), \
		   "r" ((USItype)(v)))
#define UMUL_TIME 5
/*
 * udiv_qrnnd(q, r, n1, n0, d) for sparclite: divide (n1,n0) by d using
 * the divscc step instruction; quotient goes to q, remainder to r.
 *
 * n1 is written to %y, then 32 divscc steps are issued: the first takes
 * n0, the following 30 chain through %g1, and the last one writes q.
 * The remainder is read from %y and, if the final condition codes say
 * "less", d is added back once (the add sits in the annulled delay slot
 * of the branch).  %g1 and the condition codes are clobbered.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n" \
		 "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
		 "tst %%g0\n" \
		 "divscc %3,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n"	/* 10 chained steps */ \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n"	/* 20 chained steps */ \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n"	/* 30 chained steps */ \
		 "divscc %%g1,%4,%0\n" \
		 "rd %%y,%1\n" \
		 "bl,a 1f\n" \
		 "add %1,%4,%1\n" \
		 "1: ! End of inline udiv_qrnnd" \
		 : "=r" ((USItype)(q)), \
		   "=r" ((USItype)(r)) \
		 : "r" ((USItype)(n1)), \
		   "r" ((USItype)(n0)), \
		   "rI" ((USItype)(d)) \
		 : "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +03001088#endif /* __sparclite__ */
1089#endif /* __sparc_v8__ */
1090 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
1091#ifndef umul_ppmm
/*
 * umul_ppmm(w1, w0, u, v) for SPARC v7 (no hardware multiply): the
 * product is built with the mulscc step instruction.
 *
 * u is written to %y.  %g2 is set to u masked with the sign of v, and
 * %g1 is cleared together with the condition codes.  32 mulscc steps
 * against v follow, plus one final step with 0.  The high word is
 * %g1 + %g2 and the low word is read from %y.  %g1, %g2 and the
 * condition codes are clobbered.
 *
 * The instructions marked "Don't move this insn" fill the delay that
 * follows the write to %y.
 */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("! Inlined umul_ppmm\n" \
		 "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
		 "sra %3,31,%%g2 ! Don't move this insn\n" \
		 "and %2,%%g2,%%g2 ! Don't move this insn\n" \
		 "andcc %%g0,0,%%g1 ! Don't move this insn\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n"	/* 8 */ \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n"	/* 16 */ \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n"	/* 24 */ \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n"	/* 32 */ \
		 "mulscc %%g1,0,%%g1\n" \
		 "add %%g1,%%g2,%0\n" \
		 "rd %%y,%1" \
		 : "=r" ((USItype)(w1)), \
		   "=r" ((USItype)(w0)) \
		 : "%rI" ((USItype)(u)), \
		   "r" ((USItype)(v)) \
		 : "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
/*
 * udiv_qrnnd(q, r, n1, n0, d) for SPARC v7: bit-serial division of
 * (n1,n0) by d, written as an assembler loop.  This much assembler is
 * worth it on sparc: the default udiv_qrnnd (in C) is more than
 * 10 times slower.
 *
 * Operand 0 starts as n0 and ends as the quotient q; operand 1 starts as
 * n1 and ends as the remainder r (both are tied input/output operands
 * and early-clobbered).  %g1 is the loop counter, initialised to 32.
 * The final xnor with 0 complements the bits collected in operand 0.
 * %g1 and the condition codes are clobbered.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n\t" \
		 "mov 32,%%g1\n\t" \
		 "subcc %1,%2,%%g0\n\t" \
		 "1: bcs 5f\n\t" \
		 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
		 "sub %1,%2,%1 ! this kills msb of n\n\t" \
		 "addx %1,%1,%1 ! so this can't give carry\n\t" \
		 "subcc %%g1,1,%%g1\n\t" \
		 "2: bne 1b\n\t" \
		 "subcc %1,%2,%%g0\n\t" \
		 "bcs 3f\n\t" \
		 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
		 "b 3f\n\t" \
		 "sub %1,%2,%1 ! this kills msb of n\n\t" \
		 "4: sub %1,%2,%1\n\t" \
		 "5: addxcc %1,%1,%1\n\t" \
		 "bcc 2b\n\t" \
		 "subcc %%g1,1,%%g1\n\t" \
		 "! Got carry from n. Subtract next step to cancel this carry.\n\t" \
		 "bne 4b\n\t" \
		 "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
		 "sub %1,%2,%1\n\t" \
		 "3: xnor %0,0,%0\n\t" \
		 "! End of inline udiv_qrnnd\n" \
		 : "=&r" ((USItype)(q)), \
		   "=&r" ((USItype)(r)) \
		 : "r" ((USItype)(d)), \
		   "1" ((USItype)(n1)), \
		   "0" ((USItype)(n0)) \
		 : "%g1", "cc")
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
1172#endif
Dmitry Kasatkin5ce3e312011-08-31 14:05:16 +03001173#endif /* __sparc__ */
1174
1175/***************************************
1176 ************** VAX ******************
1177 ***************************************/
1178#if defined(__vax__) && W_TYPE_SIZE == 32
/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition,
 * (sh,sl) = (ah,al) + (bh,bl).
 *
 * addl2 adds bl into the operand holding al; adwc then adds bh into the
 * operand holding ah.  All operands use the general "g" constraint, and
 * sl is early-clobbered because it is written before bh is read.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addl2 %5,%1\n" \
		 "adwc %3,%0" \
		 : "=g" ((USItype)(sh)), \
		   "=&g" ((USItype)(sl)) \
		 : "%0" ((USItype)(ah)), \
		   "g" ((USItype)(bh)), \
		   "%1" ((USItype)(al)), \
		   "g" ((USItype)(bl)))
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction,
 * (sh,sl) = (ah,al) - (bh,bl).
 *
 * subl2 subtracts bl from the operand holding al; sbwc then subtracts bh
 * from the operand holding ah.  No operand is marked commutative.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subl2 %5,%1\n" \
		 "sbwc %3,%0" \
		 : "=g" ((USItype)(sh)), \
		   "=&g" ((USItype)(sl)) \
		 : "0" ((USItype)(ah)), \
		   "g" ((USItype)(bh)), \
		   "1" ((USItype)(al)), \
		   "g" ((USItype)(bl)))
/*
 * umul_ppmm(xh, xl, m0, m1): (xh,xl) = m0 * m1, unsigned.
 *
 * emul stores a 64-bit result in the union (its third operand is the
 * constant 0); the halves are read back with __l first and __h second.
 * The last statement adds, for each factor whose top bit is set, the
 * other factor to the high word.  NOTE(review): that is the usual
 * signed-to-unsigned correction, which suggests emul is a signed
 * multiply -- confirm against the VAX architecture reference.
 */
#define umul_ppmm(xh, xl, m0, m1) \
	do { \
		union { \
			UDItype __ll; \
			struct { \
				USItype __l, __h; \
			} __i; \
		} __xx; \
		USItype __m0 = (m0), __m1 = (m1); \
		__asm__ ("emul %1,%2,$0,%0" \
			 : "=g" (__xx.__ll) \
			 : "g" (__m0), \
			   "g" (__m1)); \
		(xh) = __xx.__i.__h; \
		(xl) = __xx.__i.__l; \
		(xh) += ((((SItype) __m0 >> 31) & __m1) \
			 + (((SItype) __m1 >> 31) & __m0)); \
	} while (0)
/*
 * sdiv_qrnnd(q, r, n1, n0, d): signed division of the two-word value
 * (n1,n0) by d using ediv; quotient goes to q, remainder to r.
 *
 * The dividend is assembled in a union (n1 in __h, n0 in __l) and
 * passed to the instruction as one 64-bit operand.
 */
#define sdiv_qrnnd(q, r, n1, n0, d) \
	do { \
		union { \
			DItype __ll; \
			struct { \
				SItype __l, __h; \
			} __i; \
		} __xx; \
		__xx.__i.__h = n1; \
		__xx.__i.__l = n0; \
		__asm__ ("ediv %3,%2,%0,%1" \
			 : "=g" (q), "=g" (r) \
			 : "g" (__xx.__ll), "g" (d)); \
	} while (0)
1221#endif /* __vax__ */
1222
1223/***************************************
1224 ************** Z8000 ****************
1225 ***************************************/
1226#if defined(__z8000__) && W_TYPE_SIZE == 16
/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl) for 16-bit words: two-word
 * addition, (sh,sl) = (ah,al) + (bh,bl).
 *
 * "add" combines the low words and "adc" the high words.  bl may also be
 * matched by the target's "Q"/"R" constraints in addition to a register.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %H1,%H5\n\t" \
		 "adc %H0,%H3" \
		 : "=r" ((unsigned int)(sh)), \
		   "=&r" ((unsigned int)(sl)) \
		 : "%0" ((unsigned int)(ah)), \
		   "r" ((unsigned int)(bh)), \
		   "%1" ((unsigned int)(al)), \
		   "rQR" ((unsigned int)(bl)))
/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl) for 16-bit words: two-word
 * subtraction, (sh,sl) = (ah,al) - (bh,bl).
 *
 * "sub" handles the low words and "sbc" the high words.  No operand is
 * marked commutative.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %H1,%H5\n\t" \
		 "sbc %H0,%H3" \
		 : "=r" ((unsigned int)(sh)), \
		   "=&r" ((unsigned int)(sl)) \
		 : "0" ((unsigned int)(ah)), \
		   "r" ((unsigned int)(bh)), \
		   "1" ((unsigned int)(al)), \
		   "rQR" ((unsigned int)(bl)))
/*
 * umul_ppmm(xh, xl, m0, m1) for 16-bit words: (xh,xl) = m0 * m1,
 * unsigned.
 *
 * "mult" leaves its result in the two halves of the union; m0 is tied
 * to the low half on input.  The last statement adds, for each factor
 * whose top bit (bit 15) is set, the other factor to the high word.
 * NOTE(review): that is the usual signed-to-unsigned correction, which
 * suggests "mult" is a signed multiply -- confirm against the Z8000
 * manual.
 */
#define umul_ppmm(xh, xl, m0, m1) \
	do { \
		union { \
			long int __ll; \
			struct { \
				unsigned int __h, __l; \
			} __i; \
		} __xx; \
		unsigned int __m0 = (m0), __m1 = (m1); \
		__asm__ ("mult %S0,%H3" \
			 : "=r" (__xx.__i.__h), \
			   "=r" (__xx.__i.__l) \
			 : "%1" (__m0), \
			   "rQR" (__m1)); \
		(xh) = __xx.__i.__h; \
		(xl) = __xx.__i.__l; \
		(xh) += ((((signed int) __m0 >> 15) & __m1) \
			 + (((signed int) __m1 >> 15) & __m0)); \
	} while (0)
1258#endif /* __z8000__ */
1259
1260#endif /* __GNUC__ */
1261
1262/***************************************
1263 *********** Generic Versions ********
1264 ***************************************/
/*
 * Derive umul_ppmm() from __umulsidi3() when no umul_ppmm has been
 * selected yet but a double-word multiply is available.
 *
 * ph, pl - lvalues receiving the high and the low word of the product
 * m0, m1 - the two single-word factors
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; with a bare { } block,
 * "if (c) umul_ppmm(...); else ..." does not parse.  The output
 * arguments are parenthesised so that any lvalue expression is safe.
 */
#if !defined(umul_ppmm) && defined(__umulsidi3)
#define umul_ppmm(ph, pl, m0, m1) \
	do { \
		UDWtype __ll = __umulsidi3(m0, m1); \
		(ph) = (UWtype) (__ll >> W_TYPE_SIZE); \
		(pl) = (UWtype) __ll; \
	} while (0)
#endif
1273
/*
 * __umulsidi3(u, v): double-word product of two single words, built on
 * umul_ppmm().  Evaluates (as a statement expression) to a UDWtype whose
 * upper W_TYPE_SIZE bits are the high word and whose lower bits are the
 * low word.
 */
#ifndef __umulsidi3
#define __umulsidi3(u, v) \
	({ \
		UWtype __hi, __lo; \
		umul_ppmm(__hi, __lo, u, v); \
		((UDWtype) __hi << W_TYPE_SIZE) | __lo; \
	})
#endif
1280
1281 /* If this machine has no inline assembler, use C macros. */
1282
/*
 * Portable add_ssaaaa(sh, sl, ah, al, bh, bl):
 * (sh,sl) = (ah,al) + (bh,bl).
 *
 * The low sum is formed first; it has wrapped exactly when it is smaller
 * than al, and that carry is added to the high word.  sl is assigned
 * last, after every input has been read.  al is evaluated twice.
 */
#ifndef add_ssaaaa
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	do { \
		UWtype __x = (al) + (bl); \
		(sh) = (ah) + (bh) + (__x < (al)); \
		(sl) = __x; \
	} while (0)
#endif
1292
/*
 * Portable sub_ddmmss(sh, sl, ah, al, bh, bl):
 * (sh,sl) = (ah,al) - (bh,bl).
 *
 * The low difference is formed first; a borrow occurred exactly when the
 * result is larger than al, and it is subtracted from the high word.
 * sl is assigned last, after every input has been read.  al is evaluated
 * twice.
 */
#ifndef sub_ddmmss
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	do { \
		UWtype __x = (al) - (bl); \
		(sh) = (ah) - (bh) - (__x > (al)); \
		(sl) = __x; \
	} while (0)
#endif
1302
/*
 * Portable umul_ppmm(w1, w0, u, v): (w1,w0) = u * v, unsigned, using
 * four half-word by half-word multiplications.
 *
 * With B = __ll_B, u = uh*B + ul and v = vh*B + vl:
 *	u * v = uh*vh * B^2 + (uh*vl + ul*vh) * B + ul*vl
 * The two middle products are summed in __x1; a wrap-around of that sum
 * has weight B in the high word and is folded into __x3.
 */
#ifndef umul_ppmm
#define umul_ppmm(w1, w0, u, v) \
	do { \
		UWtype __x0, __x1, __x2, __x3; \
		UHWtype __ul, __vl, __uh, __vh; \
		UWtype __u = (u), __v = (v); \
		\
		/* split both factors into half-words */ \
		__uh = __ll_highpart(__u); \
		__ul = __ll_lowpart(__u); \
		__vh = __ll_highpart(__v); \
		__vl = __ll_lowpart(__v); \
		\
		/* the four partial products */ \
		__x3 = (UWtype) __uh * __vh; \
		__x2 = (UWtype) __uh * __vl; \
		__x1 = (UWtype) __ul * __vh; \
		__x0 = (UWtype) __ul * __vl; \
		\
		__x1 += __ll_highpart(__x0);	/* this cannot carry */ \
		__x1 += __x2;			/* this one can ... */ \
		if (__x1 < __x2) {		/* ... seen as a wrap-around */ \
			__x3 += __ll_B;		/* put the carry in place */ \
		} \
		\
		(w1) = __x3 + __ll_highpart(__x1); \
		(w0) = (__ll_lowpart(__x1) << (W_TYPE_SIZE / 2)) \
			+ __ll_lowpart(__x0); \
	} while (0)
#endif
1329
/*
 * smul_ppmm(w1, w0, u, v): signed double-word product (w1,w0) = u * v,
 * derived from umul_ppmm().
 *
 * w1, w0 - lvalues receiving the high and the low word
 * u, v   - the factors, as two's-complement values held in UWtype
 *
 * The unsigned product is corrected in the high word: for each factor
 * whose sign bit is set, the other factor is subtracted.  The low word
 * is the same for the signed and the unsigned product.
 *
 * The guard tests smul_ppmm itself.  Testing umul_ppmm here would never
 * define this macro, because umul_ppmm always exists by this point.
 * The temporaries are named __xm0/__xm1 because several umul_ppmm
 * variants in this file declare their own __m0/__m1; passing identically
 * named variables into them would expand to "__m0 = (__m0)".
 */
#if !defined(smul_ppmm)
#define smul_ppmm(w1, w0, u, v) \
	do { \
		UWtype __w1; \
		UWtype __xm0 = (u), __xm1 = (v); \
		umul_ppmm(__w1, w0, __xm0, __xm1); \
		(w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
			    - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
	} while (0)
#endif
1340
/*
 * __udiv_qrnnd_c(q, r, n1, n0, d): portable division of the two-word
 * value (n1,n0) by d; quotient goes to q, remainder to r.  Defined
 * unconditionally so that it can be used for debugging.
 *
 * The quotient is produced as two half-word digits.  Each digit is first
 * estimated by dividing by the high half of d (__d1) and then corrected
 * at most twice, using the product of the estimate and the low half of d
 * (__d0).  Note that d and n0 are evaluated more than once.
 */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
	do { \
		UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
		\
		__d1 = __ll_highpart(d); \
		__d0 = __ll_lowpart(d); \
		\
		/* upper quotient digit */ \
		__q1 = (n1) / __d1; \
		__r1 = (n1) % __d1; \
		__m = (UWtype) __q1 * __d0; \
		__r1 = __r1 * __ll_B | __ll_highpart(n0); \
		if (__r1 < __m) { \
			__q1--; \
			__r1 += (d); \
			/* retry only if adding d did not produce a carry */ \
			if (__r1 >= (d) && __r1 < __m) { \
				__q1--; \
				__r1 += (d); \
			} \
		} \
		__r1 -= __m; \
		\
		/* lower quotient digit */ \
		__q0 = __r1 / __d1; \
		__r0 = __r1 % __d1; \
		__m = (UWtype) __q0 * __d0; \
		__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
		if (__r0 < __m) { \
			__q0--; \
			__r0 += (d); \
			if (__r0 >= (d) && __r0 < __m) { \
				__q0--; \
				__r0 += (d); \
			} \
		} \
		__r0 -= __m; \
		\
		(q) = (UWtype) __q1 * __ll_B | __q0; \
		(r) = __r0; \
	} while (0)
1375
/*
 * Targets that provide sdiv_qrnnd but no udiv_qrnnd get the unsigned
 * divide from the out-of-line helper __udiv_w_sdiv (defined in libgcc or
 * elsewhere).  The helper's return value is used as the quotient, and it
 * is handed the address of a temporary whose value then becomes the
 * remainder.
 */
#if defined(sdiv_qrnnd) && !defined(udiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
	do { \
		UWtype __r; \
		(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
		(r) = __r; \
	} while (0)
#endif
1386
/*
 * Last resort: when no udiv_qrnnd has been defined for this processor,
 * alias it to the portable __udiv_qrnnd_c and set
 * UDIV_NEEDS_NORMALIZATION to 1.
 */
#ifndef udiv_qrnnd
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/* Every other configuration gets the default of 0 unless it set a value itself. */
#if !defined(UDIV_NEEDS_NORMALIZATION)
#define UDIV_NEEDS_NORMALIZATION 0
#endif