blob: 029f9bbfe7b5b93145280f8ad6e06669d1c420e3 [file] [log] [blame]
Thomas Gleixner660662f2019-05-24 12:04:10 +02001// SPDX-License-Identifier: GPL-2.0-or-later
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
3 * Linux/PA-RISC Project (http://www.parisc-linux.org/)
4 *
5 * Floating-point emulation code
6 * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 */
8/*
9 * BEGIN_DESC
10 *
11 * File:
12 * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
13 *
14 * Purpose:
15 * Double Floating-point Multiply Fused Add
16 * Double Floating-point Multiply Negate Fused Add
17 * Single Floating-point Multiply Fused Add
18 * Single Floating-point Multiply Negate Fused Add
19 *
20 * External Interfaces:
21 * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
22 * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
23 * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
24 * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
25 *
26 * Internal Interfaces:
27 *
28 * Theory:
29 * <<please update with a overview of the operation of this file>>
30 *
31 * END_DESC
32*/
33
34
35#include "float.h"
36#include "sgl_float.h"
37#include "dbl_float.h"
38
39
40/*
41 * Double Floating-point Multiply Fused Add
42 */
43
44int
45dbl_fmpyfadd(
46 dbl_floating_point *src1ptr,
47 dbl_floating_point *src2ptr,
48 dbl_floating_point *src3ptr,
49 unsigned int *status,
50 dbl_floating_point *dstptr)
51{
52 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
53 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
54 unsigned int rightp1, rightp2, rightp3, rightp4;
55 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
56 register int mpy_exponent, add_exponent, count;
57 boolean inexact = FALSE, is_tiny = FALSE;
58
59 unsigned int signlessleft1, signlessright1, save;
60 register int result_exponent, diff_exponent;
61 int sign_save, jumpsize;
62
63 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
64 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
65 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
66
67 /*
68 * set sign bit of result of multiply
69 */
70 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
71 Dbl_setnegativezerop1(resultp1);
72 else Dbl_setzerop1(resultp1);
73
74 /*
75 * Generate multiply exponent
76 */
77 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
78
79 /*
80 * check first operand for NaN's or infinity
81 */
82 if (Dbl_isinfinity_exponent(opnd1p1)) {
83 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
84 if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
85 Dbl_isnotnan(opnd3p1,opnd3p2)) {
86 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
87 /*
88 * invalid since operands are infinity
89 * and zero
90 */
91 if (Is_invalidtrap_enabled())
92 return(OPC_2E_INVALIDEXCEPTION);
93 Set_invalidflag();
94 Dbl_makequietnan(resultp1,resultp2);
95 Dbl_copytoptr(resultp1,resultp2,dstptr);
96 return(NOEXCEPTION);
97 }
98 /*
99 * Check third operand for infinity with a
100 * sign opposite of the multiply result
101 */
102 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
103 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
104 /*
105 * invalid since attempting a magnitude
106 * subtraction of infinities
107 */
108 if (Is_invalidtrap_enabled())
109 return(OPC_2E_INVALIDEXCEPTION);
110 Set_invalidflag();
111 Dbl_makequietnan(resultp1,resultp2);
112 Dbl_copytoptr(resultp1,resultp2,dstptr);
113 return(NOEXCEPTION);
114 }
115
116 /*
117 * return infinity
118 */
119 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
120 Dbl_copytoptr(resultp1,resultp2,dstptr);
121 return(NOEXCEPTION);
122 }
123 }
124 else {
125 /*
126 * is NaN; signaling or quiet?
127 */
128 if (Dbl_isone_signaling(opnd1p1)) {
129 /* trap if INVALIDTRAP enabled */
130 if (Is_invalidtrap_enabled())
131 return(OPC_2E_INVALIDEXCEPTION);
132 /* make NaN quiet */
133 Set_invalidflag();
134 Dbl_set_quiet(opnd1p1);
135 }
136 /*
137 * is second operand a signaling NaN?
138 */
139 else if (Dbl_is_signalingnan(opnd2p1)) {
140 /* trap if INVALIDTRAP enabled */
141 if (Is_invalidtrap_enabled())
142 return(OPC_2E_INVALIDEXCEPTION);
143 /* make NaN quiet */
144 Set_invalidflag();
145 Dbl_set_quiet(opnd2p1);
146 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
147 return(NOEXCEPTION);
148 }
149 /*
150 * is third operand a signaling NaN?
151 */
152 else if (Dbl_is_signalingnan(opnd3p1)) {
153 /* trap if INVALIDTRAP enabled */
154 if (Is_invalidtrap_enabled())
155 return(OPC_2E_INVALIDEXCEPTION);
156 /* make NaN quiet */
157 Set_invalidflag();
158 Dbl_set_quiet(opnd3p1);
159 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
160 return(NOEXCEPTION);
161 }
162 /*
163 * return quiet NaN
164 */
165 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
166 return(NOEXCEPTION);
167 }
168 }
169
170 /*
171 * check second operand for NaN's or infinity
172 */
173 if (Dbl_isinfinity_exponent(opnd2p1)) {
174 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
175 if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
176 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
177 /*
178 * invalid since multiply operands are
179 * zero & infinity
180 */
181 if (Is_invalidtrap_enabled())
182 return(OPC_2E_INVALIDEXCEPTION);
183 Set_invalidflag();
184 Dbl_makequietnan(opnd2p1,opnd2p2);
185 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
186 return(NOEXCEPTION);
187 }
188
189 /*
190 * Check third operand for infinity with a
191 * sign opposite of the multiply result
192 */
193 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
194 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
195 /*
196 * invalid since attempting a magnitude
197 * subtraction of infinities
198 */
199 if (Is_invalidtrap_enabled())
200 return(OPC_2E_INVALIDEXCEPTION);
201 Set_invalidflag();
202 Dbl_makequietnan(resultp1,resultp2);
203 Dbl_copytoptr(resultp1,resultp2,dstptr);
204 return(NOEXCEPTION);
205 }
206
207 /*
208 * return infinity
209 */
210 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
211 Dbl_copytoptr(resultp1,resultp2,dstptr);
212 return(NOEXCEPTION);
213 }
214 }
215 else {
216 /*
217 * is NaN; signaling or quiet?
218 */
219 if (Dbl_isone_signaling(opnd2p1)) {
220 /* trap if INVALIDTRAP enabled */
221 if (Is_invalidtrap_enabled())
222 return(OPC_2E_INVALIDEXCEPTION);
223 /* make NaN quiet */
224 Set_invalidflag();
225 Dbl_set_quiet(opnd2p1);
226 }
227 /*
228 * is third operand a signaling NaN?
229 */
230 else if (Dbl_is_signalingnan(opnd3p1)) {
231 /* trap if INVALIDTRAP enabled */
232 if (Is_invalidtrap_enabled())
233 return(OPC_2E_INVALIDEXCEPTION);
234 /* make NaN quiet */
235 Set_invalidflag();
236 Dbl_set_quiet(opnd3p1);
237 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
238 return(NOEXCEPTION);
239 }
240 /*
241 * return quiet NaN
242 */
243 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
244 return(NOEXCEPTION);
245 }
246 }
247
248 /*
249 * check third operand for NaN's or infinity
250 */
251 if (Dbl_isinfinity_exponent(opnd3p1)) {
252 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
253 /* return infinity */
254 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
255 return(NOEXCEPTION);
256 } else {
257 /*
258 * is NaN; signaling or quiet?
259 */
260 if (Dbl_isone_signaling(opnd3p1)) {
261 /* trap if INVALIDTRAP enabled */
262 if (Is_invalidtrap_enabled())
263 return(OPC_2E_INVALIDEXCEPTION);
264 /* make NaN quiet */
265 Set_invalidflag();
266 Dbl_set_quiet(opnd3p1);
267 }
268 /*
269 * return quiet NaN
270 */
271 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
272 return(NOEXCEPTION);
273 }
274 }
275
276 /*
277 * Generate multiply mantissa
278 */
279 if (Dbl_isnotzero_exponent(opnd1p1)) {
280 /* set hidden bit */
281 Dbl_clear_signexponent_set_hidden(opnd1p1);
282 }
283 else {
284 /* check for zero */
285 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
286 /*
287 * Perform the add opnd3 with zero here.
288 */
289 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
290 if (Is_rounding_mode(ROUNDMINUS)) {
291 Dbl_or_signs(opnd3p1,resultp1);
292 } else {
293 Dbl_and_signs(opnd3p1,resultp1);
294 }
295 }
296 /*
297 * Now let's check for trapped underflow case.
298 */
299 else if (Dbl_iszero_exponent(opnd3p1) &&
300 Is_underflowtrap_enabled()) {
301 /* need to normalize results mantissa */
302 sign_save = Dbl_signextendedsign(opnd3p1);
303 result_exponent = 0;
304 Dbl_leftshiftby1(opnd3p1,opnd3p2);
305 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
306 Dbl_set_sign(opnd3p1,/*using*/sign_save);
307 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
308 unfl);
309 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
310 /* inexact = FALSE */
311 return(OPC_2E_UNDERFLOWEXCEPTION);
312 }
313 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
314 return(NOEXCEPTION);
315 }
316 /* is denormalized, adjust exponent */
317 Dbl_clear_signexponent(opnd1p1);
318 Dbl_leftshiftby1(opnd1p1,opnd1p2);
319 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
320 }
321 /* opnd2 needs to have hidden bit set with msb in hidden bit */
322 if (Dbl_isnotzero_exponent(opnd2p1)) {
323 Dbl_clear_signexponent_set_hidden(opnd2p1);
324 }
325 else {
326 /* check for zero */
327 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
328 /*
329 * Perform the add opnd3 with zero here.
330 */
331 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
332 if (Is_rounding_mode(ROUNDMINUS)) {
333 Dbl_or_signs(opnd3p1,resultp1);
334 } else {
335 Dbl_and_signs(opnd3p1,resultp1);
336 }
337 }
338 /*
339 * Now let's check for trapped underflow case.
340 */
341 else if (Dbl_iszero_exponent(opnd3p1) &&
342 Is_underflowtrap_enabled()) {
343 /* need to normalize results mantissa */
344 sign_save = Dbl_signextendedsign(opnd3p1);
345 result_exponent = 0;
346 Dbl_leftshiftby1(opnd3p1,opnd3p2);
347 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
348 Dbl_set_sign(opnd3p1,/*using*/sign_save);
349 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
350 unfl);
351 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
352 /* inexact = FALSE */
353 return(OPC_2E_UNDERFLOWEXCEPTION);
354 }
355 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
356 return(NOEXCEPTION);
357 }
358 /* is denormalized; want to normalize */
359 Dbl_clear_signexponent(opnd2p1);
360 Dbl_leftshiftby1(opnd2p1,opnd2p2);
361 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
362 }
363
364 /* Multiply the first two source mantissas together */
365
366 /*
367 * The intermediate result will be kept in tmpres,
368 * which needs enough room for 106 bits of mantissa,
369 * so lets call it a Double extended.
370 */
371 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
372
373 /*
374 * Four bits at a time are inspected in each loop, and a
375 * simple shift and add multiply algorithm is used.
376 */
377 for (count = DBL_P-1; count >= 0; count -= 4) {
378 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
379 if (Dbit28p2(opnd1p2)) {
380 /* Fourword_add should be an ADD followed by 3 ADDC's */
381 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
382 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
383 }
384 if (Dbit29p2(opnd1p2)) {
385 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
386 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
387 }
388 if (Dbit30p2(opnd1p2)) {
389 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
390 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
391 }
392 if (Dbit31p2(opnd1p2)) {
393 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
394 opnd2p1, opnd2p2, 0, 0);
395 }
396 Dbl_rightshiftby4(opnd1p1,opnd1p2);
397 }
398 if (Is_dexthiddenoverflow(tmpresp1)) {
399 /* result mantissa >= 2 (mantissa overflow) */
400 mpy_exponent++;
401 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
402 }
403
404 /*
405 * Restore the sign of the mpy result which was saved in resultp1.
406 * The exponent will continue to be kept in mpy_exponent.
407 */
408 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
409
410 /*
411 * No rounding is required, since the result of the multiply
412 * is exact in the extended format.
413 */
414
415 /*
416 * Now we are ready to perform the add portion of the operation.
417 *
418 * The exponents need to be kept as integers for now, since the
419 * multiply result might not fit into the exponent field. We
420 * can't overflow or underflow because of this yet, since the
421 * add could bring the final result back into range.
422 */
423 add_exponent = Dbl_exponent(opnd3p1);
424
425 /*
426 * Check for denormalized or zero add operand.
427 */
428 if (add_exponent == 0) {
429 /* check for zero */
430 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
431 /* right is zero */
432 /* Left can't be zero and must be result.
433 *
434 * The final result is now in tmpres and mpy_exponent,
435 * and needs to be rounded and squeezed back into
436 * double precision format from double extended.
437 */
438 result_exponent = mpy_exponent;
439 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
440 resultp1,resultp2,resultp3,resultp4);
441 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
442 goto round;
443 }
444
445 /*
446 * Neither are zeroes.
447 * Adjust exponent and normalize add operand.
448 */
449 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
450 Dbl_clear_signexponent(opnd3p1);
451 Dbl_leftshiftby1(opnd3p1,opnd3p2);
452 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
453 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
454 } else {
455 Dbl_clear_exponent_set_hidden(opnd3p1);
456 }
457 /*
458 * Copy opnd3 to the double extended variable called right.
459 */
460 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
461
462 /*
463 * A zero "save" helps discover equal operands (for later),
464 * and is used in swapping operands (if needed).
465 */
466 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
467
468 /*
469 * Compare magnitude of operands.
470 */
471 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
472 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
473 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
474 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
475 /*
476 * Set the left operand to the larger one by XOR swap.
477 * First finish the first word "save".
478 */
479 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
480 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
481 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
482 rightp2,rightp3,rightp4);
483 /* also setup exponents used in rest of routine */
484 diff_exponent = add_exponent - mpy_exponent;
485 result_exponent = add_exponent;
486 } else {
487 /* also setup exponents used in rest of routine */
488 diff_exponent = mpy_exponent - add_exponent;
489 result_exponent = mpy_exponent;
490 }
491 /* Invariant: left is not smaller than right. */
492
493 /*
494 * Special case alignment of operands that would force alignment
495 * beyond the extent of the extension. A further optimization
496 * could special case this but only reduces the path length for
497 * this infrequent case.
498 */
499 if (diff_exponent > DBLEXT_THRESHOLD) {
500 diff_exponent = DBLEXT_THRESHOLD;
501 }
502
503 /* Align right operand by shifting it to the right */
504 Dblext_clear_sign(rightp1);
505 Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
506 /*shifted by*/diff_exponent);
507
508 /* Treat sum and difference of the operands separately. */
509 if ((int)save < 0) {
510 /*
511 * Difference of the two operands. Overflow can occur if the
512 * multiply overflowed. A borrow can occur out of the hidden
513 * bit and force a post normalization phase.
514 */
515 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
516 rightp1,rightp2,rightp3,rightp4,
517 resultp1,resultp2,resultp3,resultp4);
518 sign_save = Dbl_signextendedsign(resultp1);
519 if (Dbl_iszero_hidden(resultp1)) {
520 /* Handle normalization */
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300521 /* A straightforward algorithm would now shift the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 * result and extension left until the hidden bit
523 * becomes one. Not all of the extension bits need
524 * participate in the shift. Only the two most
525 * significant bits (round and guard) are needed.
526 * If only a single shift is needed then the guard
527 * bit becomes a significant low order bit and the
528 * extension must participate in the rounding.
529 * If more than a single shift is needed, then all
530 * bits to the right of the guard bit are zeros,
531 * and the guard bit may or may not be zero. */
532 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
533 resultp4);
534
535 /* Need to check for a zero result. The sign and
536 * exponent fields have already been zeroed. The more
537 * efficient test of the full object can be used.
538 */
539 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
540 /* Must have been "x-x" or "x+(-x)". */
541 if (Is_rounding_mode(ROUNDMINUS))
542 Dbl_setone_sign(resultp1);
543 Dbl_copytoptr(resultp1,resultp2,dstptr);
544 return(NOEXCEPTION);
545 }
546 result_exponent--;
547
548 /* Look to see if normalization is finished. */
549 if (Dbl_isone_hidden(resultp1)) {
550 /* No further normalization is needed */
551 goto round;
552 }
553
554 /* Discover first one bit to determine shift amount.
555 * Use a modified binary search. We have already
556 * shifted the result one position right and still
557 * not found a one so the remainder of the extension
558 * must be zero and simplifies rounding. */
559 /* Scan bytes */
560 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
561 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
562 result_exponent -= 8;
563 }
564 /* Now narrow it down to the nibble */
565 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
566 /* The lower nibble contains the
567 * normalizing one */
568 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
569 result_exponent -= 4;
570 }
571 /* Select case where first bit is set (already
572 * normalized) otherwise select the proper shift. */
573 jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
574 if (jumpsize <= 7) switch(jumpsize) {
575 case 1:
576 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
577 resultp4);
578 result_exponent -= 3;
579 break;
580 case 2:
581 case 3:
582 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
583 resultp4);
584 result_exponent -= 2;
585 break;
586 case 4:
587 case 5:
588 case 6:
589 case 7:
590 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
591 resultp4);
592 result_exponent -= 1;
593 break;
594 }
595 } /* end if (hidden...)... */
596 /* Fall through and round */
597 } /* end if (save < 0)... */
598 else {
599 /* Add magnitudes */
600 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
601 rightp1,rightp2,rightp3,rightp4,
602 /*to*/resultp1,resultp2,resultp3,resultp4);
603 sign_save = Dbl_signextendedsign(resultp1);
604 if (Dbl_isone_hiddenoverflow(resultp1)) {
605 /* Prenormalization required. */
606 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
607 resultp4);
608 result_exponent++;
609 } /* end if hiddenoverflow... */
610 } /* end else ...add magnitudes... */
611
612 /* Round the result. If the extension and lower two words are
613 * all zeros, then the result is exact. Otherwise round in the
614 * correct direction. Underflow is possible. If a postnormalization
615 * is necessary, then the mantissa is all zeros so no shift is needed.
616 */
617 round:
618 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
619 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
620 result_exponent,is_tiny);
621 }
622 Dbl_set_sign(resultp1,/*using*/sign_save);
623 if (Dblext_isnotzero_mantissap3(resultp3) ||
624 Dblext_isnotzero_mantissap4(resultp4)) {
625 inexact = TRUE;
626 switch(Rounding_mode()) {
627 case ROUNDNEAREST: /* The default. */
628 if (Dblext_isone_highp3(resultp3)) {
629 /* at least 1/2 ulp */
630 if (Dblext_isnotzero_low31p3(resultp3) ||
631 Dblext_isnotzero_mantissap4(resultp4) ||
632 Dblext_isone_lowp2(resultp2)) {
633 /* either exactly half way and odd or
634 * more than 1/2ulp */
635 Dbl_increment(resultp1,resultp2);
636 }
637 }
638 break;
639
640 case ROUNDPLUS:
641 if (Dbl_iszero_sign(resultp1)) {
642 /* Round up positive results */
643 Dbl_increment(resultp1,resultp2);
644 }
645 break;
646
647 case ROUNDMINUS:
648 if (Dbl_isone_sign(resultp1)) {
649 /* Round down negative results */
650 Dbl_increment(resultp1,resultp2);
651 }
652
653 case ROUNDZERO:;
654 /* truncate is simple */
655 } /* end switch... */
656 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
657 }
658 if (result_exponent >= DBL_INFINITY_EXPONENT) {
659 /* trap if OVERFLOWTRAP enabled */
660 if (Is_overflowtrap_enabled()) {
661 /*
662 * Adjust bias of result
663 */
664 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
665 Dbl_copytoptr(resultp1,resultp2,dstptr);
666 if (inexact)
667 if (Is_inexacttrap_enabled())
668 return (OPC_2E_OVERFLOWEXCEPTION |
669 OPC_2E_INEXACTEXCEPTION);
670 else Set_inexactflag();
671 return (OPC_2E_OVERFLOWEXCEPTION);
672 }
673 inexact = TRUE;
674 Set_overflowflag();
675 /* set result to infinity or largest number */
676 Dbl_setoverflow(resultp1,resultp2);
677
678 } else if (result_exponent <= 0) { /* underflow case */
679 if (Is_underflowtrap_enabled()) {
680 /*
681 * Adjust bias of result
682 */
683 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
684 Dbl_copytoptr(resultp1,resultp2,dstptr);
685 if (inexact)
686 if (Is_inexacttrap_enabled())
687 return (OPC_2E_UNDERFLOWEXCEPTION |
688 OPC_2E_INEXACTEXCEPTION);
689 else Set_inexactflag();
690 return(OPC_2E_UNDERFLOWEXCEPTION);
691 }
692 else if (inexact && is_tiny) Set_underflowflag();
693 }
694 else Dbl_set_exponent(resultp1,result_exponent);
695 Dbl_copytoptr(resultp1,resultp2,dstptr);
696 if (inexact)
697 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
698 else Set_inexactflag();
699 return(NOEXCEPTION);
700}
701
702/*
703 * Double Floating-point Multiply Negate Fused Add
704 */
705
706dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
707
708dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
709unsigned int *status;
710{
711 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
712 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
713 unsigned int rightp1, rightp2, rightp3, rightp4;
714 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
715 register int mpy_exponent, add_exponent, count;
716 boolean inexact = FALSE, is_tiny = FALSE;
717
718 unsigned int signlessleft1, signlessright1, save;
719 register int result_exponent, diff_exponent;
720 int sign_save, jumpsize;
721
722 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
723 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
724 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
725
726 /*
727 * set sign bit of result of multiply
728 */
729 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
730 Dbl_setzerop1(resultp1);
731 else
732 Dbl_setnegativezerop1(resultp1);
733
734 /*
735 * Generate multiply exponent
736 */
737 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
738
739 /*
740 * check first operand for NaN's or infinity
741 */
742 if (Dbl_isinfinity_exponent(opnd1p1)) {
743 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
744 if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
745 Dbl_isnotnan(opnd3p1,opnd3p2)) {
746 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
747 /*
748 * invalid since operands are infinity
749 * and zero
750 */
751 if (Is_invalidtrap_enabled())
752 return(OPC_2E_INVALIDEXCEPTION);
753 Set_invalidflag();
754 Dbl_makequietnan(resultp1,resultp2);
755 Dbl_copytoptr(resultp1,resultp2,dstptr);
756 return(NOEXCEPTION);
757 }
758 /*
759 * Check third operand for infinity with a
760 * sign opposite of the multiply result
761 */
762 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
763 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
764 /*
765 * invalid since attempting a magnitude
766 * subtraction of infinities
767 */
768 if (Is_invalidtrap_enabled())
769 return(OPC_2E_INVALIDEXCEPTION);
770 Set_invalidflag();
771 Dbl_makequietnan(resultp1,resultp2);
772 Dbl_copytoptr(resultp1,resultp2,dstptr);
773 return(NOEXCEPTION);
774 }
775
776 /*
777 * return infinity
778 */
779 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
780 Dbl_copytoptr(resultp1,resultp2,dstptr);
781 return(NOEXCEPTION);
782 }
783 }
784 else {
785 /*
786 * is NaN; signaling or quiet?
787 */
788 if (Dbl_isone_signaling(opnd1p1)) {
789 /* trap if INVALIDTRAP enabled */
790 if (Is_invalidtrap_enabled())
791 return(OPC_2E_INVALIDEXCEPTION);
792 /* make NaN quiet */
793 Set_invalidflag();
794 Dbl_set_quiet(opnd1p1);
795 }
796 /*
797 * is second operand a signaling NaN?
798 */
799 else if (Dbl_is_signalingnan(opnd2p1)) {
800 /* trap if INVALIDTRAP enabled */
801 if (Is_invalidtrap_enabled())
802 return(OPC_2E_INVALIDEXCEPTION);
803 /* make NaN quiet */
804 Set_invalidflag();
805 Dbl_set_quiet(opnd2p1);
806 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
807 return(NOEXCEPTION);
808 }
809 /*
810 * is third operand a signaling NaN?
811 */
812 else if (Dbl_is_signalingnan(opnd3p1)) {
813 /* trap if INVALIDTRAP enabled */
814 if (Is_invalidtrap_enabled())
815 return(OPC_2E_INVALIDEXCEPTION);
816 /* make NaN quiet */
817 Set_invalidflag();
818 Dbl_set_quiet(opnd3p1);
819 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
820 return(NOEXCEPTION);
821 }
822 /*
823 * return quiet NaN
824 */
825 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
826 return(NOEXCEPTION);
827 }
828 }
829
830 /*
831 * check second operand for NaN's or infinity
832 */
833 if (Dbl_isinfinity_exponent(opnd2p1)) {
834 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
835 if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
836 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
837 /*
838 * invalid since multiply operands are
839 * zero & infinity
840 */
841 if (Is_invalidtrap_enabled())
842 return(OPC_2E_INVALIDEXCEPTION);
843 Set_invalidflag();
844 Dbl_makequietnan(opnd2p1,opnd2p2);
845 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
846 return(NOEXCEPTION);
847 }
848
849 /*
850 * Check third operand for infinity with a
851 * sign opposite of the multiply result
852 */
853 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
854 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
855 /*
856 * invalid since attempting a magnitude
857 * subtraction of infinities
858 */
859 if (Is_invalidtrap_enabled())
860 return(OPC_2E_INVALIDEXCEPTION);
861 Set_invalidflag();
862 Dbl_makequietnan(resultp1,resultp2);
863 Dbl_copytoptr(resultp1,resultp2,dstptr);
864 return(NOEXCEPTION);
865 }
866
867 /*
868 * return infinity
869 */
870 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
871 Dbl_copytoptr(resultp1,resultp2,dstptr);
872 return(NOEXCEPTION);
873 }
874 }
875 else {
876 /*
877 * is NaN; signaling or quiet?
878 */
879 if (Dbl_isone_signaling(opnd2p1)) {
880 /* trap if INVALIDTRAP enabled */
881 if (Is_invalidtrap_enabled())
882 return(OPC_2E_INVALIDEXCEPTION);
883 /* make NaN quiet */
884 Set_invalidflag();
885 Dbl_set_quiet(opnd2p1);
886 }
887 /*
888 * is third operand a signaling NaN?
889 */
890 else if (Dbl_is_signalingnan(opnd3p1)) {
891 /* trap if INVALIDTRAP enabled */
892 if (Is_invalidtrap_enabled())
893 return(OPC_2E_INVALIDEXCEPTION);
894 /* make NaN quiet */
895 Set_invalidflag();
896 Dbl_set_quiet(opnd3p1);
897 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
898 return(NOEXCEPTION);
899 }
900 /*
901 * return quiet NaN
902 */
903 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
904 return(NOEXCEPTION);
905 }
906 }
907
908 /*
909 * check third operand for NaN's or infinity
910 */
911 if (Dbl_isinfinity_exponent(opnd3p1)) {
912 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
913 /* return infinity */
914 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
915 return(NOEXCEPTION);
916 } else {
917 /*
918 * is NaN; signaling or quiet?
919 */
920 if (Dbl_isone_signaling(opnd3p1)) {
921 /* trap if INVALIDTRAP enabled */
922 if (Is_invalidtrap_enabled())
923 return(OPC_2E_INVALIDEXCEPTION);
924 /* make NaN quiet */
925 Set_invalidflag();
926 Dbl_set_quiet(opnd3p1);
927 }
928 /*
929 * return quiet NaN
930 */
931 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
932 return(NOEXCEPTION);
933 }
934 }
935
936 /*
937 * Generate multiply mantissa
938 */
939 if (Dbl_isnotzero_exponent(opnd1p1)) {
940 /* set hidden bit */
941 Dbl_clear_signexponent_set_hidden(opnd1p1);
942 }
943 else {
944 /* check for zero */
945 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
946 /*
947 * Perform the add opnd3 with zero here.
948 */
949 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
950 if (Is_rounding_mode(ROUNDMINUS)) {
951 Dbl_or_signs(opnd3p1,resultp1);
952 } else {
953 Dbl_and_signs(opnd3p1,resultp1);
954 }
955 }
956 /*
957 * Now let's check for trapped underflow case.
958 */
959 else if (Dbl_iszero_exponent(opnd3p1) &&
960 Is_underflowtrap_enabled()) {
961 /* need to normalize results mantissa */
962 sign_save = Dbl_signextendedsign(opnd3p1);
963 result_exponent = 0;
964 Dbl_leftshiftby1(opnd3p1,opnd3p2);
965 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
966 Dbl_set_sign(opnd3p1,/*using*/sign_save);
967 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
968 unfl);
969 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
970 /* inexact = FALSE */
971 return(OPC_2E_UNDERFLOWEXCEPTION);
972 }
973 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
974 return(NOEXCEPTION);
975 }
976 /* is denormalized, adjust exponent */
977 Dbl_clear_signexponent(opnd1p1);
978 Dbl_leftshiftby1(opnd1p1,opnd1p2);
979 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
980 }
981 /* opnd2 needs to have hidden bit set with msb in hidden bit */
982 if (Dbl_isnotzero_exponent(opnd2p1)) {
983 Dbl_clear_signexponent_set_hidden(opnd2p1);
984 }
985 else {
986 /* check for zero */
987 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
988 /*
989 * Perform the add opnd3 with zero here.
990 */
991 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
992 if (Is_rounding_mode(ROUNDMINUS)) {
993 Dbl_or_signs(opnd3p1,resultp1);
994 } else {
995 Dbl_and_signs(opnd3p1,resultp1);
996 }
997 }
998 /*
999 * Now let's check for trapped underflow case.
1000 */
1001 else if (Dbl_iszero_exponent(opnd3p1) &&
1002 Is_underflowtrap_enabled()) {
1003 /* need to normalize results mantissa */
1004 sign_save = Dbl_signextendedsign(opnd3p1);
1005 result_exponent = 0;
1006 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1007 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1008 Dbl_set_sign(opnd3p1,/*using*/sign_save);
1009 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1010 unfl);
1011 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1012 /* inexact = FALSE */
1013 return(OPC_2E_UNDERFLOWEXCEPTION);
1014 }
1015 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1016 return(NOEXCEPTION);
1017 }
1018 /* is denormalized; want to normalize */
1019 Dbl_clear_signexponent(opnd2p1);
1020 Dbl_leftshiftby1(opnd2p1,opnd2p2);
1021 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1022 }
1023
1024 /* Multiply the first two source mantissas together */
1025
1026 /*
1027 * The intermediate result will be kept in tmpres,
1028 * which needs enough room for 106 bits of mantissa,
1029 * so lets call it a Double extended.
1030 */
1031 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1032
1033 /*
1034 * Four bits at a time are inspected in each loop, and a
1035 * simple shift and add multiply algorithm is used.
1036 */
1037 for (count = DBL_P-1; count >= 0; count -= 4) {
1038 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1039 if (Dbit28p2(opnd1p2)) {
1040 /* Fourword_add should be an ADD followed by 3 ADDC's */
1041 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1042 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1043 }
1044 if (Dbit29p2(opnd1p2)) {
1045 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1046 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1047 }
1048 if (Dbit30p2(opnd1p2)) {
1049 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1050 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1051 }
1052 if (Dbit31p2(opnd1p2)) {
1053 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1054 opnd2p1, opnd2p2, 0, 0);
1055 }
1056 Dbl_rightshiftby4(opnd1p1,opnd1p2);
1057 }
1058 if (Is_dexthiddenoverflow(tmpresp1)) {
1059 /* result mantissa >= 2 (mantissa overflow) */
1060 mpy_exponent++;
1061 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1062 }
1063
1064 /*
1065 * Restore the sign of the mpy result which was saved in resultp1.
1066 * The exponent will continue to be kept in mpy_exponent.
1067 */
1068 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1069
1070 /*
1071 * No rounding is required, since the result of the multiply
1072 * is exact in the extended format.
1073 */
1074
1075 /*
1076 * Now we are ready to perform the add portion of the operation.
1077 *
1078 * The exponents need to be kept as integers for now, since the
1079 * multiply result might not fit into the exponent field. We
1080 * can't overflow or underflow because of this yet, since the
1081 * add could bring the final result back into range.
1082 */
1083 add_exponent = Dbl_exponent(opnd3p1);
1084
1085 /*
1086 * Check for denormalized or zero add operand.
1087 */
1088 if (add_exponent == 0) {
1089 /* check for zero */
1090 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1091 /* right is zero */
1092 /* Left can't be zero and must be result.
1093 *
1094 * The final result is now in tmpres and mpy_exponent,
1095 * and needs to be rounded and squeezed back into
1096 * double precision format from double extended.
1097 */
1098 result_exponent = mpy_exponent;
1099 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1100 resultp1,resultp2,resultp3,resultp4);
1101 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1102 goto round;
1103 }
1104
1105 /*
1106 * Neither are zeroes.
1107 * Adjust exponent and normalize add operand.
1108 */
1109 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
1110 Dbl_clear_signexponent(opnd3p1);
1111 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1112 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1113 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
1114 } else {
1115 Dbl_clear_exponent_set_hidden(opnd3p1);
1116 }
1117 /*
1118 * Copy opnd3 to the double extended variable called right.
1119 */
1120 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1121
1122 /*
1123 * A zero "save" helps discover equal operands (for later),
1124 * and is used in swapping operands (if needed).
1125 */
1126 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1127
1128 /*
1129 * Compare magnitude of operands.
1130 */
1131 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1132 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1133 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1134 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1135 /*
1136 * Set the left operand to the larger one by XOR swap.
1137 * First finish the first word "save".
1138 */
1139 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1140 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1141 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1142 rightp2,rightp3,rightp4);
1143 /* also setup exponents used in rest of routine */
1144 diff_exponent = add_exponent - mpy_exponent;
1145 result_exponent = add_exponent;
1146 } else {
1147 /* also setup exponents used in rest of routine */
1148 diff_exponent = mpy_exponent - add_exponent;
1149 result_exponent = mpy_exponent;
1150 }
1151 /* Invariant: left is not smaller than right. */
1152
1153 /*
1154 * Special case alignment of operands that would force alignment
1155 * beyond the extent of the extension. A further optimization
1156 * could special case this but only reduces the path length for
1157 * this infrequent case.
1158 */
1159 if (diff_exponent > DBLEXT_THRESHOLD) {
1160 diff_exponent = DBLEXT_THRESHOLD;
1161 }
1162
1163 /* Align right operand by shifting it to the right */
1164 Dblext_clear_sign(rightp1);
1165 Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1166 /*shifted by*/diff_exponent);
1167
1168 /* Treat sum and difference of the operands separately. */
1169 if ((int)save < 0) {
1170 /*
1171 * Difference of the two operands. Overflow can occur if the
1172 * multiply overflowed. A borrow can occur out of the hidden
1173 * bit and force a post normalization phase.
1174 */
1175 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1176 rightp1,rightp2,rightp3,rightp4,
1177 resultp1,resultp2,resultp3,resultp4);
1178 sign_save = Dbl_signextendedsign(resultp1);
1179 if (Dbl_iszero_hidden(resultp1)) {
1180 /* Handle normalization */
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001181 /* A straightforward algorithm would now shift the
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 * result and extension left until the hidden bit
1183 * becomes one. Not all of the extension bits need
1184 * participate in the shift. Only the two most
1185 * significant bits (round and guard) are needed.
1186 * If only a single shift is needed then the guard
1187 * bit becomes a significant low order bit and the
1188 * extension must participate in the rounding.
1189 * If more than a single shift is needed, then all
1190 * bits to the right of the guard bit are zeros,
1191 * and the guard bit may or may not be zero. */
1192 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1193 resultp4);
1194
1195 /* Need to check for a zero result. The sign and
1196 * exponent fields have already been zeroed. The more
1197 * efficient test of the full object can be used.
1198 */
1199 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1200 /* Must have been "x-x" or "x+(-x)". */
1201 if (Is_rounding_mode(ROUNDMINUS))
1202 Dbl_setone_sign(resultp1);
1203 Dbl_copytoptr(resultp1,resultp2,dstptr);
1204 return(NOEXCEPTION);
1205 }
1206 result_exponent--;
1207
1208 /* Look to see if normalization is finished. */
1209 if (Dbl_isone_hidden(resultp1)) {
1210 /* No further normalization is needed */
1211 goto round;
1212 }
1213
1214 /* Discover first one bit to determine shift amount.
1215 * Use a modified binary search. We have already
1216 * shifted the result one position right and still
1217 * not found a one so the remainder of the extension
1218 * must be zero and simplifies rounding. */
1219 /* Scan bytes */
1220 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1221 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1222 result_exponent -= 8;
1223 }
1224 /* Now narrow it down to the nibble */
1225 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1226 /* The lower nibble contains the
1227 * normalizing one */
1228 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1229 result_exponent -= 4;
1230 }
1231 /* Select case where first bit is set (already
1232 * normalized) otherwise select the proper shift. */
1233 jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1234 if (jumpsize <= 7) switch(jumpsize) {
1235 case 1:
1236 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1237 resultp4);
1238 result_exponent -= 3;
1239 break;
1240 case 2:
1241 case 3:
1242 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1243 resultp4);
1244 result_exponent -= 2;
1245 break;
1246 case 4:
1247 case 5:
1248 case 6:
1249 case 7:
1250 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1251 resultp4);
1252 result_exponent -= 1;
1253 break;
1254 }
1255 } /* end if (hidden...)... */
1256 /* Fall through and round */
1257 } /* end if (save < 0)... */
1258 else {
1259 /* Add magnitudes */
1260 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1261 rightp1,rightp2,rightp3,rightp4,
1262 /*to*/resultp1,resultp2,resultp3,resultp4);
1263 sign_save = Dbl_signextendedsign(resultp1);
1264 if (Dbl_isone_hiddenoverflow(resultp1)) {
1265 /* Prenormalization required. */
1266 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1267 resultp4);
1268 result_exponent++;
1269 } /* end if hiddenoverflow... */
1270 } /* end else ...add magnitudes... */
1271
1272 /* Round the result. If the extension and lower two words are
1273 * all zeros, then the result is exact. Otherwise round in the
1274 * correct direction. Underflow is possible. If a postnormalization
1275 * is necessary, then the mantissa is all zeros so no shift is needed.
1276 */
1277 round:
1278 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1279 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1280 result_exponent,is_tiny);
1281 }
1282 Dbl_set_sign(resultp1,/*using*/sign_save);
1283 if (Dblext_isnotzero_mantissap3(resultp3) ||
1284 Dblext_isnotzero_mantissap4(resultp4)) {
1285 inexact = TRUE;
1286 switch(Rounding_mode()) {
1287 case ROUNDNEAREST: /* The default. */
1288 if (Dblext_isone_highp3(resultp3)) {
1289 /* at least 1/2 ulp */
1290 if (Dblext_isnotzero_low31p3(resultp3) ||
1291 Dblext_isnotzero_mantissap4(resultp4) ||
1292 Dblext_isone_lowp2(resultp2)) {
1293 /* either exactly half way and odd or
1294 * more than 1/2ulp */
1295 Dbl_increment(resultp1,resultp2);
1296 }
1297 }
1298 break;
1299
1300 case ROUNDPLUS:
1301 if (Dbl_iszero_sign(resultp1)) {
1302 /* Round up positive results */
1303 Dbl_increment(resultp1,resultp2);
1304 }
1305 break;
1306
1307 case ROUNDMINUS:
1308 if (Dbl_isone_sign(resultp1)) {
1309 /* Round down negative results */
1310 Dbl_increment(resultp1,resultp2);
1311 }
1312
1313 case ROUNDZERO:;
1314 /* truncate is simple */
1315 } /* end switch... */
1316 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1317 }
1318 if (result_exponent >= DBL_INFINITY_EXPONENT) {
1319 /* Overflow */
1320 if (Is_overflowtrap_enabled()) {
1321 /*
1322 * Adjust bias of result
1323 */
1324 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1325 Dbl_copytoptr(resultp1,resultp2,dstptr);
1326 if (inexact)
1327 if (Is_inexacttrap_enabled())
1328 return (OPC_2E_OVERFLOWEXCEPTION |
1329 OPC_2E_INEXACTEXCEPTION);
1330 else Set_inexactflag();
1331 return (OPC_2E_OVERFLOWEXCEPTION);
1332 }
1333 inexact = TRUE;
1334 Set_overflowflag();
1335 Dbl_setoverflow(resultp1,resultp2);
1336 } else if (result_exponent <= 0) { /* underflow case */
1337 if (Is_underflowtrap_enabled()) {
1338 /*
1339 * Adjust bias of result
1340 */
1341 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1342 Dbl_copytoptr(resultp1,resultp2,dstptr);
1343 if (inexact)
1344 if (Is_inexacttrap_enabled())
1345 return (OPC_2E_UNDERFLOWEXCEPTION |
1346 OPC_2E_INEXACTEXCEPTION);
1347 else Set_inexactflag();
1348 return(OPC_2E_UNDERFLOWEXCEPTION);
1349 }
1350 else if (inexact && is_tiny) Set_underflowflag();
1351 }
1352 else Dbl_set_exponent(resultp1,result_exponent);
1353 Dbl_copytoptr(resultp1,resultp2,dstptr);
1354 if (inexact)
1355 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1356 else Set_inexactflag();
1357 return(NOEXCEPTION);
1358}
1359
1360/*
1361 * Single Floating-point Multiply Fused Add
1362 */
1363
1364sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1365
1366sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1367unsigned int *status;
1368{
1369 unsigned int opnd1, opnd2, opnd3;
1370 register unsigned int tmpresp1, tmpresp2;
1371 unsigned int rightp1, rightp2;
1372 unsigned int resultp1, resultp2 = 0;
1373 register int mpy_exponent, add_exponent, count;
1374 boolean inexact = FALSE, is_tiny = FALSE;
1375
1376 unsigned int signlessleft1, signlessright1, save;
1377 register int result_exponent, diff_exponent;
1378 int sign_save, jumpsize;
1379
1380 Sgl_copyfromptr(src1ptr,opnd1);
1381 Sgl_copyfromptr(src2ptr,opnd2);
1382 Sgl_copyfromptr(src3ptr,opnd3);
1383
1384 /*
1385 * set sign bit of result of multiply
1386 */
1387 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
1388 Sgl_setnegativezero(resultp1);
1389 else Sgl_setzero(resultp1);
1390
1391 /*
1392 * Generate multiply exponent
1393 */
1394 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1395
1396 /*
1397 * check first operand for NaN's or infinity
1398 */
1399 if (Sgl_isinfinity_exponent(opnd1)) {
1400 if (Sgl_iszero_mantissa(opnd1)) {
1401 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1402 if (Sgl_iszero_exponentmantissa(opnd2)) {
1403 /*
1404 * invalid since operands are infinity
1405 * and zero
1406 */
1407 if (Is_invalidtrap_enabled())
1408 return(OPC_2E_INVALIDEXCEPTION);
1409 Set_invalidflag();
1410 Sgl_makequietnan(resultp1);
1411 Sgl_copytoptr(resultp1,dstptr);
1412 return(NOEXCEPTION);
1413 }
1414 /*
1415 * Check third operand for infinity with a
1416 * sign opposite of the multiply result
1417 */
1418 if (Sgl_isinfinity(opnd3) &&
1419 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1420 /*
1421 * invalid since attempting a magnitude
1422 * subtraction of infinities
1423 */
1424 if (Is_invalidtrap_enabled())
1425 return(OPC_2E_INVALIDEXCEPTION);
1426 Set_invalidflag();
1427 Sgl_makequietnan(resultp1);
1428 Sgl_copytoptr(resultp1,dstptr);
1429 return(NOEXCEPTION);
1430 }
1431
1432 /*
1433 * return infinity
1434 */
1435 Sgl_setinfinity_exponentmantissa(resultp1);
1436 Sgl_copytoptr(resultp1,dstptr);
1437 return(NOEXCEPTION);
1438 }
1439 }
1440 else {
1441 /*
1442 * is NaN; signaling or quiet?
1443 */
1444 if (Sgl_isone_signaling(opnd1)) {
1445 /* trap if INVALIDTRAP enabled */
1446 if (Is_invalidtrap_enabled())
1447 return(OPC_2E_INVALIDEXCEPTION);
1448 /* make NaN quiet */
1449 Set_invalidflag();
1450 Sgl_set_quiet(opnd1);
1451 }
1452 /*
1453 * is second operand a signaling NaN?
1454 */
1455 else if (Sgl_is_signalingnan(opnd2)) {
1456 /* trap if INVALIDTRAP enabled */
1457 if (Is_invalidtrap_enabled())
1458 return(OPC_2E_INVALIDEXCEPTION);
1459 /* make NaN quiet */
1460 Set_invalidflag();
1461 Sgl_set_quiet(opnd2);
1462 Sgl_copytoptr(opnd2,dstptr);
1463 return(NOEXCEPTION);
1464 }
1465 /*
1466 * is third operand a signaling NaN?
1467 */
1468 else if (Sgl_is_signalingnan(opnd3)) {
1469 /* trap if INVALIDTRAP enabled */
1470 if (Is_invalidtrap_enabled())
1471 return(OPC_2E_INVALIDEXCEPTION);
1472 /* make NaN quiet */
1473 Set_invalidflag();
1474 Sgl_set_quiet(opnd3);
1475 Sgl_copytoptr(opnd3,dstptr);
1476 return(NOEXCEPTION);
1477 }
1478 /*
1479 * return quiet NaN
1480 */
1481 Sgl_copytoptr(opnd1,dstptr);
1482 return(NOEXCEPTION);
1483 }
1484 }
1485
1486 /*
1487 * check second operand for NaN's or infinity
1488 */
1489 if (Sgl_isinfinity_exponent(opnd2)) {
1490 if (Sgl_iszero_mantissa(opnd2)) {
1491 if (Sgl_isnotnan(opnd3)) {
1492 if (Sgl_iszero_exponentmantissa(opnd1)) {
1493 /*
1494 * invalid since multiply operands are
1495 * zero & infinity
1496 */
1497 if (Is_invalidtrap_enabled())
1498 return(OPC_2E_INVALIDEXCEPTION);
1499 Set_invalidflag();
1500 Sgl_makequietnan(opnd2);
1501 Sgl_copytoptr(opnd2,dstptr);
1502 return(NOEXCEPTION);
1503 }
1504
1505 /*
1506 * Check third operand for infinity with a
1507 * sign opposite of the multiply result
1508 */
1509 if (Sgl_isinfinity(opnd3) &&
1510 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1511 /*
1512 * invalid since attempting a magnitude
1513 * subtraction of infinities
1514 */
1515 if (Is_invalidtrap_enabled())
1516 return(OPC_2E_INVALIDEXCEPTION);
1517 Set_invalidflag();
1518 Sgl_makequietnan(resultp1);
1519 Sgl_copytoptr(resultp1,dstptr);
1520 return(NOEXCEPTION);
1521 }
1522
1523 /*
1524 * return infinity
1525 */
1526 Sgl_setinfinity_exponentmantissa(resultp1);
1527 Sgl_copytoptr(resultp1,dstptr);
1528 return(NOEXCEPTION);
1529 }
1530 }
1531 else {
1532 /*
1533 * is NaN; signaling or quiet?
1534 */
1535 if (Sgl_isone_signaling(opnd2)) {
1536 /* trap if INVALIDTRAP enabled */
1537 if (Is_invalidtrap_enabled())
1538 return(OPC_2E_INVALIDEXCEPTION);
1539 /* make NaN quiet */
1540 Set_invalidflag();
1541 Sgl_set_quiet(opnd2);
1542 }
1543 /*
1544 * is third operand a signaling NaN?
1545 */
1546 else if (Sgl_is_signalingnan(opnd3)) {
1547 /* trap if INVALIDTRAP enabled */
1548 if (Is_invalidtrap_enabled())
1549 return(OPC_2E_INVALIDEXCEPTION);
1550 /* make NaN quiet */
1551 Set_invalidflag();
1552 Sgl_set_quiet(opnd3);
1553 Sgl_copytoptr(opnd3,dstptr);
1554 return(NOEXCEPTION);
1555 }
1556 /*
1557 * return quiet NaN
1558 */
1559 Sgl_copytoptr(opnd2,dstptr);
1560 return(NOEXCEPTION);
1561 }
1562 }
1563
1564 /*
1565 * check third operand for NaN's or infinity
1566 */
1567 if (Sgl_isinfinity_exponent(opnd3)) {
1568 if (Sgl_iszero_mantissa(opnd3)) {
1569 /* return infinity */
1570 Sgl_copytoptr(opnd3,dstptr);
1571 return(NOEXCEPTION);
1572 } else {
1573 /*
1574 * is NaN; signaling or quiet?
1575 */
1576 if (Sgl_isone_signaling(opnd3)) {
1577 /* trap if INVALIDTRAP enabled */
1578 if (Is_invalidtrap_enabled())
1579 return(OPC_2E_INVALIDEXCEPTION);
1580 /* make NaN quiet */
1581 Set_invalidflag();
1582 Sgl_set_quiet(opnd3);
1583 }
1584 /*
1585 * return quiet NaN
1586 */
1587 Sgl_copytoptr(opnd3,dstptr);
1588 return(NOEXCEPTION);
1589 }
1590 }
1591
1592 /*
1593 * Generate multiply mantissa
1594 */
1595 if (Sgl_isnotzero_exponent(opnd1)) {
1596 /* set hidden bit */
1597 Sgl_clear_signexponent_set_hidden(opnd1);
1598 }
1599 else {
1600 /* check for zero */
1601 if (Sgl_iszero_mantissa(opnd1)) {
1602 /*
1603 * Perform the add opnd3 with zero here.
1604 */
1605 if (Sgl_iszero_exponentmantissa(opnd3)) {
1606 if (Is_rounding_mode(ROUNDMINUS)) {
1607 Sgl_or_signs(opnd3,resultp1);
1608 } else {
1609 Sgl_and_signs(opnd3,resultp1);
1610 }
1611 }
1612 /*
1613 * Now let's check for trapped underflow case.
1614 */
1615 else if (Sgl_iszero_exponent(opnd3) &&
1616 Is_underflowtrap_enabled()) {
1617 /* need to normalize results mantissa */
1618 sign_save = Sgl_signextendedsign(opnd3);
1619 result_exponent = 0;
1620 Sgl_leftshiftby1(opnd3);
1621 Sgl_normalize(opnd3,result_exponent);
1622 Sgl_set_sign(opnd3,/*using*/sign_save);
1623 Sgl_setwrapped_exponent(opnd3,result_exponent,
1624 unfl);
1625 Sgl_copytoptr(opnd3,dstptr);
1626 /* inexact = FALSE */
1627 return(OPC_2E_UNDERFLOWEXCEPTION);
1628 }
1629 Sgl_copytoptr(opnd3,dstptr);
1630 return(NOEXCEPTION);
1631 }
1632 /* is denormalized, adjust exponent */
1633 Sgl_clear_signexponent(opnd1);
1634 Sgl_leftshiftby1(opnd1);
1635 Sgl_normalize(opnd1,mpy_exponent);
1636 }
1637 /* opnd2 needs to have hidden bit set with msb in hidden bit */
1638 if (Sgl_isnotzero_exponent(opnd2)) {
1639 Sgl_clear_signexponent_set_hidden(opnd2);
1640 }
1641 else {
1642 /* check for zero */
1643 if (Sgl_iszero_mantissa(opnd2)) {
1644 /*
1645 * Perform the add opnd3 with zero here.
1646 */
1647 if (Sgl_iszero_exponentmantissa(opnd3)) {
1648 if (Is_rounding_mode(ROUNDMINUS)) {
1649 Sgl_or_signs(opnd3,resultp1);
1650 } else {
1651 Sgl_and_signs(opnd3,resultp1);
1652 }
1653 }
1654 /*
1655 * Now let's check for trapped underflow case.
1656 */
1657 else if (Sgl_iszero_exponent(opnd3) &&
1658 Is_underflowtrap_enabled()) {
1659 /* need to normalize results mantissa */
1660 sign_save = Sgl_signextendedsign(opnd3);
1661 result_exponent = 0;
1662 Sgl_leftshiftby1(opnd3);
1663 Sgl_normalize(opnd3,result_exponent);
1664 Sgl_set_sign(opnd3,/*using*/sign_save);
1665 Sgl_setwrapped_exponent(opnd3,result_exponent,
1666 unfl);
1667 Sgl_copytoptr(opnd3,dstptr);
1668 /* inexact = FALSE */
1669 return(OPC_2E_UNDERFLOWEXCEPTION);
1670 }
1671 Sgl_copytoptr(opnd3,dstptr);
1672 return(NOEXCEPTION);
1673 }
1674 /* is denormalized; want to normalize */
1675 Sgl_clear_signexponent(opnd2);
1676 Sgl_leftshiftby1(opnd2);
1677 Sgl_normalize(opnd2,mpy_exponent);
1678 }
1679
1680 /* Multiply the first two source mantissas together */
1681
1682 /*
1683 * The intermediate result will be kept in tmpres,
1684 * which needs enough room for 106 bits of mantissa,
1685 * so lets call it a Double extended.
1686 */
1687 Sglext_setzero(tmpresp1,tmpresp2);
1688
1689 /*
1690 * Four bits at a time are inspected in each loop, and a
1691 * simple shift and add multiply algorithm is used.
1692 */
1693 for (count = SGL_P-1; count >= 0; count -= 4) {
1694 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1695 if (Sbit28(opnd1)) {
1696 /* Twoword_add should be an ADD followed by 2 ADDC's */
1697 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1698 }
1699 if (Sbit29(opnd1)) {
1700 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1701 }
1702 if (Sbit30(opnd1)) {
1703 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1704 }
1705 if (Sbit31(opnd1)) {
1706 Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1707 }
1708 Sgl_rightshiftby4(opnd1);
1709 }
1710 if (Is_sexthiddenoverflow(tmpresp1)) {
1711 /* result mantissa >= 2 (mantissa overflow) */
1712 mpy_exponent++;
1713 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1714 } else {
1715 Sglext_rightshiftby3(tmpresp1,tmpresp2);
1716 }
1717
1718 /*
1719 * Restore the sign of the mpy result which was saved in resultp1.
1720 * The exponent will continue to be kept in mpy_exponent.
1721 */
1722 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1723
1724 /*
1725 * No rounding is required, since the result of the multiply
1726 * is exact in the extended format.
1727 */
1728
1729 /*
1730 * Now we are ready to perform the add portion of the operation.
1731 *
1732 * The exponents need to be kept as integers for now, since the
1733 * multiply result might not fit into the exponent field. We
1734 * can't overflow or underflow because of this yet, since the
1735 * add could bring the final result back into range.
1736 */
1737 add_exponent = Sgl_exponent(opnd3);
1738
1739 /*
1740 * Check for denormalized or zero add operand.
1741 */
1742 if (add_exponent == 0) {
1743 /* check for zero */
1744 if (Sgl_iszero_mantissa(opnd3)) {
1745 /* right is zero */
1746 /* Left can't be zero and must be result.
1747 *
1748 * The final result is now in tmpres and mpy_exponent,
1749 * and needs to be rounded and squeezed back into
1750 * double precision format from double extended.
1751 */
1752 result_exponent = mpy_exponent;
1753 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1754 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1755 goto round;
1756 }
1757
1758 /*
1759 * Neither are zeroes.
1760 * Adjust exponent and normalize add operand.
1761 */
1762 sign_save = Sgl_signextendedsign(opnd3); /* save sign */
1763 Sgl_clear_signexponent(opnd3);
1764 Sgl_leftshiftby1(opnd3);
1765 Sgl_normalize(opnd3,add_exponent);
1766 Sgl_set_sign(opnd3,sign_save); /* restore sign */
1767 } else {
1768 Sgl_clear_exponent_set_hidden(opnd3);
1769 }
1770 /*
1771 * Copy opnd3 to the double extended variable called right.
1772 */
1773 Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1774
1775 /*
1776 * A zero "save" helps discover equal operands (for later),
1777 * and is used in swapping operands (if needed).
1778 */
1779 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1780
1781 /*
1782 * Compare magnitude of operands.
1783 */
1784 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1785 Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1786 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1787 Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1788 /*
1789 * Set the left operand to the larger one by XOR swap.
1790 * First finish the first word "save".
1791 */
1792 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1793 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1794 Sglext_swap_lower(tmpresp2,rightp2);
1795 /* also setup exponents used in rest of routine */
1796 diff_exponent = add_exponent - mpy_exponent;
1797 result_exponent = add_exponent;
1798 } else {
1799 /* also setup exponents used in rest of routine */
1800 diff_exponent = mpy_exponent - add_exponent;
1801 result_exponent = mpy_exponent;
1802 }
1803 /* Invariant: left is not smaller than right. */
1804
1805 /*
1806 * Special case alignment of operands that would force alignment
1807 * beyond the extent of the extension. A further optimization
1808 * could special case this but only reduces the path length for
1809 * this infrequent case.
1810 */
1811 if (diff_exponent > SGLEXT_THRESHOLD) {
1812 diff_exponent = SGLEXT_THRESHOLD;
1813 }
1814
1815 /* Align right operand by shifting it to the right */
1816 Sglext_clear_sign(rightp1);
1817 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1818
1819 /* Treat sum and difference of the operands separately. */
1820 if ((int)save < 0) {
1821 /*
1822 * Difference of the two operands. Overflow can occur if the
1823 * multiply overflowed. A borrow can occur out of the hidden
1824 * bit and force a post normalization phase.
1825 */
1826 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1827 resultp1,resultp2);
1828 sign_save = Sgl_signextendedsign(resultp1);
1829 if (Sgl_iszero_hidden(resultp1)) {
1830 /* Handle normalization */
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001831 /* A straightforward algorithm would now shift the
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 * result and extension left until the hidden bit
1833 * becomes one. Not all of the extension bits need
1834 * participate in the shift. Only the two most
1835 * significant bits (round and guard) are needed.
1836 * If only a single shift is needed then the guard
1837 * bit becomes a significant low order bit and the
1838 * extension must participate in the rounding.
1839 * If more than a single shift is needed, then all
1840 * bits to the right of the guard bit are zeros,
1841 * and the guard bit may or may not be zero. */
1842 Sglext_leftshiftby1(resultp1,resultp2);
1843
1844 /* Need to check for a zero result. The sign and
1845 * exponent fields have already been zeroed. The more
1846 * efficient test of the full object can be used.
1847 */
1848 if (Sglext_iszero(resultp1,resultp2)) {
1849 /* Must have been "x-x" or "x+(-x)". */
1850 if (Is_rounding_mode(ROUNDMINUS))
1851 Sgl_setone_sign(resultp1);
1852 Sgl_copytoptr(resultp1,dstptr);
1853 return(NOEXCEPTION);
1854 }
1855 result_exponent--;
1856
1857 /* Look to see if normalization is finished. */
1858 if (Sgl_isone_hidden(resultp1)) {
1859 /* No further normalization is needed */
1860 goto round;
1861 }
1862
1863 /* Discover first one bit to determine shift amount.
1864 * Use a modified binary search. We have already
1865 * shifted the result one position right and still
1866 * not found a one so the remainder of the extension
1867 * must be zero and simplifies rounding. */
1868 /* Scan bytes */
1869 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1870 Sglext_leftshiftby8(resultp1,resultp2);
1871 result_exponent -= 8;
1872 }
1873 /* Now narrow it down to the nibble */
1874 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1875 /* The lower nibble contains the
1876 * normalizing one */
1877 Sglext_leftshiftby4(resultp1,resultp2);
1878 result_exponent -= 4;
1879 }
1880 /* Select case where first bit is set (already
1881 * normalized) otherwise select the proper shift. */
1882 jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1883 if (jumpsize <= 7) switch(jumpsize) {
1884 case 1:
1885 Sglext_leftshiftby3(resultp1,resultp2);
1886 result_exponent -= 3;
1887 break;
1888 case 2:
1889 case 3:
1890 Sglext_leftshiftby2(resultp1,resultp2);
1891 result_exponent -= 2;
1892 break;
1893 case 4:
1894 case 5:
1895 case 6:
1896 case 7:
1897 Sglext_leftshiftby1(resultp1,resultp2);
1898 result_exponent -= 1;
1899 break;
1900 }
1901 } /* end if (hidden...)... */
1902 /* Fall through and round */
1903 } /* end if (save < 0)... */
1904 else {
1905 /* Add magnitudes */
1906 Sglext_addition(tmpresp1,tmpresp2,
1907 rightp1,rightp2, /*to*/resultp1,resultp2);
1908 sign_save = Sgl_signextendedsign(resultp1);
1909 if (Sgl_isone_hiddenoverflow(resultp1)) {
1910 /* Prenormalization required. */
1911 Sglext_arithrightshiftby1(resultp1,resultp2);
1912 result_exponent++;
1913 } /* end if hiddenoverflow... */
1914 } /* end else ...add magnitudes... */
1915
1916 /* Round the result. If the extension and lower two words are
1917 * all zeros, then the result is exact. Otherwise round in the
1918 * correct direction. Underflow is possible. If a postnormalization
1919 * is necessary, then the mantissa is all zeros so no shift is needed.
1920 */
1921 round:
1922 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1923 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1924 }
1925 Sgl_set_sign(resultp1,/*using*/sign_save);
1926 if (Sglext_isnotzero_mantissap2(resultp2)) {
1927 inexact = TRUE;
1928 switch(Rounding_mode()) {
1929 case ROUNDNEAREST: /* The default. */
1930 if (Sglext_isone_highp2(resultp2)) {
1931 /* at least 1/2 ulp */
1932 if (Sglext_isnotzero_low31p2(resultp2) ||
1933 Sglext_isone_lowp1(resultp1)) {
1934 /* either exactly half way and odd or
1935 * more than 1/2ulp */
1936 Sgl_increment(resultp1);
1937 }
1938 }
1939 break;
1940
1941 case ROUNDPLUS:
1942 if (Sgl_iszero_sign(resultp1)) {
1943 /* Round up positive results */
1944 Sgl_increment(resultp1);
1945 }
1946 break;
1947
1948 case ROUNDMINUS:
1949 if (Sgl_isone_sign(resultp1)) {
1950 /* Round down negative results */
1951 Sgl_increment(resultp1);
1952 }
1953
1954 case ROUNDZERO:;
1955 /* truncate is simple */
1956 } /* end switch... */
1957 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1958 }
1959 if (result_exponent >= SGL_INFINITY_EXPONENT) {
1960 /* Overflow */
1961 if (Is_overflowtrap_enabled()) {
1962 /*
1963 * Adjust bias of result
1964 */
1965 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1966 Sgl_copytoptr(resultp1,dstptr);
1967 if (inexact)
1968 if (Is_inexacttrap_enabled())
1969 return (OPC_2E_OVERFLOWEXCEPTION |
1970 OPC_2E_INEXACTEXCEPTION);
1971 else Set_inexactflag();
1972 return (OPC_2E_OVERFLOWEXCEPTION);
1973 }
1974 inexact = TRUE;
1975 Set_overflowflag();
1976 Sgl_setoverflow(resultp1);
1977 } else if (result_exponent <= 0) { /* underflow case */
1978 if (Is_underflowtrap_enabled()) {
1979 /*
1980 * Adjust bias of result
1981 */
1982 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1983 Sgl_copytoptr(resultp1,dstptr);
1984 if (inexact)
1985 if (Is_inexacttrap_enabled())
1986 return (OPC_2E_UNDERFLOWEXCEPTION |
1987 OPC_2E_INEXACTEXCEPTION);
1988 else Set_inexactflag();
1989 return(OPC_2E_UNDERFLOWEXCEPTION);
1990 }
1991 else if (inexact && is_tiny) Set_underflowflag();
1992 }
1993 else Sgl_set_exponent(resultp1,result_exponent);
1994 Sgl_copytoptr(resultp1,dstptr);
1995 if (inexact)
1996 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1997 else Set_inexactflag();
1998 return(NOEXCEPTION);
1999}
2000
2001/*
2002 * Single Floating-point Multiply Negate Fused Add
2003 */
2004
2005sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2006
2007sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2008unsigned int *status;
2009{
2010 unsigned int opnd1, opnd2, opnd3;
2011 register unsigned int tmpresp1, tmpresp2;
2012 unsigned int rightp1, rightp2;
2013 unsigned int resultp1, resultp2 = 0;
2014 register int mpy_exponent, add_exponent, count;
2015 boolean inexact = FALSE, is_tiny = FALSE;
2016
2017 unsigned int signlessleft1, signlessright1, save;
2018 register int result_exponent, diff_exponent;
2019 int sign_save, jumpsize;
2020
2021 Sgl_copyfromptr(src1ptr,opnd1);
2022 Sgl_copyfromptr(src2ptr,opnd2);
2023 Sgl_copyfromptr(src3ptr,opnd3);
2024
2025 /*
2026 * set sign bit of result of multiply
2027 */
2028 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
2029 Sgl_setzero(resultp1);
2030 else
2031 Sgl_setnegativezero(resultp1);
2032
2033 /*
2034 * Generate multiply exponent
2035 */
2036 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2037
2038 /*
2039 * check first operand for NaN's or infinity
2040 */
2041 if (Sgl_isinfinity_exponent(opnd1)) {
2042 if (Sgl_iszero_mantissa(opnd1)) {
2043 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2044 if (Sgl_iszero_exponentmantissa(opnd2)) {
2045 /*
2046 * invalid since operands are infinity
2047 * and zero
2048 */
2049 if (Is_invalidtrap_enabled())
2050 return(OPC_2E_INVALIDEXCEPTION);
2051 Set_invalidflag();
2052 Sgl_makequietnan(resultp1);
2053 Sgl_copytoptr(resultp1,dstptr);
2054 return(NOEXCEPTION);
2055 }
2056 /*
2057 * Check third operand for infinity with a
2058 * sign opposite of the multiply result
2059 */
2060 if (Sgl_isinfinity(opnd3) &&
2061 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2062 /*
2063 * invalid since attempting a magnitude
2064 * subtraction of infinities
2065 */
2066 if (Is_invalidtrap_enabled())
2067 return(OPC_2E_INVALIDEXCEPTION);
2068 Set_invalidflag();
2069 Sgl_makequietnan(resultp1);
2070 Sgl_copytoptr(resultp1,dstptr);
2071 return(NOEXCEPTION);
2072 }
2073
2074 /*
2075 * return infinity
2076 */
2077 Sgl_setinfinity_exponentmantissa(resultp1);
2078 Sgl_copytoptr(resultp1,dstptr);
2079 return(NOEXCEPTION);
2080 }
2081 }
2082 else {
2083 /*
2084 * is NaN; signaling or quiet?
2085 */
2086 if (Sgl_isone_signaling(opnd1)) {
2087 /* trap if INVALIDTRAP enabled */
2088 if (Is_invalidtrap_enabled())
2089 return(OPC_2E_INVALIDEXCEPTION);
2090 /* make NaN quiet */
2091 Set_invalidflag();
2092 Sgl_set_quiet(opnd1);
2093 }
2094 /*
2095 * is second operand a signaling NaN?
2096 */
2097 else if (Sgl_is_signalingnan(opnd2)) {
2098 /* trap if INVALIDTRAP enabled */
2099 if (Is_invalidtrap_enabled())
2100 return(OPC_2E_INVALIDEXCEPTION);
2101 /* make NaN quiet */
2102 Set_invalidflag();
2103 Sgl_set_quiet(opnd2);
2104 Sgl_copytoptr(opnd2,dstptr);
2105 return(NOEXCEPTION);
2106 }
2107 /*
2108 * is third operand a signaling NaN?
2109 */
2110 else if (Sgl_is_signalingnan(opnd3)) {
2111 /* trap if INVALIDTRAP enabled */
2112 if (Is_invalidtrap_enabled())
2113 return(OPC_2E_INVALIDEXCEPTION);
2114 /* make NaN quiet */
2115 Set_invalidflag();
2116 Sgl_set_quiet(opnd3);
2117 Sgl_copytoptr(opnd3,dstptr);
2118 return(NOEXCEPTION);
2119 }
2120 /*
2121 * return quiet NaN
2122 */
2123 Sgl_copytoptr(opnd1,dstptr);
2124 return(NOEXCEPTION);
2125 }
2126 }
2127
2128 /*
2129 * check second operand for NaN's or infinity
2130 */
2131 if (Sgl_isinfinity_exponent(opnd2)) {
2132 if (Sgl_iszero_mantissa(opnd2)) {
2133 if (Sgl_isnotnan(opnd3)) {
2134 if (Sgl_iszero_exponentmantissa(opnd1)) {
2135 /*
2136 * invalid since multiply operands are
2137 * zero & infinity
2138 */
2139 if (Is_invalidtrap_enabled())
2140 return(OPC_2E_INVALIDEXCEPTION);
2141 Set_invalidflag();
2142 Sgl_makequietnan(opnd2);
2143 Sgl_copytoptr(opnd2,dstptr);
2144 return(NOEXCEPTION);
2145 }
2146
2147 /*
2148 * Check third operand for infinity with a
2149 * sign opposite of the multiply result
2150 */
2151 if (Sgl_isinfinity(opnd3) &&
2152 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2153 /*
2154 * invalid since attempting a magnitude
2155 * subtraction of infinities
2156 */
2157 if (Is_invalidtrap_enabled())
2158 return(OPC_2E_INVALIDEXCEPTION);
2159 Set_invalidflag();
2160 Sgl_makequietnan(resultp1);
2161 Sgl_copytoptr(resultp1,dstptr);
2162 return(NOEXCEPTION);
2163 }
2164
2165 /*
2166 * return infinity
2167 */
2168 Sgl_setinfinity_exponentmantissa(resultp1);
2169 Sgl_copytoptr(resultp1,dstptr);
2170 return(NOEXCEPTION);
2171 }
2172 }
2173 else {
2174 /*
2175 * is NaN; signaling or quiet?
2176 */
2177 if (Sgl_isone_signaling(opnd2)) {
2178 /* trap if INVALIDTRAP enabled */
2179 if (Is_invalidtrap_enabled())
2180 return(OPC_2E_INVALIDEXCEPTION);
2181 /* make NaN quiet */
2182 Set_invalidflag();
2183 Sgl_set_quiet(opnd2);
2184 }
2185 /*
2186 * is third operand a signaling NaN?
2187 */
2188 else if (Sgl_is_signalingnan(opnd3)) {
2189 /* trap if INVALIDTRAP enabled */
2190 if (Is_invalidtrap_enabled())
2191 return(OPC_2E_INVALIDEXCEPTION);
2192 /* make NaN quiet */
2193 Set_invalidflag();
2194 Sgl_set_quiet(opnd3);
2195 Sgl_copytoptr(opnd3,dstptr);
2196 return(NOEXCEPTION);
2197 }
2198 /*
2199 * return quiet NaN
2200 */
2201 Sgl_copytoptr(opnd2,dstptr);
2202 return(NOEXCEPTION);
2203 }
2204 }
2205
2206 /*
2207 * check third operand for NaN's or infinity
2208 */
2209 if (Sgl_isinfinity_exponent(opnd3)) {
2210 if (Sgl_iszero_mantissa(opnd3)) {
2211 /* return infinity */
2212 Sgl_copytoptr(opnd3,dstptr);
2213 return(NOEXCEPTION);
2214 } else {
2215 /*
2216 * is NaN; signaling or quiet?
2217 */
2218 if (Sgl_isone_signaling(opnd3)) {
2219 /* trap if INVALIDTRAP enabled */
2220 if (Is_invalidtrap_enabled())
2221 return(OPC_2E_INVALIDEXCEPTION);
2222 /* make NaN quiet */
2223 Set_invalidflag();
2224 Sgl_set_quiet(opnd3);
2225 }
2226 /*
2227 * return quiet NaN
2228 */
2229 Sgl_copytoptr(opnd3,dstptr);
2230 return(NOEXCEPTION);
2231 }
2232 }
2233
2234 /*
2235 * Generate multiply mantissa
2236 */
2237 if (Sgl_isnotzero_exponent(opnd1)) {
2238 /* set hidden bit */
2239 Sgl_clear_signexponent_set_hidden(opnd1);
2240 }
2241 else {
2242 /* check for zero */
2243 if (Sgl_iszero_mantissa(opnd1)) {
2244 /*
2245 * Perform the add opnd3 with zero here.
2246 */
2247 if (Sgl_iszero_exponentmantissa(opnd3)) {
2248 if (Is_rounding_mode(ROUNDMINUS)) {
2249 Sgl_or_signs(opnd3,resultp1);
2250 } else {
2251 Sgl_and_signs(opnd3,resultp1);
2252 }
2253 }
2254 /*
2255 * Now let's check for trapped underflow case.
2256 */
2257 else if (Sgl_iszero_exponent(opnd3) &&
2258 Is_underflowtrap_enabled()) {
2259 /* need to normalize results mantissa */
2260 sign_save = Sgl_signextendedsign(opnd3);
2261 result_exponent = 0;
2262 Sgl_leftshiftby1(opnd3);
2263 Sgl_normalize(opnd3,result_exponent);
2264 Sgl_set_sign(opnd3,/*using*/sign_save);
2265 Sgl_setwrapped_exponent(opnd3,result_exponent,
2266 unfl);
2267 Sgl_copytoptr(opnd3,dstptr);
2268 /* inexact = FALSE */
2269 return(OPC_2E_UNDERFLOWEXCEPTION);
2270 }
2271 Sgl_copytoptr(opnd3,dstptr);
2272 return(NOEXCEPTION);
2273 }
2274 /* is denormalized, adjust exponent */
2275 Sgl_clear_signexponent(opnd1);
2276 Sgl_leftshiftby1(opnd1);
2277 Sgl_normalize(opnd1,mpy_exponent);
2278 }
2279 /* opnd2 needs to have hidden bit set with msb in hidden bit */
2280 if (Sgl_isnotzero_exponent(opnd2)) {
2281 Sgl_clear_signexponent_set_hidden(opnd2);
2282 }
2283 else {
2284 /* check for zero */
2285 if (Sgl_iszero_mantissa(opnd2)) {
2286 /*
2287 * Perform the add opnd3 with zero here.
2288 */
2289 if (Sgl_iszero_exponentmantissa(opnd3)) {
2290 if (Is_rounding_mode(ROUNDMINUS)) {
2291 Sgl_or_signs(opnd3,resultp1);
2292 } else {
2293 Sgl_and_signs(opnd3,resultp1);
2294 }
2295 }
2296 /*
2297 * Now let's check for trapped underflow case.
2298 */
2299 else if (Sgl_iszero_exponent(opnd3) &&
2300 Is_underflowtrap_enabled()) {
2301 /* need to normalize results mantissa */
2302 sign_save = Sgl_signextendedsign(opnd3);
2303 result_exponent = 0;
2304 Sgl_leftshiftby1(opnd3);
2305 Sgl_normalize(opnd3,result_exponent);
2306 Sgl_set_sign(opnd3,/*using*/sign_save);
2307 Sgl_setwrapped_exponent(opnd3,result_exponent,
2308 unfl);
2309 Sgl_copytoptr(opnd3,dstptr);
2310 /* inexact = FALSE */
2311 return(OPC_2E_UNDERFLOWEXCEPTION);
2312 }
2313 Sgl_copytoptr(opnd3,dstptr);
2314 return(NOEXCEPTION);
2315 }
2316 /* is denormalized; want to normalize */
2317 Sgl_clear_signexponent(opnd2);
2318 Sgl_leftshiftby1(opnd2);
2319 Sgl_normalize(opnd2,mpy_exponent);
2320 }
2321
2322 /* Multiply the first two source mantissas together */
2323
2324 /*
2325 * The intermediate result will be kept in tmpres,
2326 * which needs enough room for 106 bits of mantissa,
2327 * so lets call it a Double extended.
2328 */
2329 Sglext_setzero(tmpresp1,tmpresp2);
2330
2331 /*
2332 * Four bits at a time are inspected in each loop, and a
2333 * simple shift and add multiply algorithm is used.
2334 */
2335 for (count = SGL_P-1; count >= 0; count -= 4) {
2336 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2337 if (Sbit28(opnd1)) {
2338 /* Twoword_add should be an ADD followed by 2 ADDC's */
2339 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2340 }
2341 if (Sbit29(opnd1)) {
2342 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2343 }
2344 if (Sbit30(opnd1)) {
2345 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2346 }
2347 if (Sbit31(opnd1)) {
2348 Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2349 }
2350 Sgl_rightshiftby4(opnd1);
2351 }
2352 if (Is_sexthiddenoverflow(tmpresp1)) {
2353 /* result mantissa >= 2 (mantissa overflow) */
2354 mpy_exponent++;
2355 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2356 } else {
2357 Sglext_rightshiftby3(tmpresp1,tmpresp2);
2358 }
2359
2360 /*
2361 * Restore the sign of the mpy result which was saved in resultp1.
2362 * The exponent will continue to be kept in mpy_exponent.
2363 */
2364 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2365
2366 /*
2367 * No rounding is required, since the result of the multiply
2368 * is exact in the extended format.
2369 */
2370
2371 /*
2372 * Now we are ready to perform the add portion of the operation.
2373 *
2374 * The exponents need to be kept as integers for now, since the
2375 * multiply result might not fit into the exponent field. We
2376 * can't overflow or underflow because of this yet, since the
2377 * add could bring the final result back into range.
2378 */
2379 add_exponent = Sgl_exponent(opnd3);
2380
2381 /*
2382 * Check for denormalized or zero add operand.
2383 */
2384 if (add_exponent == 0) {
2385 /* check for zero */
2386 if (Sgl_iszero_mantissa(opnd3)) {
2387 /* right is zero */
2388 /* Left can't be zero and must be result.
2389 *
2390 * The final result is now in tmpres and mpy_exponent,
2391 * and needs to be rounded and squeezed back into
2392 * double precision format from double extended.
2393 */
2394 result_exponent = mpy_exponent;
2395 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2396 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2397 goto round;
2398 }
2399
2400 /*
2401 * Neither are zeroes.
2402 * Adjust exponent and normalize add operand.
2403 */
2404 sign_save = Sgl_signextendedsign(opnd3); /* save sign */
2405 Sgl_clear_signexponent(opnd3);
2406 Sgl_leftshiftby1(opnd3);
2407 Sgl_normalize(opnd3,add_exponent);
2408 Sgl_set_sign(opnd3,sign_save); /* restore sign */
2409 } else {
2410 Sgl_clear_exponent_set_hidden(opnd3);
2411 }
2412 /*
2413 * Copy opnd3 to the double extended variable called right.
2414 */
2415 Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2416
2417 /*
2418 * A zero "save" helps discover equal operands (for later),
2419 * and is used in swapping operands (if needed).
2420 */
2421 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2422
2423 /*
2424 * Compare magnitude of operands.
2425 */
2426 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2427 Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2428 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2429 Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2430 /*
2431 * Set the left operand to the larger one by XOR swap.
2432 * First finish the first word "save".
2433 */
2434 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2435 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2436 Sglext_swap_lower(tmpresp2,rightp2);
2437 /* also setup exponents used in rest of routine */
2438 diff_exponent = add_exponent - mpy_exponent;
2439 result_exponent = add_exponent;
2440 } else {
2441 /* also setup exponents used in rest of routine */
2442 diff_exponent = mpy_exponent - add_exponent;
2443 result_exponent = mpy_exponent;
2444 }
2445 /* Invariant: left is not smaller than right. */
2446
2447 /*
2448 * Special case alignment of operands that would force alignment
2449 * beyond the extent of the extension. A further optimization
2450 * could special case this but only reduces the path length for
2451 * this infrequent case.
2452 */
2453 if (diff_exponent > SGLEXT_THRESHOLD) {
2454 diff_exponent = SGLEXT_THRESHOLD;
2455 }
2456
2457 /* Align right operand by shifting it to the right */
2458 Sglext_clear_sign(rightp1);
2459 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2460
2461 /* Treat sum and difference of the operands separately. */
2462 if ((int)save < 0) {
2463 /*
2464 * Difference of the two operands. Overflow can occur if the
2465 * multiply overflowed. A borrow can occur out of the hidden
2466 * bit and force a post normalization phase.
2467 */
2468 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2469 resultp1,resultp2);
2470 sign_save = Sgl_signextendedsign(resultp1);
2471 if (Sgl_iszero_hidden(resultp1)) {
2472 /* Handle normalization */
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002473 /* A straightforward algorithm would now shift the
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474 * result and extension left until the hidden bit
2475 * becomes one. Not all of the extension bits need
2476 * participate in the shift. Only the two most
2477 * significant bits (round and guard) are needed.
2478 * If only a single shift is needed then the guard
2479 * bit becomes a significant low order bit and the
2480 * extension must participate in the rounding.
2481 * If more than a single shift is needed, then all
2482 * bits to the right of the guard bit are zeros,
2483 * and the guard bit may or may not be zero. */
2484 Sglext_leftshiftby1(resultp1,resultp2);
2485
2486 /* Need to check for a zero result. The sign and
2487 * exponent fields have already been zeroed. The more
2488 * efficient test of the full object can be used.
2489 */
2490 if (Sglext_iszero(resultp1,resultp2)) {
2491 /* Must have been "x-x" or "x+(-x)". */
2492 if (Is_rounding_mode(ROUNDMINUS))
2493 Sgl_setone_sign(resultp1);
2494 Sgl_copytoptr(resultp1,dstptr);
2495 return(NOEXCEPTION);
2496 }
2497 result_exponent--;
2498
2499 /* Look to see if normalization is finished. */
2500 if (Sgl_isone_hidden(resultp1)) {
2501 /* No further normalization is needed */
2502 goto round;
2503 }
2504
2505 /* Discover first one bit to determine shift amount.
2506 * Use a modified binary search. We have already
2507 * shifted the result one position right and still
2508 * not found a one so the remainder of the extension
2509 * must be zero and simplifies rounding. */
2510 /* Scan bytes */
2511 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2512 Sglext_leftshiftby8(resultp1,resultp2);
2513 result_exponent -= 8;
2514 }
2515 /* Now narrow it down to the nibble */
2516 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2517 /* The lower nibble contains the
2518 * normalizing one */
2519 Sglext_leftshiftby4(resultp1,resultp2);
2520 result_exponent -= 4;
2521 }
2522 /* Select case where first bit is set (already
2523 * normalized) otherwise select the proper shift. */
2524 jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2525 if (jumpsize <= 7) switch(jumpsize) {
2526 case 1:
2527 Sglext_leftshiftby3(resultp1,resultp2);
2528 result_exponent -= 3;
2529 break;
2530 case 2:
2531 case 3:
2532 Sglext_leftshiftby2(resultp1,resultp2);
2533 result_exponent -= 2;
2534 break;
2535 case 4:
2536 case 5:
2537 case 6:
2538 case 7:
2539 Sglext_leftshiftby1(resultp1,resultp2);
2540 result_exponent -= 1;
2541 break;
2542 }
2543 } /* end if (hidden...)... */
2544 /* Fall through and round */
2545 } /* end if (save < 0)... */
2546 else {
2547 /* Add magnitudes */
2548 Sglext_addition(tmpresp1,tmpresp2,
2549 rightp1,rightp2, /*to*/resultp1,resultp2);
2550 sign_save = Sgl_signextendedsign(resultp1);
2551 if (Sgl_isone_hiddenoverflow(resultp1)) {
2552 /* Prenormalization required. */
2553 Sglext_arithrightshiftby1(resultp1,resultp2);
2554 result_exponent++;
2555 } /* end if hiddenoverflow... */
2556 } /* end else ...add magnitudes... */
2557
2558 /* Round the result. If the extension and lower two words are
2559 * all zeros, then the result is exact. Otherwise round in the
2560 * correct direction. Underflow is possible. If a postnormalization
2561 * is necessary, then the mantissa is all zeros so no shift is needed.
2562 */
2563 round:
2564 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2565 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2566 }
2567 Sgl_set_sign(resultp1,/*using*/sign_save);
2568 if (Sglext_isnotzero_mantissap2(resultp2)) {
2569 inexact = TRUE;
2570 switch(Rounding_mode()) {
2571 case ROUNDNEAREST: /* The default. */
2572 if (Sglext_isone_highp2(resultp2)) {
2573 /* at least 1/2 ulp */
2574 if (Sglext_isnotzero_low31p2(resultp2) ||
2575 Sglext_isone_lowp1(resultp1)) {
2576 /* either exactly half way and odd or
2577 * more than 1/2ulp */
2578 Sgl_increment(resultp1);
2579 }
2580 }
2581 break;
2582
2583 case ROUNDPLUS:
2584 if (Sgl_iszero_sign(resultp1)) {
2585 /* Round up positive results */
2586 Sgl_increment(resultp1);
2587 }
2588 break;
2589
2590 case ROUNDMINUS:
2591 if (Sgl_isone_sign(resultp1)) {
2592 /* Round down negative results */
2593 Sgl_increment(resultp1);
2594 }
2595
2596 case ROUNDZERO:;
2597 /* truncate is simple */
2598 } /* end switch... */
2599 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2600 }
2601 if (result_exponent >= SGL_INFINITY_EXPONENT) {
2602 /* Overflow */
2603 if (Is_overflowtrap_enabled()) {
2604 /*
2605 * Adjust bias of result
2606 */
2607 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2608 Sgl_copytoptr(resultp1,dstptr);
2609 if (inexact)
2610 if (Is_inexacttrap_enabled())
2611 return (OPC_2E_OVERFLOWEXCEPTION |
2612 OPC_2E_INEXACTEXCEPTION);
2613 else Set_inexactflag();
2614 return (OPC_2E_OVERFLOWEXCEPTION);
2615 }
2616 inexact = TRUE;
2617 Set_overflowflag();
2618 Sgl_setoverflow(resultp1);
2619 } else if (result_exponent <= 0) { /* underflow case */
2620 if (Is_underflowtrap_enabled()) {
2621 /*
2622 * Adjust bias of result
2623 */
2624 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2625 Sgl_copytoptr(resultp1,dstptr);
2626 if (inexact)
2627 if (Is_inexacttrap_enabled())
2628 return (OPC_2E_UNDERFLOWEXCEPTION |
2629 OPC_2E_INEXACTEXCEPTION);
2630 else Set_inexactflag();
2631 return(OPC_2E_UNDERFLOWEXCEPTION);
2632 }
2633 else if (inexact && is_tiny) Set_underflowflag();
2634 }
2635 else Sgl_set_exponent(resultp1,result_exponent);
2636 Sgl_copytoptr(resultp1,dstptr);
2637 if (inexact)
2638 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2639 else Set_inexactflag();
2640 return(NOEXCEPTION);
2641}
2642