Blame - arch/parisc/math-emu/fmpyfadd.c - SHIFTPHONES/mainline/linux

blob: 029f9bbfe7b5b93145280f8ad6e06669d1c420e3 [file] [log] [blame]

Thomas Gleixner	660662f	2019-05-24 12:04:10 +0200	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0-or-later
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2	/*
				3	* Linux/PA-RISC Project (http://www.parisc-linux.org/)
				4	*
				5	* Floating-point emulation code
				6	* Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	*/
				8	/*
				9	* BEGIN_DESC
				10	*
				11	* File:
				12	* @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $
				13	*
				14	* Purpose:
				15	* Double Floating-point Multiply Fused Add
				16	* Double Floating-point Multiply Negate Fused Add
				17	* Single Floating-point Multiply Fused Add
				18	* Single Floating-point Multiply Negate Fused Add
				19	*
				20	* External Interfaces:
				21	* dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				22	* dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				23	* sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				24	* sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				25	*
				26	* Internal Interfaces:
				27	*
				28	* Theory:
				29	* <<please update with a overview of the operation of this file>>
				30	*
				31	* END_DESC
				32	*/
				33
				34
				35	#include "float.h"
				36	#include "sgl_float.h"
				37	#include "dbl_float.h"
				38
				39
				40	/*
				41	* Double Floating-point Multiply Fused Add
				42	*/
				43
				44	int
				45	dbl_fmpyfadd(
				46	dbl_floating_point *src1ptr,
				47	dbl_floating_point *src2ptr,
				48	dbl_floating_point *src3ptr,
				49	unsigned int *status,
				50	dbl_floating_point *dstptr)
				51	{
				52	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
				53	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
				54	unsigned int rightp1, rightp2, rightp3, rightp4;
				55	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
				56	register int mpy_exponent, add_exponent, count;
				57	boolean inexact = FALSE, is_tiny = FALSE;
				58
				59	unsigned int signlessleft1, signlessright1, save;
				60	register int result_exponent, diff_exponent;
				61	int sign_save, jumpsize;
				62
				63	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
				64	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
				65	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
				66
				67	/*
				68	* set sign bit of result of multiply
				69	*/
				70	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
				71	Dbl_setnegativezerop1(resultp1);
				72	else Dbl_setzerop1(resultp1);
				73
				74	/*
				75	* Generate multiply exponent
				76	*/
				77	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
				78
				79	/*
				80	* check first operand for NaN's or infinity
				81	*/
				82	if (Dbl_isinfinity_exponent(opnd1p1)) {
				83	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
				84	if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
				85	Dbl_isnotnan(opnd3p1,opnd3p2)) {
				86	if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
				87	/*
				88	* invalid since operands are infinity
				89	* and zero
				90	*/
				91	if (Is_invalidtrap_enabled())
				92	return(OPC_2E_INVALIDEXCEPTION);
				93	Set_invalidflag();
				94	Dbl_makequietnan(resultp1,resultp2);
				95	Dbl_copytoptr(resultp1,resultp2,dstptr);
				96	return(NOEXCEPTION);
				97	}
				98	/*
				99	* Check third operand for infinity with a
				100	* sign opposite of the multiply result
				101	*/
				102	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
				103	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
				104	/*
				105	* invalid since attempting a magnitude
				106	* subtraction of infinities
				107	*/
				108	if (Is_invalidtrap_enabled())
				109	return(OPC_2E_INVALIDEXCEPTION);
				110	Set_invalidflag();
				111	Dbl_makequietnan(resultp1,resultp2);
				112	Dbl_copytoptr(resultp1,resultp2,dstptr);
				113	return(NOEXCEPTION);
				114	}
				115
				116	/*
				117	* return infinity
				118	*/
				119	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
				120	Dbl_copytoptr(resultp1,resultp2,dstptr);
				121	return(NOEXCEPTION);
				122	}
				123	}
				124	else {
				125	/*
				126	* is NaN; signaling or quiet?
				127	*/
				128	if (Dbl_isone_signaling(opnd1p1)) {
				129	/* trap if INVALIDTRAP enabled */
				130	if (Is_invalidtrap_enabled())
				131	return(OPC_2E_INVALIDEXCEPTION);
				132	/* make NaN quiet */
				133	Set_invalidflag();
				134	Dbl_set_quiet(opnd1p1);
				135	}
				136	/*
				137	* is second operand a signaling NaN?
				138	*/
				139	else if (Dbl_is_signalingnan(opnd2p1)) {
				140	/* trap if INVALIDTRAP enabled */
				141	if (Is_invalidtrap_enabled())
				142	return(OPC_2E_INVALIDEXCEPTION);
				143	/* make NaN quiet */
				144	Set_invalidflag();
				145	Dbl_set_quiet(opnd2p1);
				146	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				147	return(NOEXCEPTION);
				148	}
				149	/*
				150	* is third operand a signaling NaN?
				151	*/
				152	else if (Dbl_is_signalingnan(opnd3p1)) {
				153	/* trap if INVALIDTRAP enabled */
				154	if (Is_invalidtrap_enabled())
				155	return(OPC_2E_INVALIDEXCEPTION);
				156	/* make NaN quiet */
				157	Set_invalidflag();
				158	Dbl_set_quiet(opnd3p1);
				159	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				160	return(NOEXCEPTION);
				161	}
				162	/*
				163	* return quiet NaN
				164	*/
				165	Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
				166	return(NOEXCEPTION);
				167	}
				168	}
				169
				170	/*
				171	* check second operand for NaN's or infinity
				172	*/
				173	if (Dbl_isinfinity_exponent(opnd2p1)) {
				174	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
				175	if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
				176	if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
				177	/*
				178	* invalid since multiply operands are
				179	* zero & infinity
				180	*/
				181	if (Is_invalidtrap_enabled())
				182	return(OPC_2E_INVALIDEXCEPTION);
				183	Set_invalidflag();
				184	Dbl_makequietnan(opnd2p1,opnd2p2);
				185	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				186	return(NOEXCEPTION);
				187	}
				188
				189	/*
				190	* Check third operand for infinity with a
				191	* sign opposite of the multiply result
				192	*/
				193	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
				194	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
				195	/*
				196	* invalid since attempting a magnitude
				197	* subtraction of infinities
				198	*/
				199	if (Is_invalidtrap_enabled())
				200	return(OPC_2E_INVALIDEXCEPTION);
				201	Set_invalidflag();
				202	Dbl_makequietnan(resultp1,resultp2);
				203	Dbl_copytoptr(resultp1,resultp2,dstptr);
				204	return(NOEXCEPTION);
				205	}
				206
				207	/*
				208	* return infinity
				209	*/
				210	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
				211	Dbl_copytoptr(resultp1,resultp2,dstptr);
				212	return(NOEXCEPTION);
				213	}
				214	}
				215	else {
				216	/*
				217	* is NaN; signaling or quiet?
				218	*/
				219	if (Dbl_isone_signaling(opnd2p1)) {
				220	/* trap if INVALIDTRAP enabled */
				221	if (Is_invalidtrap_enabled())
				222	return(OPC_2E_INVALIDEXCEPTION);
				223	/* make NaN quiet */
				224	Set_invalidflag();
				225	Dbl_set_quiet(opnd2p1);
				226	}
				227	/*
				228	* is third operand a signaling NaN?
				229	*/
				230	else if (Dbl_is_signalingnan(opnd3p1)) {
				231	/* trap if INVALIDTRAP enabled */
				232	if (Is_invalidtrap_enabled())
				233	return(OPC_2E_INVALIDEXCEPTION);
				234	/* make NaN quiet */
				235	Set_invalidflag();
				236	Dbl_set_quiet(opnd3p1);
				237	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				238	return(NOEXCEPTION);
				239	}
				240	/*
				241	* return quiet NaN
				242	*/
				243	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				244	return(NOEXCEPTION);
				245	}
				246	}
				247
				248	/*
				249	* check third operand for NaN's or infinity
				250	*/
				251	if (Dbl_isinfinity_exponent(opnd3p1)) {
				252	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
				253	/* return infinity */
				254	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				255	return(NOEXCEPTION);
				256	} else {
				257	/*
				258	* is NaN; signaling or quiet?
				259	*/
				260	if (Dbl_isone_signaling(opnd3p1)) {
				261	/* trap if INVALIDTRAP enabled */
				262	if (Is_invalidtrap_enabled())
				263	return(OPC_2E_INVALIDEXCEPTION);
				264	/* make NaN quiet */
				265	Set_invalidflag();
				266	Dbl_set_quiet(opnd3p1);
				267	}
				268	/*
				269	* return quiet NaN
				270	*/
				271	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				272	return(NOEXCEPTION);
				273	}
				274	}
				275
				276	/*
				277	* Generate multiply mantissa
				278	*/
				279	if (Dbl_isnotzero_exponent(opnd1p1)) {
				280	/* set hidden bit */
				281	Dbl_clear_signexponent_set_hidden(opnd1p1);
				282	}
				283	else {
				284	/* check for zero */
				285	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
				286	/*
				287	* Perform the add opnd3 with zero here.
				288	*/
				289	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
				290	if (Is_rounding_mode(ROUNDMINUS)) {
				291	Dbl_or_signs(opnd3p1,resultp1);
				292	} else {
				293	Dbl_and_signs(opnd3p1,resultp1);
				294	}
				295	}
				296	/*
				297	* Now let's check for trapped underflow case.
				298	*/
				299	else if (Dbl_iszero_exponent(opnd3p1) &&
				300	Is_underflowtrap_enabled()) {
				301	/* need to normalize results mantissa */
				302	sign_save = Dbl_signextendedsign(opnd3p1);
				303	result_exponent = 0;
				304	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				305	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
				306	Dbl_set_sign(opnd3p1,/using/sign_save);
				307	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
				308	unfl);
				309	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				310	/* inexact = FALSE */
				311	return(OPC_2E_UNDERFLOWEXCEPTION);
				312	}
				313	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				314	return(NOEXCEPTION);
				315	}
				316	/* is denormalized, adjust exponent */
				317	Dbl_clear_signexponent(opnd1p1);
				318	Dbl_leftshiftby1(opnd1p1,opnd1p2);
				319	Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
				320	}
				321	/* opnd2 needs to have hidden bit set with msb in hidden bit */
				322	if (Dbl_isnotzero_exponent(opnd2p1)) {
				323	Dbl_clear_signexponent_set_hidden(opnd2p1);
				324	}
				325	else {
				326	/* check for zero */
				327	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
				328	/*
				329	* Perform the add opnd3 with zero here.
				330	*/
				331	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
				332	if (Is_rounding_mode(ROUNDMINUS)) {
				333	Dbl_or_signs(opnd3p1,resultp1);
				334	} else {
				335	Dbl_and_signs(opnd3p1,resultp1);
				336	}
				337	}
				338	/*
				339	* Now let's check for trapped underflow case.
				340	*/
				341	else if (Dbl_iszero_exponent(opnd3p1) &&
				342	Is_underflowtrap_enabled()) {
				343	/* need to normalize results mantissa */
				344	sign_save = Dbl_signextendedsign(opnd3p1);
				345	result_exponent = 0;
				346	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				347	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
				348	Dbl_set_sign(opnd3p1,/using/sign_save);
				349	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
				350	unfl);
				351	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				352	/* inexact = FALSE */
				353	return(OPC_2E_UNDERFLOWEXCEPTION);
				354	}
				355	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				356	return(NOEXCEPTION);
				357	}
				358	/* is denormalized; want to normalize */
				359	Dbl_clear_signexponent(opnd2p1);
				360	Dbl_leftshiftby1(opnd2p1,opnd2p2);
				361	Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
				362	}
				363
				364	/* Multiply the first two source mantissas together */
				365
				366	/*
				367	* The intermediate result will be kept in tmpres,
				368	* which needs enough room for 106 bits of mantissa,
				369	* so lets call it a Double extended.
				370	*/
				371	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				372
				373	/*
				374	* Four bits at a time are inspected in each loop, and a
				375	* simple shift and add multiply algorithm is used.
				376	*/
				377	for (count = DBL_P-1; count >= 0; count -= 4) {
				378	Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				379	if (Dbit28p2(opnd1p2)) {
				380	/* Fourword_add should be an ADD followed by 3 ADDC's */
				381	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				382	opnd2p1<<3 \| opnd2p2>>29, opnd2p2<<3, 0, 0);
				383	}
				384	if (Dbit29p2(opnd1p2)) {
				385	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				386	opnd2p1<<2 \| opnd2p2>>30, opnd2p2<<2, 0, 0);
				387	}
				388	if (Dbit30p2(opnd1p2)) {
				389	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				390	opnd2p1<<1 \| opnd2p2>>31, opnd2p2<<1, 0, 0);
				391	}
				392	if (Dbit31p2(opnd1p2)) {
				393	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				394	opnd2p1, opnd2p2, 0, 0);
				395	}
				396	Dbl_rightshiftby4(opnd1p1,opnd1p2);
				397	}
				398	if (Is_dexthiddenoverflow(tmpresp1)) {
				399	/* result mantissa >= 2 (mantissa overflow) */
				400	mpy_exponent++;
				401	Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				402	}
				403
				404	/*
				405	* Restore the sign of the mpy result which was saved in resultp1.
				406	* The exponent will continue to be kept in mpy_exponent.
				407	*/
				408	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
				409
				410	/*
				411	* No rounding is required, since the result of the multiply
				412	* is exact in the extended format.
				413	*/
				414
				415	/*
				416	* Now we are ready to perform the add portion of the operation.
				417	*
				418	* The exponents need to be kept as integers for now, since the
				419	* multiply result might not fit into the exponent field. We
				420	* can't overflow or underflow because of this yet, since the
				421	* add could bring the final result back into range.
				422	*/
				423	add_exponent = Dbl_exponent(opnd3p1);
				424
				425	/*
				426	* Check for denormalized or zero add operand.
				427	*/
				428	if (add_exponent == 0) {
				429	/* check for zero */
				430	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
				431	/* right is zero */
				432	/* Left can't be zero and must be result.
				433	*
				434	* The final result is now in tmpres and mpy_exponent,
				435	* and needs to be rounded and squeezed back into
				436	* double precision format from double extended.
				437	*/
				438	result_exponent = mpy_exponent;
				439	Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				440	resultp1,resultp2,resultp3,resultp4);
				441	sign_save = Dbl_signextendedsign(resultp1);/save sign/
				442	goto round;
				443	}
				444
				445	/*
				446	* Neither are zeroes.
				447	* Adjust exponent and normalize add operand.
				448	*/
				449	sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
				450	Dbl_clear_signexponent(opnd3p1);
				451	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				452	Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
				453	Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
				454	} else {
				455	Dbl_clear_exponent_set_hidden(opnd3p1);
				456	}
				457	/*
				458	* Copy opnd3 to the double extended variable called right.
				459	*/
				460	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
				461
				462	/*
				463	* A zero "save" helps discover equal operands (for later),
				464	* and is used in swapping operands (if needed).
				465	*/
				466	Dblext_xortointp1(tmpresp1,rightp1,/to/save);
				467
				468	/*
				469	* Compare magnitude of operands.
				470	*/
				471	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
				472	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
				473	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
				474	Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
				475	/*
				476	* Set the left operand to the larger one by XOR swap.
				477	* First finish the first word "save".
				478	*/
				479	Dblext_xorfromintp1(save,rightp1,/to/rightp1);
				480	Dblext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
				481	Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
				482	rightp2,rightp3,rightp4);
				483	/* also setup exponents used in rest of routine */
				484	diff_exponent = add_exponent - mpy_exponent;
				485	result_exponent = add_exponent;
				486	} else {
				487	/* also setup exponents used in rest of routine */
				488	diff_exponent = mpy_exponent - add_exponent;
				489	result_exponent = mpy_exponent;
				490	}
				491	/* Invariant: left is not smaller than right. */
				492
				493	/*
				494	* Special case alignment of operands that would force alignment
				495	* beyond the extent of the extension. A further optimization
				496	* could special case this but only reduces the path length for
				497	* this infrequent case.
				498	*/
				499	if (diff_exponent > DBLEXT_THRESHOLD) {
				500	diff_exponent = DBLEXT_THRESHOLD;
				501	}
				502
				503	/* Align right operand by shifting it to the right */
				504	Dblext_clear_sign(rightp1);
				505	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
				506	/shifted by/diff_exponent);
				507
				508	/* Treat sum and difference of the operands separately. */
				509	if ((int)save < 0) {
				510	/*
				511	* Difference of the two operands. Overflow can occur if the
				512	* multiply overflowed. A borrow can occur out of the hidden
				513	* bit and force a post normalization phase.
				514	*/
				515	Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				516	rightp1,rightp2,rightp3,rightp4,
				517	resultp1,resultp2,resultp3,resultp4);
				518	sign_save = Dbl_signextendedsign(resultp1);
				519	if (Dbl_iszero_hidden(resultp1)) {
				520	/* Handle normalization */
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	521	/* A straightforward algorithm would now shift the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	522	* result and extension left until the hidden bit
				523	* becomes one. Not all of the extension bits need
				524	* participate in the shift. Only the two most
				525	* significant bits (round and guard) are needed.
				526	* If only a single shift is needed then the guard
				527	* bit becomes a significant low order bit and the
				528	* extension must participate in the rounding.
				529	* If more than a single shift is needed, then all
				530	* bits to the right of the guard bit are zeros,
				531	* and the guard bit may or may not be zero. */
				532	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
				533	resultp4);
				534
				535	/* Need to check for a zero result. The sign and
				536	* exponent fields have already been zeroed. The more
				537	* efficient test of the full object can be used.
				538	*/
				539	if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
				540	/* Must have been "x-x" or "x+(-x)". */
				541	if (Is_rounding_mode(ROUNDMINUS))
				542	Dbl_setone_sign(resultp1);
				543	Dbl_copytoptr(resultp1,resultp2,dstptr);
				544	return(NOEXCEPTION);
				545	}
				546	result_exponent--;
				547
				548	/* Look to see if normalization is finished. */
				549	if (Dbl_isone_hidden(resultp1)) {
				550	/* No further normalization is needed */
				551	goto round;
				552	}
				553
				554	/* Discover first one bit to determine shift amount.
				555	* Use a modified binary search. We have already
				556	* shifted the result one position right and still
				557	* not found a one so the remainder of the extension
				558	* must be zero and simplifies rounding. */
				559	/* Scan bytes */
				560	while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
				561	Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
				562	result_exponent -= 8;
				563	}
				564	/* Now narrow it down to the nibble */
				565	if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
				566	/* The lower nibble contains the
				567	* normalizing one */
				568	Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
				569	result_exponent -= 4;
				570	}
				571	/* Select case where first bit is set (already
				572	* normalized) otherwise select the proper shift. */
				573	jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
				574	if (jumpsize <= 7) switch(jumpsize) {
				575	case 1:
				576	Dblext_leftshiftby3(resultp1,resultp2,resultp3,
				577	resultp4);
				578	result_exponent -= 3;
				579	break;
				580	case 2:
				581	case 3:
				582	Dblext_leftshiftby2(resultp1,resultp2,resultp3,
				583	resultp4);
				584	result_exponent -= 2;
				585	break;
				586	case 4:
				587	case 5:
				588	case 6:
				589	case 7:
				590	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
				591	resultp4);
				592	result_exponent -= 1;
				593	break;
				594	}
				595	} /* end if (hidden...)... */
				596	/* Fall through and round */
				597	} /* end if (save < 0)... */
				598	else {
				599	/* Add magnitudes */
				600	Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				601	rightp1,rightp2,rightp3,rightp4,
				602	/to/resultp1,resultp2,resultp3,resultp4);
				603	sign_save = Dbl_signextendedsign(resultp1);
				604	if (Dbl_isone_hiddenoverflow(resultp1)) {
				605	/* Prenormalization required. */
				606	Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
				607	resultp4);
				608	result_exponent++;
				609	} /* end if hiddenoverflow... */
				610	} /* end else ...add magnitudes... */
				611
				612	/* Round the result. If the extension and lower two words are
				613	* all zeros, then the result is exact. Otherwise round in the
				614	* correct direction. Underflow is possible. If a postnormalization
				615	* is necessary, then the mantissa is all zeros so no shift is needed.
				616	*/
				617	round:
				618	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
				619	Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
				620	result_exponent,is_tiny);
				621	}
				622	Dbl_set_sign(resultp1,/using/sign_save);
				623	if (Dblext_isnotzero_mantissap3(resultp3) \|\|
				624	Dblext_isnotzero_mantissap4(resultp4)) {
				625	inexact = TRUE;
				626	switch(Rounding_mode()) {
				627	case ROUNDNEAREST: /* The default. */
				628	if (Dblext_isone_highp3(resultp3)) {
				629	/* at least 1/2 ulp */
				630	if (Dblext_isnotzero_low31p3(resultp3) \|\|
				631	Dblext_isnotzero_mantissap4(resultp4) \|\|
				632	Dblext_isone_lowp2(resultp2)) {
				633	/* either exactly half way and odd or
				634	* more than 1/2ulp */
				635	Dbl_increment(resultp1,resultp2);
				636	}
				637	}
				638	break;
				639
				640	case ROUNDPLUS:
				641	if (Dbl_iszero_sign(resultp1)) {
				642	/* Round up positive results */
				643	Dbl_increment(resultp1,resultp2);
				644	}
				645	break;
				646
				647	case ROUNDMINUS:
				648	if (Dbl_isone_sign(resultp1)) {
				649	/* Round down negative results */
				650	Dbl_increment(resultp1,resultp2);
				651	}
				652
				653	case ROUNDZERO:;
				654	/* truncate is simple */
				655	} /* end switch... */
				656	if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
				657	}
				658	if (result_exponent >= DBL_INFINITY_EXPONENT) {
				659	/* trap if OVERFLOWTRAP enabled */
				660	if (Is_overflowtrap_enabled()) {
				661	/*
				662	* Adjust bias of result
				663	*/
				664	Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
				665	Dbl_copytoptr(resultp1,resultp2,dstptr);
				666	if (inexact)
				667	if (Is_inexacttrap_enabled())
				668	return (OPC_2E_OVERFLOWEXCEPTION \|
				669	OPC_2E_INEXACTEXCEPTION);
				670	else Set_inexactflag();
				671	return (OPC_2E_OVERFLOWEXCEPTION);
				672	}
				673	inexact = TRUE;
				674	Set_overflowflag();
				675	/* set result to infinity or largest number */
				676	Dbl_setoverflow(resultp1,resultp2);
				677
				678	} else if (result_exponent <= 0) { /* underflow case */
				679	if (Is_underflowtrap_enabled()) {
				680	/*
				681	* Adjust bias of result
				682	*/
				683	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
				684	Dbl_copytoptr(resultp1,resultp2,dstptr);
				685	if (inexact)
				686	if (Is_inexacttrap_enabled())
				687	return (OPC_2E_UNDERFLOWEXCEPTION \|
				688	OPC_2E_INEXACTEXCEPTION);
				689	else Set_inexactflag();
				690	return(OPC_2E_UNDERFLOWEXCEPTION);
				691	}
				692	else if (inexact && is_tiny) Set_underflowflag();
				693	}
				694	else Dbl_set_exponent(resultp1,result_exponent);
				695	Dbl_copytoptr(resultp1,resultp2,dstptr);
				696	if (inexact)
				697	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
				698	else Set_inexactflag();
				699	return(NOEXCEPTION);
				700	}
				701
				702	/*
				703	* Double Floating-point Multiply Negate Fused Add
				704	*/
				705
				706	dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				707
				708	dbl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
				709	unsigned int *status;
				710	{
				711	unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
				712	register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
				713	unsigned int rightp1, rightp2, rightp3, rightp4;
				714	unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
				715	register int mpy_exponent, add_exponent, count;
				716	boolean inexact = FALSE, is_tiny = FALSE;
				717
				718	unsigned int signlessleft1, signlessright1, save;
				719	register int result_exponent, diff_exponent;
				720	int sign_save, jumpsize;
				721
				722	Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
				723	Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
				724	Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
				725
				726	/*
				727	* set sign bit of result of multiply
				728	*/
				729	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
				730	Dbl_setzerop1(resultp1);
				731	else
				732	Dbl_setnegativezerop1(resultp1);
				733
				734	/*
				735	* Generate multiply exponent
				736	*/
				737	mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
				738
				739	/*
				740	* check first operand for NaN's or infinity
				741	*/
				742	if (Dbl_isinfinity_exponent(opnd1p1)) {
				743	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
				744	if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
				745	Dbl_isnotnan(opnd3p1,opnd3p2)) {
				746	if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
				747	/*
				748	* invalid since operands are infinity
				749	* and zero
				750	*/
				751	if (Is_invalidtrap_enabled())
				752	return(OPC_2E_INVALIDEXCEPTION);
				753	Set_invalidflag();
				754	Dbl_makequietnan(resultp1,resultp2);
				755	Dbl_copytoptr(resultp1,resultp2,dstptr);
				756	return(NOEXCEPTION);
				757	}
				758	/*
				759	* Check third operand for infinity with a
				760	* sign opposite of the multiply result
				761	*/
				762	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
				763	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
				764	/*
				765	* invalid since attempting a magnitude
				766	* subtraction of infinities
				767	*/
				768	if (Is_invalidtrap_enabled())
				769	return(OPC_2E_INVALIDEXCEPTION);
				770	Set_invalidflag();
				771	Dbl_makequietnan(resultp1,resultp2);
				772	Dbl_copytoptr(resultp1,resultp2,dstptr);
				773	return(NOEXCEPTION);
				774	}
				775
				776	/*
				777	* return infinity
				778	*/
				779	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
				780	Dbl_copytoptr(resultp1,resultp2,dstptr);
				781	return(NOEXCEPTION);
				782	}
				783	}
				784	else {
				785	/*
				786	* is NaN; signaling or quiet?
				787	*/
				788	if (Dbl_isone_signaling(opnd1p1)) {
				789	/* trap if INVALIDTRAP enabled */
				790	if (Is_invalidtrap_enabled())
				791	return(OPC_2E_INVALIDEXCEPTION);
				792	/* make NaN quiet */
				793	Set_invalidflag();
				794	Dbl_set_quiet(opnd1p1);
				795	}
				796	/*
				797	* is second operand a signaling NaN?
				798	*/
				799	else if (Dbl_is_signalingnan(opnd2p1)) {
				800	/* trap if INVALIDTRAP enabled */
				801	if (Is_invalidtrap_enabled())
				802	return(OPC_2E_INVALIDEXCEPTION);
				803	/* make NaN quiet */
				804	Set_invalidflag();
				805	Dbl_set_quiet(opnd2p1);
				806	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				807	return(NOEXCEPTION);
				808	}
				809	/*
				810	* is third operand a signaling NaN?
				811	*/
				812	else if (Dbl_is_signalingnan(opnd3p1)) {
				813	/* trap if INVALIDTRAP enabled */
				814	if (Is_invalidtrap_enabled())
				815	return(OPC_2E_INVALIDEXCEPTION);
				816	/* make NaN quiet */
				817	Set_invalidflag();
				818	Dbl_set_quiet(opnd3p1);
				819	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				820	return(NOEXCEPTION);
				821	}
				822	/*
				823	* return quiet NaN
				824	*/
				825	Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
				826	return(NOEXCEPTION);
				827	}
				828	}
				829
				830	/*
				831	* check second operand for NaN's or infinity
				832	*/
				833	if (Dbl_isinfinity_exponent(opnd2p1)) {
				834	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
				835	if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
				836	if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
				837	/*
				838	* invalid since multiply operands are
				839	* zero & infinity
				840	*/
				841	if (Is_invalidtrap_enabled())
				842	return(OPC_2E_INVALIDEXCEPTION);
				843	Set_invalidflag();
				844	Dbl_makequietnan(opnd2p1,opnd2p2);
				845	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				846	return(NOEXCEPTION);
				847	}
				848
				849	/*
				850	* Check third operand for infinity with a
				851	* sign opposite of the multiply result
				852	*/
				853	if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
				854	(Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
				855	/*
				856	* invalid since attempting a magnitude
				857	* subtraction of infinities
				858	*/
				859	if (Is_invalidtrap_enabled())
				860	return(OPC_2E_INVALIDEXCEPTION);
				861	Set_invalidflag();
				862	Dbl_makequietnan(resultp1,resultp2);
				863	Dbl_copytoptr(resultp1,resultp2,dstptr);
				864	return(NOEXCEPTION);
				865	}
				866
				867	/*
				868	* return infinity
				869	*/
				870	Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
				871	Dbl_copytoptr(resultp1,resultp2,dstptr);
				872	return(NOEXCEPTION);
				873	}
				874	}
				875	else {
				876	/*
				877	* is NaN; signaling or quiet?
				878	*/
				879	if (Dbl_isone_signaling(opnd2p1)) {
				880	/* trap if INVALIDTRAP enabled */
				881	if (Is_invalidtrap_enabled())
				882	return(OPC_2E_INVALIDEXCEPTION);
				883	/* make NaN quiet */
				884	Set_invalidflag();
				885	Dbl_set_quiet(opnd2p1);
				886	}
				887	/*
				888	* is third operand a signaling NaN?
				889	*/
				890	else if (Dbl_is_signalingnan(opnd3p1)) {
				891	/* trap if INVALIDTRAP enabled */
				892	if (Is_invalidtrap_enabled())
				893	return(OPC_2E_INVALIDEXCEPTION);
				894	/* make NaN quiet */
				895	Set_invalidflag();
				896	Dbl_set_quiet(opnd3p1);
				897	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				898	return(NOEXCEPTION);
				899	}
				900	/*
				901	* return quiet NaN
				902	*/
				903	Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
				904	return(NOEXCEPTION);
				905	}
				906	}
				907
				908	/*
				909	* check third operand for NaN's or infinity
				910	*/
				911	if (Dbl_isinfinity_exponent(opnd3p1)) {
				912	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
				913	/* return infinity */
				914	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				915	return(NOEXCEPTION);
				916	} else {
				917	/*
				918	* is NaN; signaling or quiet?
				919	*/
				920	if (Dbl_isone_signaling(opnd3p1)) {
				921	/* trap if INVALIDTRAP enabled */
				922	if (Is_invalidtrap_enabled())
				923	return(OPC_2E_INVALIDEXCEPTION);
				924	/* make NaN quiet */
				925	Set_invalidflag();
				926	Dbl_set_quiet(opnd3p1);
				927	}
				928	/*
				929	* return quiet NaN
				930	*/
				931	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				932	return(NOEXCEPTION);
				933	}
				934	}
				935
				936	/*
				937	* Generate multiply mantissa
				938	*/
				939	if (Dbl_isnotzero_exponent(opnd1p1)) {
				940	/* set hidden bit */
				941	Dbl_clear_signexponent_set_hidden(opnd1p1);
				942	}
				943	else {
				944	/* check for zero */
				945	if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
				946	/*
				947	* Perform the add opnd3 with zero here.
				948	*/
				949	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
				950	if (Is_rounding_mode(ROUNDMINUS)) {
				951	Dbl_or_signs(opnd3p1,resultp1);
				952	} else {
				953	Dbl_and_signs(opnd3p1,resultp1);
				954	}
				955	}
				956	/*
				957	* Now let's check for trapped underflow case.
				958	*/
				959	else if (Dbl_iszero_exponent(opnd3p1) &&
				960	Is_underflowtrap_enabled()) {
				961	/* need to normalize results mantissa */
				962	sign_save = Dbl_signextendedsign(opnd3p1);
				963	result_exponent = 0;
				964	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				965	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
				966	Dbl_set_sign(opnd3p1,/using/sign_save);
				967	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
				968	unfl);
				969	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				970	/* inexact = FALSE */
				971	return(OPC_2E_UNDERFLOWEXCEPTION);
				972	}
				973	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				974	return(NOEXCEPTION);
				975	}
				976	/* is denormalized, adjust exponent */
				977	Dbl_clear_signexponent(opnd1p1);
				978	Dbl_leftshiftby1(opnd1p1,opnd1p2);
				979	Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
				980	}
				981	/* opnd2 needs to have hidden bit set with msb in hidden bit */
				982	if (Dbl_isnotzero_exponent(opnd2p1)) {
				983	Dbl_clear_signexponent_set_hidden(opnd2p1);
				984	}
				985	else {
				986	/* check for zero */
				987	if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
				988	/*
				989	* Perform the add opnd3 with zero here.
				990	*/
				991	if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
				992	if (Is_rounding_mode(ROUNDMINUS)) {
				993	Dbl_or_signs(opnd3p1,resultp1);
				994	} else {
				995	Dbl_and_signs(opnd3p1,resultp1);
				996	}
				997	}
				998	/*
				999	* Now let's check for trapped underflow case.
				1000	*/
				1001	else if (Dbl_iszero_exponent(opnd3p1) &&
				1002	Is_underflowtrap_enabled()) {
				1003	/* need to normalize results mantissa */
				1004	sign_save = Dbl_signextendedsign(opnd3p1);
				1005	result_exponent = 0;
				1006	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				1007	Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
				1008	Dbl_set_sign(opnd3p1,/using/sign_save);
				1009	Dbl_setwrapped_exponent(opnd3p1,result_exponent,
				1010	unfl);
				1011	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				1012	/* inexact = FALSE */
				1013	return(OPC_2E_UNDERFLOWEXCEPTION);
				1014	}
				1015	Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
				1016	return(NOEXCEPTION);
				1017	}
				1018	/* is denormalized; want to normalize */
				1019	Dbl_clear_signexponent(opnd2p1);
				1020	Dbl_leftshiftby1(opnd2p1,opnd2p2);
				1021	Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
				1022	}
				1023
				1024	/* Multiply the first two source mantissas together */
				1025
				1026	/*
				1027	* The intermediate result will be kept in tmpres,
				1028	* which needs enough room for 106 bits of mantissa,
				1029	* so lets call it a Double extended.
				1030	*/
				1031	Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				1032
				1033	/*
				1034	* Four bits at a time are inspected in each loop, and a
				1035	* simple shift and add multiply algorithm is used.
				1036	*/
				1037	for (count = DBL_P-1; count >= 0; count -= 4) {
				1038	Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				1039	if (Dbit28p2(opnd1p2)) {
				1040	/* Fourword_add should be an ADD followed by 3 ADDC's */
				1041	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				1042	opnd2p1<<3 \| opnd2p2>>29, opnd2p2<<3, 0, 0);
				1043	}
				1044	if (Dbit29p2(opnd1p2)) {
				1045	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				1046	opnd2p1<<2 \| opnd2p2>>30, opnd2p2<<2, 0, 0);
				1047	}
				1048	if (Dbit30p2(opnd1p2)) {
				1049	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				1050	opnd2p1<<1 \| opnd2p2>>31, opnd2p2<<1, 0, 0);
				1051	}
				1052	if (Dbit31p2(opnd1p2)) {
				1053	Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
				1054	opnd2p1, opnd2p2, 0, 0);
				1055	}
				1056	Dbl_rightshiftby4(opnd1p1,opnd1p2);
				1057	}
				1058	if (Is_dexthiddenoverflow(tmpresp1)) {
				1059	/* result mantissa >= 2 (mantissa overflow) */
				1060	mpy_exponent++;
				1061	Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
				1062	}
				1063
				1064	/*
				1065	* Restore the sign of the mpy result which was saved in resultp1.
				1066	* The exponent will continue to be kept in mpy_exponent.
				1067	*/
				1068	Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
				1069
				1070	/*
				1071	* No rounding is required, since the result of the multiply
				1072	* is exact in the extended format.
				1073	*/
				1074
				1075	/*
				1076	* Now we are ready to perform the add portion of the operation.
				1077	*
				1078	* The exponents need to be kept as integers for now, since the
				1079	* multiply result might not fit into the exponent field. We
				1080	* can't overflow or underflow because of this yet, since the
				1081	* add could bring the final result back into range.
				1082	*/
				1083	add_exponent = Dbl_exponent(opnd3p1);
				1084
				1085	/*
				1086	* Check for denormalized or zero add operand.
				1087	*/
				1088	if (add_exponent == 0) {
				1089	/* check for zero */
				1090	if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
				1091	/* right is zero */
				1092	/* Left can't be zero and must be result.
				1093	*
				1094	* The final result is now in tmpres and mpy_exponent,
				1095	* and needs to be rounded and squeezed back into
				1096	* double precision format from double extended.
				1097	*/
				1098	result_exponent = mpy_exponent;
				1099	Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				1100	resultp1,resultp2,resultp3,resultp4);
				1101	sign_save = Dbl_signextendedsign(resultp1);/save sign/
				1102	goto round;
				1103	}
				1104
				1105	/*
				1106	* Neither are zeroes.
				1107	* Adjust exponent and normalize add operand.
				1108	*/
				1109	sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */
				1110	Dbl_clear_signexponent(opnd3p1);
				1111	Dbl_leftshiftby1(opnd3p1,opnd3p2);
				1112	Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
				1113	Dbl_set_sign(opnd3p1,sign_save); /* restore sign */
				1114	} else {
				1115	Dbl_clear_exponent_set_hidden(opnd3p1);
				1116	}
				1117	/*
				1118	* Copy opnd3 to the double extended variable called right.
				1119	*/
				1120	Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
				1121
				1122	/*
				1123	* A zero "save" helps discover equal operands (for later),
				1124	* and is used in swapping operands (if needed).
				1125	*/
				1126	Dblext_xortointp1(tmpresp1,rightp1,/to/save);
				1127
				1128	/*
				1129	* Compare magnitude of operands.
				1130	*/
				1131	Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
				1132	Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
				1133	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
				1134	Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
				1135	/*
				1136	* Set the left operand to the larger one by XOR swap.
				1137	* First finish the first word "save".
				1138	*/
				1139	Dblext_xorfromintp1(save,rightp1,/to/rightp1);
				1140	Dblext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
				1141	Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
				1142	rightp2,rightp3,rightp4);
				1143	/* also setup exponents used in rest of routine */
				1144	diff_exponent = add_exponent - mpy_exponent;
				1145	result_exponent = add_exponent;
				1146	} else {
				1147	/* also setup exponents used in rest of routine */
				1148	diff_exponent = mpy_exponent - add_exponent;
				1149	result_exponent = mpy_exponent;
				1150	}
				1151	/* Invariant: left is not smaller than right. */
				1152
				1153	/*
				1154	* Special case alignment of operands that would force alignment
				1155	* beyond the extent of the extension. A further optimization
				1156	* could special case this but only reduces the path length for
				1157	* this infrequent case.
				1158	*/
				1159	if (diff_exponent > DBLEXT_THRESHOLD) {
				1160	diff_exponent = DBLEXT_THRESHOLD;
				1161	}
				1162
				1163	/* Align right operand by shifting it to the right */
				1164	Dblext_clear_sign(rightp1);
				1165	Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
				1166	/shifted by/diff_exponent);
				1167
				1168	/* Treat sum and difference of the operands separately. */
				1169	if ((int)save < 0) {
				1170	/*
				1171	* Difference of the two operands. Overflow can occur if the
				1172	* multiply overflowed. A borrow can occur out of the hidden
				1173	* bit and force a post normalization phase.
				1174	*/
				1175	Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				1176	rightp1,rightp2,rightp3,rightp4,
				1177	resultp1,resultp2,resultp3,resultp4);
				1178	sign_save = Dbl_signextendedsign(resultp1);
				1179	if (Dbl_iszero_hidden(resultp1)) {
				1180	/* Handle normalization */
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	1181	/* A straightforward algorithm would now shift the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1182	* result and extension left until the hidden bit
				1183	* becomes one. Not all of the extension bits need
				1184	* participate in the shift. Only the two most
				1185	* significant bits (round and guard) are needed.
				1186	* If only a single shift is needed then the guard
				1187	* bit becomes a significant low order bit and the
				1188	* extension must participate in the rounding.
				1189	* If more than a single shift is needed, then all
				1190	* bits to the right of the guard bit are zeros,
				1191	* and the guard bit may or may not be zero. */
				1192	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
				1193	resultp4);
				1194
				1195	/* Need to check for a zero result. The sign and
				1196	* exponent fields have already been zeroed. The more
				1197	* efficient test of the full object can be used.
				1198	*/
				1199	if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
				1200	/* Must have been "x-x" or "x+(-x)". */
				1201	if (Is_rounding_mode(ROUNDMINUS))
				1202	Dbl_setone_sign(resultp1);
				1203	Dbl_copytoptr(resultp1,resultp2,dstptr);
				1204	return(NOEXCEPTION);
				1205	}
				1206	result_exponent--;
				1207
				1208	/* Look to see if normalization is finished. */
				1209	if (Dbl_isone_hidden(resultp1)) {
				1210	/* No further normalization is needed */
				1211	goto round;
				1212	}
				1213
				1214	/* Discover first one bit to determine shift amount.
				1215	* Use a modified binary search. We have already
				1216	* shifted the result one position right and still
				1217	* not found a one so the remainder of the extension
				1218	* must be zero and simplifies rounding. */
				1219	/* Scan bytes */
				1220	while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
				1221	Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
				1222	result_exponent -= 8;
				1223	}
				1224	/* Now narrow it down to the nibble */
				1225	if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
				1226	/* The lower nibble contains the
				1227	* normalizing one */
				1228	Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
				1229	result_exponent -= 4;
				1230	}
				1231	/* Select case where first bit is set (already
				1232	* normalized) otherwise select the proper shift. */
				1233	jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
				1234	if (jumpsize <= 7) switch(jumpsize) {
				1235	case 1:
				1236	Dblext_leftshiftby3(resultp1,resultp2,resultp3,
				1237	resultp4);
				1238	result_exponent -= 3;
				1239	break;
				1240	case 2:
				1241	case 3:
				1242	Dblext_leftshiftby2(resultp1,resultp2,resultp3,
				1243	resultp4);
				1244	result_exponent -= 2;
				1245	break;
				1246	case 4:
				1247	case 5:
				1248	case 6:
				1249	case 7:
				1250	Dblext_leftshiftby1(resultp1,resultp2,resultp3,
				1251	resultp4);
				1252	result_exponent -= 1;
				1253	break;
				1254	}
				1255	} /* end if (hidden...)... */
				1256	/* Fall through and round */
				1257	} /* end if (save < 0)... */
				1258	else {
				1259	/* Add magnitudes */
				1260	Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
				1261	rightp1,rightp2,rightp3,rightp4,
				1262	/to/resultp1,resultp2,resultp3,resultp4);
				1263	sign_save = Dbl_signextendedsign(resultp1);
				1264	if (Dbl_isone_hiddenoverflow(resultp1)) {
				1265	/* Prenormalization required. */
				1266	Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
				1267	resultp4);
				1268	result_exponent++;
				1269	} /* end if hiddenoverflow... */
				1270	} /* end else ...add magnitudes... */
				1271
				1272	/* Round the result. If the extension and lower two words are
				1273	* all zeros, then the result is exact. Otherwise round in the
				1274	* correct direction. Underflow is possible. If a postnormalization
				1275	* is necessary, then the mantissa is all zeros so no shift is needed.
				1276	*/
				1277	round:
				1278	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
				1279	Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
				1280	result_exponent,is_tiny);
				1281	}
				1282	Dbl_set_sign(resultp1,/using/sign_save);
				1283	if (Dblext_isnotzero_mantissap3(resultp3) \|\|
				1284	Dblext_isnotzero_mantissap4(resultp4)) {
				1285	inexact = TRUE;
				1286	switch(Rounding_mode()) {
				1287	case ROUNDNEAREST: /* The default. */
				1288	if (Dblext_isone_highp3(resultp3)) {
				1289	/* at least 1/2 ulp */
				1290	if (Dblext_isnotzero_low31p3(resultp3) \|\|
				1291	Dblext_isnotzero_mantissap4(resultp4) \|\|
				1292	Dblext_isone_lowp2(resultp2)) {
				1293	/* either exactly half way and odd or
				1294	* more than 1/2ulp */
				1295	Dbl_increment(resultp1,resultp2);
				1296	}
				1297	}
				1298	break;
				1299
				1300	case ROUNDPLUS:
				1301	if (Dbl_iszero_sign(resultp1)) {
				1302	/* Round up positive results */
				1303	Dbl_increment(resultp1,resultp2);
				1304	}
				1305	break;
				1306
				1307	case ROUNDMINUS:
				1308	if (Dbl_isone_sign(resultp1)) {
				1309	/* Round down negative results */
				1310	Dbl_increment(resultp1,resultp2);
				1311	}
				1312
				1313	case ROUNDZERO:;
				1314	/* truncate is simple */
				1315	} /* end switch... */
				1316	if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
				1317	}
				1318	if (result_exponent >= DBL_INFINITY_EXPONENT) {
				1319	/* Overflow */
				1320	if (Is_overflowtrap_enabled()) {
				1321	/*
				1322	* Adjust bias of result
				1323	*/
				1324	Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
				1325	Dbl_copytoptr(resultp1,resultp2,dstptr);
				1326	if (inexact)
				1327	if (Is_inexacttrap_enabled())
				1328	return (OPC_2E_OVERFLOWEXCEPTION \|
				1329	OPC_2E_INEXACTEXCEPTION);
				1330	else Set_inexactflag();
				1331	return (OPC_2E_OVERFLOWEXCEPTION);
				1332	}
				1333	inexact = TRUE;
				1334	Set_overflowflag();
				1335	Dbl_setoverflow(resultp1,resultp2);
				1336	} else if (result_exponent <= 0) { /* underflow case */
				1337	if (Is_underflowtrap_enabled()) {
				1338	/*
				1339	* Adjust bias of result
				1340	*/
				1341	Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
				1342	Dbl_copytoptr(resultp1,resultp2,dstptr);
				1343	if (inexact)
				1344	if (Is_inexacttrap_enabled())
				1345	return (OPC_2E_UNDERFLOWEXCEPTION \|
				1346	OPC_2E_INEXACTEXCEPTION);
				1347	else Set_inexactflag();
				1348	return(OPC_2E_UNDERFLOWEXCEPTION);
				1349	}
				1350	else if (inexact && is_tiny) Set_underflowflag();
				1351	}
				1352	else Dbl_set_exponent(resultp1,result_exponent);
				1353	Dbl_copytoptr(resultp1,resultp2,dstptr);
				1354	if (inexact)
				1355	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
				1356	else Set_inexactflag();
				1357	return(NOEXCEPTION);
				1358	}
				1359
				1360	/*
				1361	* Single Floating-point Multiply Fused Add
				1362	*/
				1363
				1364	sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				1365
				1366	sgl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
				1367	unsigned int *status;
				1368	{
				1369	unsigned int opnd1, opnd2, opnd3;
				1370	register unsigned int tmpresp1, tmpresp2;
				1371	unsigned int rightp1, rightp2;
				1372	unsigned int resultp1, resultp2 = 0;
				1373	register int mpy_exponent, add_exponent, count;
				1374	boolean inexact = FALSE, is_tiny = FALSE;
				1375
				1376	unsigned int signlessleft1, signlessright1, save;
				1377	register int result_exponent, diff_exponent;
				1378	int sign_save, jumpsize;
				1379
				1380	Sgl_copyfromptr(src1ptr,opnd1);
				1381	Sgl_copyfromptr(src2ptr,opnd2);
				1382	Sgl_copyfromptr(src3ptr,opnd3);
				1383
				1384	/*
				1385	* set sign bit of result of multiply
				1386	*/
				1387	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
				1388	Sgl_setnegativezero(resultp1);
				1389	else Sgl_setzero(resultp1);
				1390
				1391	/*
				1392	* Generate multiply exponent
				1393	*/
				1394	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
				1395
				1396	/*
				1397	* check first operand for NaN's or infinity
				1398	*/
				1399	if (Sgl_isinfinity_exponent(opnd1)) {
				1400	if (Sgl_iszero_mantissa(opnd1)) {
				1401	if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
				1402	if (Sgl_iszero_exponentmantissa(opnd2)) {
				1403	/*
				1404	* invalid since operands are infinity
				1405	* and zero
				1406	*/
				1407	if (Is_invalidtrap_enabled())
				1408	return(OPC_2E_INVALIDEXCEPTION);
				1409	Set_invalidflag();
				1410	Sgl_makequietnan(resultp1);
				1411	Sgl_copytoptr(resultp1,dstptr);
				1412	return(NOEXCEPTION);
				1413	}
				1414	/*
				1415	* Check third operand for infinity with a
				1416	* sign opposite of the multiply result
				1417	*/
				1418	if (Sgl_isinfinity(opnd3) &&
				1419	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
				1420	/*
				1421	* invalid since attempting a magnitude
				1422	* subtraction of infinities
				1423	*/
				1424	if (Is_invalidtrap_enabled())
				1425	return(OPC_2E_INVALIDEXCEPTION);
				1426	Set_invalidflag();
				1427	Sgl_makequietnan(resultp1);
				1428	Sgl_copytoptr(resultp1,dstptr);
				1429	return(NOEXCEPTION);
				1430	}
				1431
				1432	/*
				1433	* return infinity
				1434	*/
				1435	Sgl_setinfinity_exponentmantissa(resultp1);
				1436	Sgl_copytoptr(resultp1,dstptr);
				1437	return(NOEXCEPTION);
				1438	}
				1439	}
				1440	else {
				1441	/*
				1442	* is NaN; signaling or quiet?
				1443	*/
				1444	if (Sgl_isone_signaling(opnd1)) {
				1445	/* trap if INVALIDTRAP enabled */
				1446	if (Is_invalidtrap_enabled())
				1447	return(OPC_2E_INVALIDEXCEPTION);
				1448	/* make NaN quiet */
				1449	Set_invalidflag();
				1450	Sgl_set_quiet(opnd1);
				1451	}
				1452	/*
				1453	* is second operand a signaling NaN?
				1454	*/
				1455	else if (Sgl_is_signalingnan(opnd2)) {
				1456	/* trap if INVALIDTRAP enabled */
				1457	if (Is_invalidtrap_enabled())
				1458	return(OPC_2E_INVALIDEXCEPTION);
				1459	/* make NaN quiet */
				1460	Set_invalidflag();
				1461	Sgl_set_quiet(opnd2);
				1462	Sgl_copytoptr(opnd2,dstptr);
				1463	return(NOEXCEPTION);
				1464	}
				1465	/*
				1466	* is third operand a signaling NaN?
				1467	*/
				1468	else if (Sgl_is_signalingnan(opnd3)) {
				1469	/* trap if INVALIDTRAP enabled */
				1470	if (Is_invalidtrap_enabled())
				1471	return(OPC_2E_INVALIDEXCEPTION);
				1472	/* make NaN quiet */
				1473	Set_invalidflag();
				1474	Sgl_set_quiet(opnd3);
				1475	Sgl_copytoptr(opnd3,dstptr);
				1476	return(NOEXCEPTION);
				1477	}
				1478	/*
				1479	* return quiet NaN
				1480	*/
				1481	Sgl_copytoptr(opnd1,dstptr);
				1482	return(NOEXCEPTION);
				1483	}
				1484	}
				1485
				1486	/*
				1487	* check second operand for NaN's or infinity
				1488	*/
				1489	if (Sgl_isinfinity_exponent(opnd2)) {
				1490	if (Sgl_iszero_mantissa(opnd2)) {
				1491	if (Sgl_isnotnan(opnd3)) {
				1492	if (Sgl_iszero_exponentmantissa(opnd1)) {
				1493	/*
				1494	* invalid since multiply operands are
				1495	* zero & infinity
				1496	*/
				1497	if (Is_invalidtrap_enabled())
				1498	return(OPC_2E_INVALIDEXCEPTION);
				1499	Set_invalidflag();
				1500	Sgl_makequietnan(opnd2);
				1501	Sgl_copytoptr(opnd2,dstptr);
				1502	return(NOEXCEPTION);
				1503	}
				1504
				1505	/*
				1506	* Check third operand for infinity with a
				1507	* sign opposite of the multiply result
				1508	*/
				1509	if (Sgl_isinfinity(opnd3) &&
				1510	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
				1511	/*
				1512	* invalid since attempting a magnitude
				1513	* subtraction of infinities
				1514	*/
				1515	if (Is_invalidtrap_enabled())
				1516	return(OPC_2E_INVALIDEXCEPTION);
				1517	Set_invalidflag();
				1518	Sgl_makequietnan(resultp1);
				1519	Sgl_copytoptr(resultp1,dstptr);
				1520	return(NOEXCEPTION);
				1521	}
				1522
				1523	/*
				1524	* return infinity
				1525	*/
				1526	Sgl_setinfinity_exponentmantissa(resultp1);
				1527	Sgl_copytoptr(resultp1,dstptr);
				1528	return(NOEXCEPTION);
				1529	}
				1530	}
				1531	else {
				1532	/*
				1533	* is NaN; signaling or quiet?
				1534	*/
				1535	if (Sgl_isone_signaling(opnd2)) {
				1536	/* trap if INVALIDTRAP enabled */
				1537	if (Is_invalidtrap_enabled())
				1538	return(OPC_2E_INVALIDEXCEPTION);
				1539	/* make NaN quiet */
				1540	Set_invalidflag();
				1541	Sgl_set_quiet(opnd2);
				1542	}
				1543	/*
				1544	* is third operand a signaling NaN?
				1545	*/
				1546	else if (Sgl_is_signalingnan(opnd3)) {
				1547	/* trap if INVALIDTRAP enabled */
				1548	if (Is_invalidtrap_enabled())
				1549	return(OPC_2E_INVALIDEXCEPTION);
				1550	/* make NaN quiet */
				1551	Set_invalidflag();
				1552	Sgl_set_quiet(opnd3);
				1553	Sgl_copytoptr(opnd3,dstptr);
				1554	return(NOEXCEPTION);
				1555	}
				1556	/*
				1557	* return quiet NaN
				1558	*/
				1559	Sgl_copytoptr(opnd2,dstptr);
				1560	return(NOEXCEPTION);
				1561	}
				1562	}
				1563
				1564	/*
				1565	* check third operand for NaN's or infinity
				1566	*/
				1567	if (Sgl_isinfinity_exponent(opnd3)) {
				1568	if (Sgl_iszero_mantissa(opnd3)) {
				1569	/* return infinity */
				1570	Sgl_copytoptr(opnd3,dstptr);
				1571	return(NOEXCEPTION);
				1572	} else {
				1573	/*
				1574	* is NaN; signaling or quiet?
				1575	*/
				1576	if (Sgl_isone_signaling(opnd3)) {
				1577	/* trap if INVALIDTRAP enabled */
				1578	if (Is_invalidtrap_enabled())
				1579	return(OPC_2E_INVALIDEXCEPTION);
				1580	/* make NaN quiet */
				1581	Set_invalidflag();
				1582	Sgl_set_quiet(opnd3);
				1583	}
				1584	/*
				1585	* return quiet NaN
				1586	*/
				1587	Sgl_copytoptr(opnd3,dstptr);
				1588	return(NOEXCEPTION);
				1589	}
				1590	}
				1591
				1592	/*
				1593	* Generate multiply mantissa
				1594	*/
				1595	if (Sgl_isnotzero_exponent(opnd1)) {
				1596	/* set hidden bit */
				1597	Sgl_clear_signexponent_set_hidden(opnd1);
				1598	}
				1599	else {
				1600	/* check for zero */
				1601	if (Sgl_iszero_mantissa(opnd1)) {
				1602	/*
				1603	* Perform the add opnd3 with zero here.
				1604	*/
				1605	if (Sgl_iszero_exponentmantissa(opnd3)) {
				1606	if (Is_rounding_mode(ROUNDMINUS)) {
				1607	Sgl_or_signs(opnd3,resultp1);
				1608	} else {
				1609	Sgl_and_signs(opnd3,resultp1);
				1610	}
				1611	}
				1612	/*
				1613	* Now let's check for trapped underflow case.
				1614	*/
				1615	else if (Sgl_iszero_exponent(opnd3) &&
				1616	Is_underflowtrap_enabled()) {
				1617	/* need to normalize results mantissa */
				1618	sign_save = Sgl_signextendedsign(opnd3);
				1619	result_exponent = 0;
				1620	Sgl_leftshiftby1(opnd3);
				1621	Sgl_normalize(opnd3,result_exponent);
				1622	Sgl_set_sign(opnd3,/using/sign_save);
				1623	Sgl_setwrapped_exponent(opnd3,result_exponent,
				1624	unfl);
				1625	Sgl_copytoptr(opnd3,dstptr);
				1626	/* inexact = FALSE */
				1627	return(OPC_2E_UNDERFLOWEXCEPTION);
				1628	}
				1629	Sgl_copytoptr(opnd3,dstptr);
				1630	return(NOEXCEPTION);
				1631	}
				1632	/* is denormalized, adjust exponent */
				1633	Sgl_clear_signexponent(opnd1);
				1634	Sgl_leftshiftby1(opnd1);
				1635	Sgl_normalize(opnd1,mpy_exponent);
				1636	}
				1637	/* opnd2 needs to have hidden bit set with msb in hidden bit */
				1638	if (Sgl_isnotzero_exponent(opnd2)) {
				1639	Sgl_clear_signexponent_set_hidden(opnd2);
				1640	}
				1641	else {
				1642	/* check for zero */
				1643	if (Sgl_iszero_mantissa(opnd2)) {
				1644	/*
				1645	* Perform the add opnd3 with zero here.
				1646	*/
				1647	if (Sgl_iszero_exponentmantissa(opnd3)) {
				1648	if (Is_rounding_mode(ROUNDMINUS)) {
				1649	Sgl_or_signs(opnd3,resultp1);
				1650	} else {
				1651	Sgl_and_signs(opnd3,resultp1);
				1652	}
				1653	}
				1654	/*
				1655	* Now let's check for trapped underflow case.
				1656	*/
				1657	else if (Sgl_iszero_exponent(opnd3) &&
				1658	Is_underflowtrap_enabled()) {
				1659	/* need to normalize results mantissa */
				1660	sign_save = Sgl_signextendedsign(opnd3);
				1661	result_exponent = 0;
				1662	Sgl_leftshiftby1(opnd3);
				1663	Sgl_normalize(opnd3,result_exponent);
				1664	Sgl_set_sign(opnd3,/using/sign_save);
				1665	Sgl_setwrapped_exponent(opnd3,result_exponent,
				1666	unfl);
				1667	Sgl_copytoptr(opnd3,dstptr);
				1668	/* inexact = FALSE */
				1669	return(OPC_2E_UNDERFLOWEXCEPTION);
				1670	}
				1671	Sgl_copytoptr(opnd3,dstptr);
				1672	return(NOEXCEPTION);
				1673	}
				1674	/* is denormalized; want to normalize */
				1675	Sgl_clear_signexponent(opnd2);
				1676	Sgl_leftshiftby1(opnd2);
				1677	Sgl_normalize(opnd2,mpy_exponent);
				1678	}
				1679
				1680	/* Multiply the first two source mantissas together */
				1681
				1682	/*
				1683	* The intermediate result will be kept in tmpres,
				1684	* which needs enough room for 106 bits of mantissa,
				1685	* so lets call it a Double extended.
				1686	*/
				1687	Sglext_setzero(tmpresp1,tmpresp2);
				1688
				1689	/*
				1690	* Four bits at a time are inspected in each loop, and a
				1691	* simple shift and add multiply algorithm is used.
				1692	*/
				1693	for (count = SGL_P-1; count >= 0; count -= 4) {
				1694	Sglext_rightshiftby4(tmpresp1,tmpresp2);
				1695	if (Sbit28(opnd1)) {
				1696	/* Twoword_add should be an ADD followed by 2 ADDC's */
				1697	Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
				1698	}
				1699	if (Sbit29(opnd1)) {
				1700	Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
				1701	}
				1702	if (Sbit30(opnd1)) {
				1703	Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
				1704	}
				1705	if (Sbit31(opnd1)) {
				1706	Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
				1707	}
				1708	Sgl_rightshiftby4(opnd1);
				1709	}
				1710	if (Is_sexthiddenoverflow(tmpresp1)) {
				1711	/* result mantissa >= 2 (mantissa overflow) */
				1712	mpy_exponent++;
				1713	Sglext_rightshiftby4(tmpresp1,tmpresp2);
				1714	} else {
				1715	Sglext_rightshiftby3(tmpresp1,tmpresp2);
				1716	}
				1717
				1718	/*
				1719	* Restore the sign of the mpy result which was saved in resultp1.
				1720	* The exponent will continue to be kept in mpy_exponent.
				1721	*/
				1722	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
				1723
				1724	/*
				1725	* No rounding is required, since the result of the multiply
				1726	* is exact in the extended format.
				1727	*/
				1728
				1729	/*
				1730	* Now we are ready to perform the add portion of the operation.
				1731	*
				1732	* The exponents need to be kept as integers for now, since the
				1733	* multiply result might not fit into the exponent field. We
				1734	* can't overflow or underflow because of this yet, since the
				1735	* add could bring the final result back into range.
				1736	*/
				1737	add_exponent = Sgl_exponent(opnd3);
				1738
				1739	/*
				1740	* Check for denormalized or zero add operand.
				1741	*/
				1742	if (add_exponent == 0) {
				1743	/* check for zero */
				1744	if (Sgl_iszero_mantissa(opnd3)) {
				1745	/* right is zero */
				1746	/* Left can't be zero and must be result.
				1747	*
				1748	* The final result is now in tmpres and mpy_exponent,
				1749	* and needs to be rounded and squeezed back into
				1750	* double precision format from double extended.
				1751	*/
				1752	result_exponent = mpy_exponent;
				1753	Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
				1754	sign_save = Sgl_signextendedsign(resultp1);/save sign/
				1755	goto round;
				1756	}
				1757
				1758	/*
				1759	* Neither are zeroes.
				1760	* Adjust exponent and normalize add operand.
				1761	*/
				1762	sign_save = Sgl_signextendedsign(opnd3); /* save sign */
				1763	Sgl_clear_signexponent(opnd3);
				1764	Sgl_leftshiftby1(opnd3);
				1765	Sgl_normalize(opnd3,add_exponent);
				1766	Sgl_set_sign(opnd3,sign_save); /* restore sign */
				1767	} else {
				1768	Sgl_clear_exponent_set_hidden(opnd3);
				1769	}
				1770	/*
				1771	* Copy opnd3 to the double extended variable called right.
				1772	*/
				1773	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
				1774
				1775	/*
				1776	* A zero "save" helps discover equal operands (for later),
				1777	* and is used in swapping operands (if needed).
				1778	*/
				1779	Sglext_xortointp1(tmpresp1,rightp1,/to/save);
				1780
				1781	/*
				1782	* Compare magnitude of operands.
				1783	*/
				1784	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
				1785	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
				1786	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
				1787	Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
				1788	/*
				1789	* Set the left operand to the larger one by XOR swap.
				1790	* First finish the first word "save".
				1791	*/
				1792	Sglext_xorfromintp1(save,rightp1,/to/rightp1);
				1793	Sglext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
				1794	Sglext_swap_lower(tmpresp2,rightp2);
				1795	/* also setup exponents used in rest of routine */
				1796	diff_exponent = add_exponent - mpy_exponent;
				1797	result_exponent = add_exponent;
				1798	} else {
				1799	/* also setup exponents used in rest of routine */
				1800	diff_exponent = mpy_exponent - add_exponent;
				1801	result_exponent = mpy_exponent;
				1802	}
				1803	/* Invariant: left is not smaller than right. */
				1804
				1805	/*
				1806	* Special case alignment of operands that would force alignment
				1807	* beyond the extent of the extension. A further optimization
				1808	* could special case this but only reduces the path length for
				1809	* this infrequent case.
				1810	*/
				1811	if (diff_exponent > SGLEXT_THRESHOLD) {
				1812	diff_exponent = SGLEXT_THRESHOLD;
				1813	}
				1814
				1815	/* Align right operand by shifting it to the right */
				1816	Sglext_clear_sign(rightp1);
				1817	Sglext_right_align(rightp1,rightp2,/shifted by/diff_exponent);
				1818
				1819	/* Treat sum and difference of the operands separately. */
				1820	if ((int)save < 0) {
				1821	/*
				1822	* Difference of the two operands. Overflow can occur if the
				1823	* multiply overflowed. A borrow can occur out of the hidden
				1824	* bit and force a post normalization phase.
				1825	*/
				1826	Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
				1827	resultp1,resultp2);
				1828	sign_save = Sgl_signextendedsign(resultp1);
				1829	if (Sgl_iszero_hidden(resultp1)) {
				1830	/* Handle normalization */
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	1831	/* A straightforward algorithm would now shift the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1832	* result and extension left until the hidden bit
				1833	* becomes one. Not all of the extension bits need
				1834	* participate in the shift. Only the two most
				1835	* significant bits (round and guard) are needed.
				1836	* If only a single shift is needed then the guard
				1837	* bit becomes a significant low order bit and the
				1838	* extension must participate in the rounding.
				1839	* If more than a single shift is needed, then all
				1840	* bits to the right of the guard bit are zeros,
				1841	* and the guard bit may or may not be zero. */
				1842	Sglext_leftshiftby1(resultp1,resultp2);
				1843
				1844	/* Need to check for a zero result. The sign and
				1845	* exponent fields have already been zeroed. The more
				1846	* efficient test of the full object can be used.
				1847	*/
				1848	if (Sglext_iszero(resultp1,resultp2)) {
				1849	/* Must have been "x-x" or "x+(-x)". */
				1850	if (Is_rounding_mode(ROUNDMINUS))
				1851	Sgl_setone_sign(resultp1);
				1852	Sgl_copytoptr(resultp1,dstptr);
				1853	return(NOEXCEPTION);
				1854	}
				1855	result_exponent--;
				1856
				1857	/* Look to see if normalization is finished. */
				1858	if (Sgl_isone_hidden(resultp1)) {
				1859	/* No further normalization is needed */
				1860	goto round;
				1861	}
				1862
				1863	/* Discover first one bit to determine shift amount.
				1864	* Use a modified binary search. We have already
				1865	* shifted the result one position right and still
				1866	* not found a one so the remainder of the extension
				1867	* must be zero and simplifies rounding. */
				1868	/* Scan bytes */
				1869	while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
				1870	Sglext_leftshiftby8(resultp1,resultp2);
				1871	result_exponent -= 8;
				1872	}
				1873	/* Now narrow it down to the nibble */
				1874	if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
				1875	/* The lower nibble contains the
				1876	* normalizing one */
				1877	Sglext_leftshiftby4(resultp1,resultp2);
				1878	result_exponent -= 4;
				1879	}
				1880	/* Select case where first bit is set (already
				1881	* normalized) otherwise select the proper shift. */
				1882	jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
				1883	if (jumpsize <= 7) switch(jumpsize) {
				1884	case 1:
				1885	Sglext_leftshiftby3(resultp1,resultp2);
				1886	result_exponent -= 3;
				1887	break;
				1888	case 2:
				1889	case 3:
				1890	Sglext_leftshiftby2(resultp1,resultp2);
				1891	result_exponent -= 2;
				1892	break;
				1893	case 4:
				1894	case 5:
				1895	case 6:
				1896	case 7:
				1897	Sglext_leftshiftby1(resultp1,resultp2);
				1898	result_exponent -= 1;
				1899	break;
				1900	}
				1901	} /* end if (hidden...)... */
				1902	/* Fall through and round */
				1903	} /* end if (save < 0)... */
				1904	else {
				1905	/* Add magnitudes */
				1906	Sglext_addition(tmpresp1,tmpresp2,
				1907	rightp1,rightp2, /to/resultp1,resultp2);
				1908	sign_save = Sgl_signextendedsign(resultp1);
				1909	if (Sgl_isone_hiddenoverflow(resultp1)) {
				1910	/* Prenormalization required. */
				1911	Sglext_arithrightshiftby1(resultp1,resultp2);
				1912	result_exponent++;
				1913	} /* end if hiddenoverflow... */
				1914	} /* end else ...add magnitudes... */
				1915
				1916	/* Round the result. If the extension and lower two words are
				1917	* all zeros, then the result is exact. Otherwise round in the
				1918	* correct direction. Underflow is possible. If a postnormalization
				1919	* is necessary, then the mantissa is all zeros so no shift is needed.
				1920	*/
				1921	round:
				1922	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
				1923	Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
				1924	}
				1925	Sgl_set_sign(resultp1,/using/sign_save);
				1926	if (Sglext_isnotzero_mantissap2(resultp2)) {
				1927	inexact = TRUE;
				1928	switch(Rounding_mode()) {
				1929	case ROUNDNEAREST: /* The default. */
				1930	if (Sglext_isone_highp2(resultp2)) {
				1931	/* at least 1/2 ulp */
				1932	if (Sglext_isnotzero_low31p2(resultp2) \|\|
				1933	Sglext_isone_lowp1(resultp1)) {
				1934	/* either exactly half way and odd or
				1935	* more than 1/2ulp */
				1936	Sgl_increment(resultp1);
				1937	}
				1938	}
				1939	break;
				1940
				1941	case ROUNDPLUS:
				1942	if (Sgl_iszero_sign(resultp1)) {
				1943	/* Round up positive results */
				1944	Sgl_increment(resultp1);
				1945	}
				1946	break;
				1947
				1948	case ROUNDMINUS:
				1949	if (Sgl_isone_sign(resultp1)) {
				1950	/* Round down negative results */
				1951	Sgl_increment(resultp1);
				1952	}
				1953
				1954	case ROUNDZERO:;
				1955	/* truncate is simple */
				1956	} /* end switch... */
				1957	if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
				1958	}
				1959	if (result_exponent >= SGL_INFINITY_EXPONENT) {
				1960	/* Overflow */
				1961	if (Is_overflowtrap_enabled()) {
				1962	/*
				1963	* Adjust bias of result
				1964	*/
				1965	Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
				1966	Sgl_copytoptr(resultp1,dstptr);
				1967	if (inexact)
				1968	if (Is_inexacttrap_enabled())
				1969	return (OPC_2E_OVERFLOWEXCEPTION \|
				1970	OPC_2E_INEXACTEXCEPTION);
				1971	else Set_inexactflag();
				1972	return (OPC_2E_OVERFLOWEXCEPTION);
				1973	}
				1974	inexact = TRUE;
				1975	Set_overflowflag();
				1976	Sgl_setoverflow(resultp1);
				1977	} else if (result_exponent <= 0) { /* underflow case */
				1978	if (Is_underflowtrap_enabled()) {
				1979	/*
				1980	* Adjust bias of result
				1981	*/
				1982	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
				1983	Sgl_copytoptr(resultp1,dstptr);
				1984	if (inexact)
				1985	if (Is_inexacttrap_enabled())
				1986	return (OPC_2E_UNDERFLOWEXCEPTION \|
				1987	OPC_2E_INEXACTEXCEPTION);
				1988	else Set_inexactflag();
				1989	return(OPC_2E_UNDERFLOWEXCEPTION);
				1990	}
				1991	else if (inexact && is_tiny) Set_underflowflag();
				1992	}
				1993	else Sgl_set_exponent(resultp1,result_exponent);
				1994	Sgl_copytoptr(resultp1,dstptr);
				1995	if (inexact)
				1996	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
				1997	else Set_inexactflag();
				1998	return(NOEXCEPTION);
				1999	}
				2000
				2001	/*
				2002	* Single Floating-point Multiply Negate Fused Add
				2003	*/
				2004
				2005	sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
				2006
				2007	sgl_floating_point src1ptr, src2ptr, src3ptr, dstptr;
				2008	unsigned int *status;
				2009	{
				2010	unsigned int opnd1, opnd2, opnd3;
				2011	register unsigned int tmpresp1, tmpresp2;
				2012	unsigned int rightp1, rightp2;
				2013	unsigned int resultp1, resultp2 = 0;
				2014	register int mpy_exponent, add_exponent, count;
				2015	boolean inexact = FALSE, is_tiny = FALSE;
				2016
				2017	unsigned int signlessleft1, signlessright1, save;
				2018	register int result_exponent, diff_exponent;
				2019	int sign_save, jumpsize;
				2020
				2021	Sgl_copyfromptr(src1ptr,opnd1);
				2022	Sgl_copyfromptr(src2ptr,opnd2);
				2023	Sgl_copyfromptr(src3ptr,opnd3);
				2024
				2025	/*
				2026	* set sign bit of result of multiply
				2027	*/
				2028	if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2))
				2029	Sgl_setzero(resultp1);
				2030	else
				2031	Sgl_setnegativezero(resultp1);
				2032
				2033	/*
				2034	* Generate multiply exponent
				2035	*/
				2036	mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
				2037
				2038	/*
				2039	* check first operand for NaN's or infinity
				2040	*/
				2041	if (Sgl_isinfinity_exponent(opnd1)) {
				2042	if (Sgl_iszero_mantissa(opnd1)) {
				2043	if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
				2044	if (Sgl_iszero_exponentmantissa(opnd2)) {
				2045	/*
				2046	* invalid since operands are infinity
				2047	* and zero
				2048	*/
				2049	if (Is_invalidtrap_enabled())
				2050	return(OPC_2E_INVALIDEXCEPTION);
				2051	Set_invalidflag();
				2052	Sgl_makequietnan(resultp1);
				2053	Sgl_copytoptr(resultp1,dstptr);
				2054	return(NOEXCEPTION);
				2055	}
				2056	/*
				2057	* Check third operand for infinity with a
				2058	* sign opposite of the multiply result
				2059	*/
				2060	if (Sgl_isinfinity(opnd3) &&
				2061	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
				2062	/*
				2063	* invalid since attempting a magnitude
				2064	* subtraction of infinities
				2065	*/
				2066	if (Is_invalidtrap_enabled())
				2067	return(OPC_2E_INVALIDEXCEPTION);
				2068	Set_invalidflag();
				2069	Sgl_makequietnan(resultp1);
				2070	Sgl_copytoptr(resultp1,dstptr);
				2071	return(NOEXCEPTION);
				2072	}
				2073
				2074	/*
				2075	* return infinity
				2076	*/
				2077	Sgl_setinfinity_exponentmantissa(resultp1);
				2078	Sgl_copytoptr(resultp1,dstptr);
				2079	return(NOEXCEPTION);
				2080	}
				2081	}
				2082	else {
				2083	/*
				2084	* is NaN; signaling or quiet?
				2085	*/
				2086	if (Sgl_isone_signaling(opnd1)) {
				2087	/* trap if INVALIDTRAP enabled */
				2088	if (Is_invalidtrap_enabled())
				2089	return(OPC_2E_INVALIDEXCEPTION);
				2090	/* make NaN quiet */
				2091	Set_invalidflag();
				2092	Sgl_set_quiet(opnd1);
				2093	}
				2094	/*
				2095	* is second operand a signaling NaN?
				2096	*/
				2097	else if (Sgl_is_signalingnan(opnd2)) {
				2098	/* trap if INVALIDTRAP enabled */
				2099	if (Is_invalidtrap_enabled())
				2100	return(OPC_2E_INVALIDEXCEPTION);
				2101	/* make NaN quiet */
				2102	Set_invalidflag();
				2103	Sgl_set_quiet(opnd2);
				2104	Sgl_copytoptr(opnd2,dstptr);
				2105	return(NOEXCEPTION);
				2106	}
				2107	/*
				2108	* is third operand a signaling NaN?
				2109	*/
				2110	else if (Sgl_is_signalingnan(opnd3)) {
				2111	/* trap if INVALIDTRAP enabled */
				2112	if (Is_invalidtrap_enabled())
				2113	return(OPC_2E_INVALIDEXCEPTION);
				2114	/* make NaN quiet */
				2115	Set_invalidflag();
				2116	Sgl_set_quiet(opnd3);
				2117	Sgl_copytoptr(opnd3,dstptr);
				2118	return(NOEXCEPTION);
				2119	}
				2120	/*
				2121	* return quiet NaN
				2122	*/
				2123	Sgl_copytoptr(opnd1,dstptr);
				2124	return(NOEXCEPTION);
				2125	}
				2126	}
				2127
				2128	/*
				2129	* check second operand for NaN's or infinity
				2130	*/
				2131	if (Sgl_isinfinity_exponent(opnd2)) {
				2132	if (Sgl_iszero_mantissa(opnd2)) {
				2133	if (Sgl_isnotnan(opnd3)) {
				2134	if (Sgl_iszero_exponentmantissa(opnd1)) {
				2135	/*
				2136	* invalid since multiply operands are
				2137	* zero & infinity
				2138	*/
				2139	if (Is_invalidtrap_enabled())
				2140	return(OPC_2E_INVALIDEXCEPTION);
				2141	Set_invalidflag();
				2142	Sgl_makequietnan(opnd2);
				2143	Sgl_copytoptr(opnd2,dstptr);
				2144	return(NOEXCEPTION);
				2145	}
				2146
				2147	/*
				2148	* Check third operand for infinity with a
				2149	* sign opposite of the multiply result
				2150	*/
				2151	if (Sgl_isinfinity(opnd3) &&
				2152	(Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
				2153	/*
				2154	* invalid since attempting a magnitude
				2155	* subtraction of infinities
				2156	*/
				2157	if (Is_invalidtrap_enabled())
				2158	return(OPC_2E_INVALIDEXCEPTION);
				2159	Set_invalidflag();
				2160	Sgl_makequietnan(resultp1);
				2161	Sgl_copytoptr(resultp1,dstptr);
				2162	return(NOEXCEPTION);
				2163	}
				2164
				2165	/*
				2166	* return infinity
				2167	*/
				2168	Sgl_setinfinity_exponentmantissa(resultp1);
				2169	Sgl_copytoptr(resultp1,dstptr);
				2170	return(NOEXCEPTION);
				2171	}
				2172	}
				2173	else {
				2174	/*
				2175	* is NaN; signaling or quiet?
				2176	*/
				2177	if (Sgl_isone_signaling(opnd2)) {
				2178	/* trap if INVALIDTRAP enabled */
				2179	if (Is_invalidtrap_enabled())
				2180	return(OPC_2E_INVALIDEXCEPTION);
				2181	/* make NaN quiet */
				2182	Set_invalidflag();
				2183	Sgl_set_quiet(opnd2);
				2184	}
				2185	/*
				2186	* is third operand a signaling NaN?
				2187	*/
				2188	else if (Sgl_is_signalingnan(opnd3)) {
				2189	/* trap if INVALIDTRAP enabled */
				2190	if (Is_invalidtrap_enabled())
				2191	return(OPC_2E_INVALIDEXCEPTION);
				2192	/* make NaN quiet */
				2193	Set_invalidflag();
				2194	Sgl_set_quiet(opnd3);
				2195	Sgl_copytoptr(opnd3,dstptr);
				2196	return(NOEXCEPTION);
				2197	}
				2198	/*
				2199	* return quiet NaN
				2200	*/
				2201	Sgl_copytoptr(opnd2,dstptr);
				2202	return(NOEXCEPTION);
				2203	}
				2204	}
				2205
				2206	/*
				2207	* check third operand for NaN's or infinity
				2208	*/
				2209	if (Sgl_isinfinity_exponent(opnd3)) {
				2210	if (Sgl_iszero_mantissa(opnd3)) {
				2211	/* return infinity */
				2212	Sgl_copytoptr(opnd3,dstptr);
				2213	return(NOEXCEPTION);
				2214	} else {
				2215	/*
				2216	* is NaN; signaling or quiet?
				2217	*/
				2218	if (Sgl_isone_signaling(opnd3)) {
				2219	/* trap if INVALIDTRAP enabled */
				2220	if (Is_invalidtrap_enabled())
				2221	return(OPC_2E_INVALIDEXCEPTION);
				2222	/* make NaN quiet */
				2223	Set_invalidflag();
				2224	Sgl_set_quiet(opnd3);
				2225	}
				2226	/*
				2227	* return quiet NaN
				2228	*/
				2229	Sgl_copytoptr(opnd3,dstptr);
				2230	return(NOEXCEPTION);
				2231	}
				2232	}
				2233
				2234	/*
				2235	* Generate multiply mantissa
				2236	*/
				2237	if (Sgl_isnotzero_exponent(opnd1)) {
				2238	/* set hidden bit */
				2239	Sgl_clear_signexponent_set_hidden(opnd1);
				2240	}
				2241	else {
				2242	/* check for zero */
				2243	if (Sgl_iszero_mantissa(opnd1)) {
				2244	/*
				2245	* Perform the add opnd3 with zero here.
				2246	*/
				2247	if (Sgl_iszero_exponentmantissa(opnd3)) {
				2248	if (Is_rounding_mode(ROUNDMINUS)) {
				2249	Sgl_or_signs(opnd3,resultp1);
				2250	} else {
				2251	Sgl_and_signs(opnd3,resultp1);
				2252	}
				2253	}
				2254	/*
				2255	* Now let's check for trapped underflow case.
				2256	*/
				2257	else if (Sgl_iszero_exponent(opnd3) &&
				2258	Is_underflowtrap_enabled()) {
				2259	/* need to normalize results mantissa */
				2260	sign_save = Sgl_signextendedsign(opnd3);
				2261	result_exponent = 0;
				2262	Sgl_leftshiftby1(opnd3);
				2263	Sgl_normalize(opnd3,result_exponent);
				2264	Sgl_set_sign(opnd3,/using/sign_save);
				2265	Sgl_setwrapped_exponent(opnd3,result_exponent,
				2266	unfl);
				2267	Sgl_copytoptr(opnd3,dstptr);
				2268	/* inexact = FALSE */
				2269	return(OPC_2E_UNDERFLOWEXCEPTION);
				2270	}
				2271	Sgl_copytoptr(opnd3,dstptr);
				2272	return(NOEXCEPTION);
				2273	}
				2274	/* is denormalized, adjust exponent */
				2275	Sgl_clear_signexponent(opnd1);
				2276	Sgl_leftshiftby1(opnd1);
				2277	Sgl_normalize(opnd1,mpy_exponent);
				2278	}
				2279	/* opnd2 needs to have hidden bit set with msb in hidden bit */
				2280	if (Sgl_isnotzero_exponent(opnd2)) {
				2281	Sgl_clear_signexponent_set_hidden(opnd2);
				2282	}
				2283	else {
				2284	/* check for zero */
				2285	if (Sgl_iszero_mantissa(opnd2)) {
				2286	/*
				2287	* Perform the add opnd3 with zero here.
				2288	*/
				2289	if (Sgl_iszero_exponentmantissa(opnd3)) {
				2290	if (Is_rounding_mode(ROUNDMINUS)) {
				2291	Sgl_or_signs(opnd3,resultp1);
				2292	} else {
				2293	Sgl_and_signs(opnd3,resultp1);
				2294	}
				2295	}
				2296	/*
				2297	* Now let's check for trapped underflow case.
				2298	*/
				2299	else if (Sgl_iszero_exponent(opnd3) &&
				2300	Is_underflowtrap_enabled()) {
				2301	/* need to normalize results mantissa */
				2302	sign_save = Sgl_signextendedsign(opnd3);
				2303	result_exponent = 0;
				2304	Sgl_leftshiftby1(opnd3);
				2305	Sgl_normalize(opnd3,result_exponent);
				2306	Sgl_set_sign(opnd3,/using/sign_save);
				2307	Sgl_setwrapped_exponent(opnd3,result_exponent,
				2308	unfl);
				2309	Sgl_copytoptr(opnd3,dstptr);
				2310	/* inexact = FALSE */
				2311	return(OPC_2E_UNDERFLOWEXCEPTION);
				2312	}
				2313	Sgl_copytoptr(opnd3,dstptr);
				2314	return(NOEXCEPTION);
				2315	}
				2316	/* is denormalized; want to normalize */
				2317	Sgl_clear_signexponent(opnd2);
				2318	Sgl_leftshiftby1(opnd2);
				2319	Sgl_normalize(opnd2,mpy_exponent);
				2320	}
				2321
				2322	/* Multiply the first two source mantissas together */
				2323
				2324	/*
				2325	* The intermediate result will be kept in tmpres,
				2326	* which needs enough room for 106 bits of mantissa,
				2327	* so lets call it a Double extended.
				2328	*/
				2329	Sglext_setzero(tmpresp1,tmpresp2);
				2330
				2331	/*
				2332	* Four bits at a time are inspected in each loop, and a
				2333	* simple shift and add multiply algorithm is used.
				2334	*/
				2335	for (count = SGL_P-1; count >= 0; count -= 4) {
				2336	Sglext_rightshiftby4(tmpresp1,tmpresp2);
				2337	if (Sbit28(opnd1)) {
				2338	/* Twoword_add should be an ADD followed by 2 ADDC's */
				2339	Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
				2340	}
				2341	if (Sbit29(opnd1)) {
				2342	Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
				2343	}
				2344	if (Sbit30(opnd1)) {
				2345	Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
				2346	}
				2347	if (Sbit31(opnd1)) {
				2348	Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
				2349	}
				2350	Sgl_rightshiftby4(opnd1);
				2351	}
				2352	if (Is_sexthiddenoverflow(tmpresp1)) {
				2353	/* result mantissa >= 2 (mantissa overflow) */
				2354	mpy_exponent++;
				2355	Sglext_rightshiftby4(tmpresp1,tmpresp2);
				2356	} else {
				2357	Sglext_rightshiftby3(tmpresp1,tmpresp2);
				2358	}
				2359
				2360	/*
				2361	* Restore the sign of the mpy result which was saved in resultp1.
				2362	* The exponent will continue to be kept in mpy_exponent.
				2363	*/
				2364	Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
				2365
				2366	/*
				2367	* No rounding is required, since the result of the multiply
				2368	* is exact in the extended format.
				2369	*/
				2370
				2371	/*
				2372	* Now we are ready to perform the add portion of the operation.
				2373	*
				2374	* The exponents need to be kept as integers for now, since the
				2375	* multiply result might not fit into the exponent field. We
				2376	* can't overflow or underflow because of this yet, since the
				2377	* add could bring the final result back into range.
				2378	*/
				2379	add_exponent = Sgl_exponent(opnd3);
				2380
				2381	/*
				2382	* Check for denormalized or zero add operand.
				2383	*/
				2384	if (add_exponent == 0) {
				2385	/* check for zero */
				2386	if (Sgl_iszero_mantissa(opnd3)) {
				2387	/* right is zero */
				2388	/* Left can't be zero and must be result.
				2389	*
				2390	* The final result is now in tmpres and mpy_exponent,
				2391	* and needs to be rounded and squeezed back into
				2392	* double precision format from double extended.
				2393	*/
				2394	result_exponent = mpy_exponent;
				2395	Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
				2396	sign_save = Sgl_signextendedsign(resultp1);/save sign/
				2397	goto round;
				2398	}
				2399
				2400	/*
				2401	* Neither are zeroes.
				2402	* Adjust exponent and normalize add operand.
				2403	*/
				2404	sign_save = Sgl_signextendedsign(opnd3); /* save sign */
				2405	Sgl_clear_signexponent(opnd3);
				2406	Sgl_leftshiftby1(opnd3);
				2407	Sgl_normalize(opnd3,add_exponent);
				2408	Sgl_set_sign(opnd3,sign_save); /* restore sign */
				2409	} else {
				2410	Sgl_clear_exponent_set_hidden(opnd3);
				2411	}
				2412	/*
				2413	* Copy opnd3 to the double extended variable called right.
				2414	*/
				2415	Sgl_copyto_sglext(opnd3,rightp1,rightp2);
				2416
				2417	/*
				2418	* A zero "save" helps discover equal operands (for later),
				2419	* and is used in swapping operands (if needed).
				2420	*/
				2421	Sglext_xortointp1(tmpresp1,rightp1,/to/save);
				2422
				2423	/*
				2424	* Compare magnitude of operands.
				2425	*/
				2426	Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
				2427	Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
				2428	if (mpy_exponent < add_exponent \|\| mpy_exponent == add_exponent &&
				2429	Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
				2430	/*
				2431	* Set the left operand to the larger one by XOR swap.
				2432	* First finish the first word "save".
				2433	*/
				2434	Sglext_xorfromintp1(save,rightp1,/to/rightp1);
				2435	Sglext_xorfromintp1(save,tmpresp1,/to/tmpresp1);
				2436	Sglext_swap_lower(tmpresp2,rightp2);
				2437	/* also setup exponents used in rest of routine */
				2438	diff_exponent = add_exponent - mpy_exponent;
				2439	result_exponent = add_exponent;
				2440	} else {
				2441	/* also setup exponents used in rest of routine */
				2442	diff_exponent = mpy_exponent - add_exponent;
				2443	result_exponent = mpy_exponent;
				2444	}
				2445	/* Invariant: left is not smaller than right. */
				2446
				2447	/*
				2448	* Special case alignment of operands that would force alignment
				2449	* beyond the extent of the extension. A further optimization
				2450	* could special case this but only reduces the path length for
				2451	* this infrequent case.
				2452	*/
				2453	if (diff_exponent > SGLEXT_THRESHOLD) {
				2454	diff_exponent = SGLEXT_THRESHOLD;
				2455	}
				2456
				2457	/* Align right operand by shifting it to the right */
				2458	Sglext_clear_sign(rightp1);
				2459	Sglext_right_align(rightp1,rightp2,/shifted by/diff_exponent);
				2460
				2461	/* Treat sum and difference of the operands separately. */
				2462	if ((int)save < 0) {
				2463	/*
				2464	* Difference of the two operands. Overflow can occur if the
				2465	* multiply overflowed. A borrow can occur out of the hidden
				2466	* bit and force a post normalization phase.
				2467	*/
				2468	Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
				2469	resultp1,resultp2);
				2470	sign_save = Sgl_signextendedsign(resultp1);
				2471	if (Sgl_iszero_hidden(resultp1)) {
				2472	/* Handle normalization */
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	2473	/* A straightforward algorithm would now shift the
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2474	* result and extension left until the hidden bit
				2475	* becomes one. Not all of the extension bits need
				2476	* participate in the shift. Only the two most
				2477	* significant bits (round and guard) are needed.
				2478	* If only a single shift is needed then the guard
				2479	* bit becomes a significant low order bit and the
				2480	* extension must participate in the rounding.
				2481	* If more than a single shift is needed, then all
				2482	* bits to the right of the guard bit are zeros,
				2483	* and the guard bit may or may not be zero. */
				2484	Sglext_leftshiftby1(resultp1,resultp2);
				2485
				2486	/* Need to check for a zero result. The sign and
				2487	* exponent fields have already been zeroed. The more
				2488	* efficient test of the full object can be used.
				2489	*/
				2490	if (Sglext_iszero(resultp1,resultp2)) {
				2491	/* Must have been "x-x" or "x+(-x)". */
				2492	if (Is_rounding_mode(ROUNDMINUS))
				2493	Sgl_setone_sign(resultp1);
				2494	Sgl_copytoptr(resultp1,dstptr);
				2495	return(NOEXCEPTION);
				2496	}
				2497	result_exponent--;
				2498
				2499	/* Look to see if normalization is finished. */
				2500	if (Sgl_isone_hidden(resultp1)) {
				2501	/* No further normalization is needed */
				2502	goto round;
				2503	}
				2504
				2505	/* Discover first one bit to determine shift amount.
				2506	* Use a modified binary search. We have already
				2507	* shifted the result one position right and still
				2508	* not found a one so the remainder of the extension
				2509	* must be zero and simplifies rounding. */
				2510	/* Scan bytes */
				2511	while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
				2512	Sglext_leftshiftby8(resultp1,resultp2);
				2513	result_exponent -= 8;
				2514	}
				2515	/* Now narrow it down to the nibble */
				2516	if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
				2517	/* The lower nibble contains the
				2518	* normalizing one */
				2519	Sglext_leftshiftby4(resultp1,resultp2);
				2520	result_exponent -= 4;
				2521	}
				2522	/* Select case where first bit is set (already
				2523	* normalized) otherwise select the proper shift. */
				2524	jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
				2525	if (jumpsize <= 7) switch(jumpsize) {
				2526	case 1:
				2527	Sglext_leftshiftby3(resultp1,resultp2);
				2528	result_exponent -= 3;
				2529	break;
				2530	case 2:
				2531	case 3:
				2532	Sglext_leftshiftby2(resultp1,resultp2);
				2533	result_exponent -= 2;
				2534	break;
				2535	case 4:
				2536	case 5:
				2537	case 6:
				2538	case 7:
				2539	Sglext_leftshiftby1(resultp1,resultp2);
				2540	result_exponent -= 1;
				2541	break;
				2542	}
				2543	} /* end if (hidden...)... */
				2544	/* Fall through and round */
				2545	} /* end if (save < 0)... */
				2546	else {
				2547	/* Add magnitudes */
				2548	Sglext_addition(tmpresp1,tmpresp2,
				2549	rightp1,rightp2, /to/resultp1,resultp2);
				2550	sign_save = Sgl_signextendedsign(resultp1);
				2551	if (Sgl_isone_hiddenoverflow(resultp1)) {
				2552	/* Prenormalization required. */
				2553	Sglext_arithrightshiftby1(resultp1,resultp2);
				2554	result_exponent++;
				2555	} /* end if hiddenoverflow... */
				2556	} /* end else ...add magnitudes... */
				2557
				2558	/* Round the result. If the extension and lower two words are
				2559	* all zeros, then the result is exact. Otherwise round in the
				2560	* correct direction. Underflow is possible. If a postnormalization
				2561	* is necessary, then the mantissa is all zeros so no shift is needed.
				2562	*/
				2563	round:
				2564	if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
				2565	Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
				2566	}
				2567	Sgl_set_sign(resultp1,/using/sign_save);
				2568	if (Sglext_isnotzero_mantissap2(resultp2)) {
				2569	inexact = TRUE;
				2570	switch(Rounding_mode()) {
				2571	case ROUNDNEAREST: /* The default. */
				2572	if (Sglext_isone_highp2(resultp2)) {
				2573	/* at least 1/2 ulp */
				2574	if (Sglext_isnotzero_low31p2(resultp2) \|\|
				2575	Sglext_isone_lowp1(resultp1)) {
				2576	/* either exactly half way and odd or
				2577	* more than 1/2ulp */
				2578	Sgl_increment(resultp1);
				2579	}
				2580	}
				2581	break;
				2582
				2583	case ROUNDPLUS:
				2584	if (Sgl_iszero_sign(resultp1)) {
				2585	/* Round up positive results */
				2586	Sgl_increment(resultp1);
				2587	}
				2588	break;
				2589
				2590	case ROUNDMINUS:
				2591	if (Sgl_isone_sign(resultp1)) {
				2592	/* Round down negative results */
				2593	Sgl_increment(resultp1);
				2594	}
				2595
				2596	case ROUNDZERO:;
				2597	/* truncate is simple */
				2598	} /* end switch... */
				2599	if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
				2600	}
				2601	if (result_exponent >= SGL_INFINITY_EXPONENT) {
				2602	/* Overflow */
				2603	if (Is_overflowtrap_enabled()) {
				2604	/*
				2605	* Adjust bias of result
				2606	*/
				2607	Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
				2608	Sgl_copytoptr(resultp1,dstptr);
				2609	if (inexact)
				2610	if (Is_inexacttrap_enabled())
				2611	return (OPC_2E_OVERFLOWEXCEPTION \|
				2612	OPC_2E_INEXACTEXCEPTION);
				2613	else Set_inexactflag();
				2614	return (OPC_2E_OVERFLOWEXCEPTION);
				2615	}
				2616	inexact = TRUE;
				2617	Set_overflowflag();
				2618	Sgl_setoverflow(resultp1);
				2619	} else if (result_exponent <= 0) { /* underflow case */
				2620	if (Is_underflowtrap_enabled()) {
				2621	/*
				2622	* Adjust bias of result
				2623	*/
				2624	Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
				2625	Sgl_copytoptr(resultp1,dstptr);
				2626	if (inexact)
				2627	if (Is_inexacttrap_enabled())
				2628	return (OPC_2E_UNDERFLOWEXCEPTION \|
				2629	OPC_2E_INEXACTEXCEPTION);
				2630	else Set_inexactflag();
				2631	return(OPC_2E_UNDERFLOWEXCEPTION);
				2632	}
				2633	else if (inexact && is_tiny) Set_underflowflag();
				2634	}
				2635	else Sgl_set_exponent(resultp1,result_exponent);
				2636	Sgl_copytoptr(resultp1,dstptr);
				2637	if (inexact)
				2638	if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
				2639	else Set_inexactflag();
				2640	return(NOEXCEPTION);
				2641	}
				2642