Blame - arch/mips/math-emu/sp_maddf.c - SHIFTPHONES/kernel/common

blob: 07ba675401e24172ea762ec3e62b6c65702a822b [file] [log] [blame]

Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	1	/*
				2	* IEEE754 floating point arithmetic
				3	* single precision: MADDF.f (Fused Multiply Add)
				4	* MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
				5	*
				6	* MIPS floating point support
				7	* Copyright (C) 2015 Imagination Technologies, Ltd.
				8	* Author: Markos Chandras <markos.chandras@imgtec.com>
				9	*
				10	* This program is free software; you can distribute it and/or modify it
				11	* under the terms of the GNU General Public License as published by the
				12	* Free Software Foundation; version 2 of the License.
				13	*/
				14
				15	#include "ieee754sp.h"
				16
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	17
				18	static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
				19	union ieee754sp y, enum maddf_flags flags)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	20	{
				21	int re;
				22	int rs;
Aleksandar Markovic	a58f85b	2017-11-02 12:13:59 +0100	[diff] [blame]	23	unsigned int rm;
				24	u64 rm64;
				25	u64 zm64;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	26	int s;
				27
				28	COMPXSP;
				29	COMPYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	30	COMPZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	31
				32	EXPLODEXSP;
				33	EXPLODEYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	34	EXPLODEZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	35
				36	FLUSHXSP;
				37	FLUSHYSP;
Paul Burton	e2d11e1	2016-04-21 14:04:51 +0100	[diff] [blame]	38	FLUSHZSP;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	39
				40	ieee754_clearcx();
				41
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	42	/*
				43	* Handle the cases when at least one of x, y or z is a NaN.
				44	* Order of precedence is sNaN, qNaN and z, x, y.
				45	*/
				46	if (zc == IEEE754_CLASS_SNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	47	return ieee754sp_nanxcpt(z);
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	48	if (xc == IEEE754_CLASS_SNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	49	return ieee754sp_nanxcpt(x);
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	50	if (yc == IEEE754_CLASS_SNAN)
				51	return ieee754sp_nanxcpt(y);
				52	if (zc == IEEE754_CLASS_QNAN)
				53	return z;
				54	if (xc == IEEE754_CLASS_QNAN)
				55	return x;
				56	if (yc == IEEE754_CLASS_QNAN)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	57	return y;
				58
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	59	if (zc == IEEE754_CLASS_DNORM)
				60	SPDNORMZ;
				61	/* ZERO z cases are handled separately below */
				62
				63	switch (CLPAIR(xc, yc)) {
				64
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	65
				66	/*
				67	* Infinity handling
				68	*/
				69	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
				70	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	71	ieee754_setcx(IEEE754_INVALID_OPERATION);
				72	return ieee754sp_indef();
				73
				74	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
				75	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
				76	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
				77	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
				78	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
Aleksandar Markovic	0c64fe6	2017-07-27 18:08:55 +0200	[diff] [blame]	79	if ((zc == IEEE754_CLASS_INF) &&
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	80	((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) \|\|
				81	((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
Aleksandar Markovic	0c64fe6	2017-07-27 18:08:55 +0200	[diff] [blame]	82	/*
				83	* Cases of addition of infinities with opposite signs
				84	* or subtraction of infinities with same signs.
				85	*/
				86	ieee754_setcx(IEEE754_INVALID_OPERATION);
				87	return ieee754sp_indef();
				88	}
				89	/*
				90	* z is here either not an infinity, or an infinity having the
				91	* same sign as product (x*y) (in case of MADDF.D instruction)
				92	* or product -(x*y) (in MSUBF.D case). The result must be an
				93	* infinity, and its sign is determined only by the value of
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	94	* (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
Aleksandar Markovic	0c64fe6	2017-07-27 18:08:55 +0200	[diff] [blame]	95	*/
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	96	if (flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	0c64fe6	2017-07-27 18:08:55 +0200	[diff] [blame]	97	return ieee754sp_inf(1 ^ (xs ^ ys));
				98	else
				99	return ieee754sp_inf(xs ^ ys);
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	100
				101	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
				102	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
				103	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
				104	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
				105	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
				106	if (zc == IEEE754_CLASS_INF)
				107	return ieee754sp_inf(zs);
Aleksandar Markovic	7cf64ce	2017-07-27 18:08:56 +0200	[diff] [blame]	108	if (zc == IEEE754_CLASS_ZERO) {
				109	/* Handle cases +0 + (-0) and similar ones. */
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	110	if ((!(flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	7cf64ce	2017-07-27 18:08:56 +0200	[diff] [blame]	111	&& (zs == (xs ^ ys))) \|\|
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	112	((flags & MADDF_NEGATE_PRODUCT)
Aleksandar Markovic	7cf64ce	2017-07-27 18:08:56 +0200	[diff] [blame]	113	&& (zs != (xs ^ ys))))
				114	/*
				115	* Cases of addition of zeros of equal signs
				116	* or subtraction of zeroes of opposite signs.
				117	* The sign of the resulting zero is in any
				118	* such case determined only by the sign of z.
				119	*/
				120	return z;
				121
				122	return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
				123	}
				124	/* xy is here 0, and z is not 0, so just return z /
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	125	return z;
				126
				127	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
				128	SPDNORMX;
Aleksandar Markovic	2a14b21	2017-11-02 12:14:05 +0100	[diff] [blame]	129	/* fall through */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	130
				131	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	132	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	133	return ieee754sp_inf(zs);
				134	SPDNORMY;
				135	break;
				136
				137	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	138	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	139	return ieee754sp_inf(zs);
				140	SPDNORMX;
				141	break;
				142
				143	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
Aleksandar Markovic	e840be6	2017-07-27 18:08:54 +0200	[diff] [blame]	144	if (zc == IEEE754_CLASS_INF)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	145	return ieee754sp_inf(zs);
Aleksandar Markovic	2a14b21	2017-11-02 12:14:05 +0100	[diff] [blame]	146	/* continue to real computations */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	147	}
				148
				149	/* Finally get to do some computation */
				150
				151	/*
				152	* Do the multiplication bit first
				153	*
				154	* rm = xm * ym, re = xe + ye basically
				155	*
				156	* At this point xm and ym should have been normalized.
				157	*/
				158
				159	/* rm = xm * ym, re = xe+ye basically */
				160	assert(xm & SP_HIDDEN_BIT);
				161	assert(ym & SP_HIDDEN_BIT);
				162
				163	re = xe + ye;
				164	rs = xs ^ ys;
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	165	if (flags & MADDF_NEGATE_PRODUCT)
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	166	rs ^= 1;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	167
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	168	/* Multiple 24 bit xm and ym to give 48 bit results */
				169	rm64 = (uint64_t)xm * ym;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	170
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	171	/* Shunt to top of word */
				172	rm64 = rm64 << 16;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	173
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	174	/* Put explicit bit at bit 62 if necessary */
				175	if ((int64_t) rm64 < 0) {
				176	rm64 = rm64 >> 1;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	177	re++;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	178	}
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	179
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	180	assert(rm64 & (1 << 62));
				181
				182	if (zc == IEEE754_CLASS_ZERO) {
				183	/*
				184	* Move explicit bit from bit 62 to bit 26 since the
				185	* ieee754sp_format code expects the mantissa to be
				186	* 27 bits wide (24 + 3 rounding bits).
				187	*/
				188	rm = XSPSRS64(rm64, (62 - 26));
Aleksandar Markovic	ddbfff7	2017-06-19 17:50:12 +0200	[diff] [blame]	189	return ieee754sp_format(rs, re, rm);
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	190	}
Aleksandar Markovic	ddbfff7	2017-06-19 17:50:12 +0200	[diff] [blame]	191
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	192	/* Move explicit bit from bit 23 to bit 62 */
				193	zm64 = (uint64_t)zm << (62 - 23);
				194	assert(zm64 & (1 << 62));
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	195
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	196	/* Make the exponents the same */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	197	if (ze > re) {
				198	/*
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	199	* Have to shift r fraction right to align.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	200	*/
				201	s = ze - re;
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	202	rm64 = XSPSRS64(rm64, s);
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	203	re += s;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	204	} else if (re > ze) {
				205	/*
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	206	* Have to shift z fraction right to align.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	207	*/
				208	s = re - ze;
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	209	zm64 = XSPSRS64(zm64, s);
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	210	ze += s;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	211	}
				212	assert(ze == re);
				213	assert(ze <= SP_EMAX);
				214
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	215	/* Do the addition */
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	216	if (zs == rs) {
				217	/*
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	218	* Generate 64 bit result by adding two 63 bit numbers
				219	* leaving result in zm64, zs and ze.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	220	*/
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	221	zm64 = zm64 + rm64;
				222	if ((int64_t)zm64 < 0) { /* carry out */
				223	zm64 = XSPSRS1(zm64);
Paul Burton	db57f29	2016-04-21 14:04:54 +0100	[diff] [blame]	224	ze++;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	225	}
				226	} else {
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	227	if (zm64 >= rm64) {
				228	zm64 = zm64 - rm64;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	229	} else {
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	230	zm64 = rm64 - zm64;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	231	zs = rs;
				232	}
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	233	if (zm64 == 0)
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	234	return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
				235
				236	/*
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	237	* Put explicit bit at bit 62 if necessary.
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	238	*/
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	239	while ((zm64 >> 62) == 0) {
				240	zm64 <<= 1;
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	241	ze--;
				242	}
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	243	}
Douglas Leung	b3b8e1e	2017-07-27 18:08:58 +0200	[diff] [blame]	244
				245	/*
				246	* Move explicit bit from bit 62 to bit 26 since the
				247	* ieee754sp_format code expects the mantissa to be
				248	* 27 bits wide (24 + 3 rounding bits).
				249	*/
				250	zm = XSPSRS64(zm64, (62 - 26));
				251
Markos Chandras	e24c3be	2015-08-13 09:56:31 +0200	[diff] [blame]	252	return ieee754sp_format(zs, ze, zm);
				253	}
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	254
				255	union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
				256	union ieee754sp y)
				257	{
				258	return _sp_maddf(z, x, y, 0);
				259	}
				260
				261	union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
				262	union ieee754sp y)
				263	{
Aleksandar Markovic	ae11c06	2017-07-27 18:08:57 +0200	[diff] [blame]	264	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
Paul Burton	6162051	2016-04-21 14:04:49 +0100	[diff] [blame]	265	}