Blame - arch/x86/math-emu/poly_sin.c - SHIFTPHONES/kernel/shift/mainline

blob: c192fba51526455d00adf6917fb9c89c6ba6bc1a [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2	/*---------------------------------------------------------------------------+
				3	\| poly_sin.c \|
				4	\| \|
				5	\| Computation of an approximation of the sin function and the cosine \|
				6	\| function by a polynomial. \|
				7	\| \|
				8	\| Copyright (C) 1992,1993,1994,1997,1999 \|
				9	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia \|
				10	\| E-mail billm@melbpc.org.au \|
				11	\| \|
				12	\| \|
				13	+---------------------------------------------------------------------------*/
				14
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	15	#include "exception.h"
				16	#include "reg_constant.h"
				17	#include "fpu_emu.h"
				18	#include "fpu_system.h"
				19	#include "control_w.h"
				20	#include "poly.h"
				21
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	22	#define N_COEFF_P 4
				23	#define N_COEFF_N 4
				24
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	25	static const unsigned long long pos_terms_l[N_COEFF_P] = {
				26	0xaaaaaaaaaaaaaaabLL,
				27	0x00d00d00d00cf906LL,
				28	0x000006b99159a8bbLL,
				29	0x000000000d7392e6LL
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	30	};
				31
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	32	static const unsigned long long neg_terms_l[N_COEFF_N] = {
				33	0x2222222222222167LL,
				34	0x0002e3bc74aab624LL,
				35	0x0000000b09229062LL,
				36	0x00000000000c7973LL
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37	};
				38
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	39	#define N_COEFF_PH 4
				40	#define N_COEFF_NH 4
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	41	static const unsigned long long pos_terms_h[N_COEFF_PH] = {
				42	0x0000000000000000LL,
				43	0x05b05b05b05b0406LL,
				44	0x000049f93edd91a9LL,
				45	0x00000000c9c9ed62LL
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	46	};
				47
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	48	static const unsigned long long neg_terms_h[N_COEFF_NH] = {
				49	0xaaaaaaaaaaaaaa98LL,
				50	0x001a01a01a019064LL,
				51	0x0000008f76c68a77LL,
				52	0x0000000000d58f5eLL
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53	};
				54
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	55	/*--- poly_sine() -----------------------------------------------------------+
				56	\| \|
				57	+---------------------------------------------------------------------------*/
Ingo Molnar	e8d591d	2008-01-30 13:30:12 +0100	[diff] [blame]	58	void poly_sine(FPU_REG *st0_ptr)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	59	{
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	60	int exponent, echange;
				61	Xsig accumulator, argSqrd, argTo4;
				62	unsigned long fix_up, adj;
				63	unsigned long long fixed_arg;
				64	FPU_REG result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	65
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	66	exponent = exponent(st0_ptr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	67
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	68	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	69
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	70	/* Split into two ranges, for arguments below and above 1.0 */
				71	/* The boundary between upper and lower is approx 0.88309101259 */
				72	if ((exponent < -1)
				73	\|\| ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
				74	/* The argument is <= 0.88309101259 */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	75
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	76	argSqrd.msw = st0_ptr->sigh;
				77	argSqrd.midw = st0_ptr->sigl;
				78	argSqrd.lsw = 0;
				79	mul64_Xsig(&argSqrd, &significand(st0_ptr));
				80	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
				81	argTo4.msw = argSqrd.msw;
				82	argTo4.midw = argSqrd.midw;
				83	argTo4.lsw = argSqrd.lsw;
				84	mul_Xsig_Xsig(&argTo4, &argTo4);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	85
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	86	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
				87	N_COEFF_N - 1);
				88	mul_Xsig_Xsig(&accumulator, &argSqrd);
				89	negate_Xsig(&accumulator);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	90
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	91	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
				92	N_COEFF_P - 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	93
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	94	shr_Xsig(&accumulator, 2); /* Divide by four */
				95	accumulator.msw \|= 0x80000000; /* Add 1.0 */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	96
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	97	mul64_Xsig(&accumulator, &significand(st0_ptr));
				98	mul64_Xsig(&accumulator, &significand(st0_ptr));
				99	mul64_Xsig(&accumulator, &significand(st0_ptr));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	100
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	101	/* Divide by four, FPU_REG compatible, etc */
				102	exponent = 3 * exponent;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	103
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	104	/* The minimum exponent difference is 3 */
				105	shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	106
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	107	negate_Xsig(&accumulator);
				108	XSIG_LL(accumulator) += significand(st0_ptr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	109
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	110	echange = round_Xsig(&accumulator);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	111
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	112	setexponentpos(&result, exponent(st0_ptr) + echange);
				113	} else {
				114	/* The argument is > 0.88309101259 */
				115	/* We use sin(st(0)) = cos(pi/2-st(0)) */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	116
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	117	fixed_arg = significand(st0_ptr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	118
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	119	if (exponent == 0) {
				120	/* The argument is >= 1.0 */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	121
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	122	/* Put the binary point at the left. */
				123	fixed_arg <<= 1;
				124	}
				125	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
				126	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
				127	/* There is a special case which arises due to rounding, to fix here. */
				128	if (fixed_arg == 0xffffffffffffffffLL)
				129	fixed_arg = 0;
				130
				131	XSIG_LL(argSqrd) = fixed_arg;
				132	argSqrd.lsw = 0;
				133	mul64_Xsig(&argSqrd, &fixed_arg);
				134
				135	XSIG_LL(argTo4) = XSIG_LL(argSqrd);
				136	argTo4.lsw = argSqrd.lsw;
				137	mul_Xsig_Xsig(&argTo4, &argTo4);
				138
				139	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
				140	N_COEFF_NH - 1);
				141	mul_Xsig_Xsig(&accumulator, &argSqrd);
				142	negate_Xsig(&accumulator);
				143
				144	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
				145	N_COEFF_PH - 1);
				146	negate_Xsig(&accumulator);
				147
				148	mul64_Xsig(&accumulator, &fixed_arg);
				149	mul64_Xsig(&accumulator, &fixed_arg);
				150
				151	shr_Xsig(&accumulator, 3);
				152	negate_Xsig(&accumulator);
				153
				154	add_Xsig_Xsig(&accumulator, &argSqrd);
				155
				156	shr_Xsig(&accumulator, 1);
				157
				158	accumulator.lsw \|= 1; /* A zero accumulator here would cause problems */
				159	negate_Xsig(&accumulator);
				160
				161	/* The basic computation is complete. Now fix the answer to
				162	compensate for the error due to the approximation used for
				163	pi/2
				164	*/
				165
				166	/* This has an exponent of -65 */
				167	fix_up = 0x898cc517;
				168	/* The fix-up needs to be improved for larger args */
				169	if (argSqrd.msw & 0xffc00000) {
				170	/* Get about 32 bit precision in these: */
				171	fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
				172	}
				173	fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
				174
				175	adj = accumulator.lsw; /* temp save */
				176	accumulator.lsw -= fix_up;
				177	if (accumulator.lsw > adj)
				178	XSIG_LL(accumulator)--;
				179
				180	echange = round_Xsig(&accumulator);
				181
				182	setexponentpos(&result, echange - 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	183	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	184
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	185	significand(&result) = XSIG_LL(accumulator);
				186	setsign(&result, getsign(st0_ptr));
				187	FPU_copy_to_reg0(&result, TAG_Valid);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	188
				189	#ifdef PARANOID
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	190	if ((exponent(&result) >= 0)
				191	&& (significand(&result) > 0x8000000000000000LL)) {
				192	EXCEPTION(EX_INTERNAL \| 0x150);
				193	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	194	#endif /* PARANOID */
				195
				196	}
				197
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	198	/*--- poly_cos() ------------------------------------------------------------+
				199	\| \|
				200	+---------------------------------------------------------------------------*/
Ingo Molnar	e8d591d	2008-01-30 13:30:12 +0100	[diff] [blame]	201	void poly_cos(FPU_REG *st0_ptr)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	202	{
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	203	FPU_REG result;
				204	long int exponent, exp2, echange;
				205	Xsig accumulator, argSqrd, fix_up, argTo4;
				206	unsigned long long fixed_arg;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	207
				208	#ifdef PARANOID
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	209	if ((exponent(st0_ptr) > 0)
				210	\|\| ((exponent(st0_ptr) == 0)
				211	&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
				212	EXCEPTION(EX_Invalid);
				213	FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
				214	return;
				215	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	216	#endif /* PARANOID */
				217
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	218	exponent = exponent(st0_ptr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	219
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	220	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	221
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	222	if ((exponent < -1)
				223	\|\| ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
				224	/* arg is < 0.687705 */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	225
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	226	argSqrd.msw = st0_ptr->sigh;
				227	argSqrd.midw = st0_ptr->sigl;
				228	argSqrd.lsw = 0;
				229	mul64_Xsig(&argSqrd, &significand(st0_ptr));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	230
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	231	if (exponent < -1) {
				232	/* shift the argument right by the required places */
				233	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
				234	}
				235
				236	argTo4.msw = argSqrd.msw;
				237	argTo4.midw = argSqrd.midw;
				238	argTo4.lsw = argSqrd.lsw;
				239	mul_Xsig_Xsig(&argTo4, &argTo4);
				240
				241	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
				242	N_COEFF_NH - 1);
				243	mul_Xsig_Xsig(&accumulator, &argSqrd);
				244	negate_Xsig(&accumulator);
				245
				246	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
				247	N_COEFF_PH - 1);
				248	negate_Xsig(&accumulator);
				249
				250	mul64_Xsig(&accumulator, &significand(st0_ptr));
				251	mul64_Xsig(&accumulator, &significand(st0_ptr));
				252	shr_Xsig(&accumulator, -2 * (1 + exponent));
				253
				254	shr_Xsig(&accumulator, 3);
				255	negate_Xsig(&accumulator);
				256
				257	add_Xsig_Xsig(&accumulator, &argSqrd);
				258
				259	shr_Xsig(&accumulator, 1);
				260
				261	/* It doesn't matter if accumulator is all zero here, the
				262	following code will work ok */
				263	negate_Xsig(&accumulator);
				264
				265	if (accumulator.lsw & 0x80000000)
				266	XSIG_LL(accumulator)++;
				267	if (accumulator.msw == 0) {
				268	/* The result is 1.0 */
				269	FPU_copy_to_reg0(&CONST_1, TAG_Valid);
				270	return;
				271	} else {
				272	significand(&result) = XSIG_LL(accumulator);
				273
				274	/* will be a valid positive nr with expon = -1 */
				275	setexponentpos(&result, -1);
				276	}
				277	} else {
				278	fixed_arg = significand(st0_ptr);
				279
				280	if (exponent == 0) {
				281	/* The argument is >= 1.0 */
				282
				283	/* Put the binary point at the left. */
				284	fixed_arg <<= 1;
				285	}
				286	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
				287	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
				288	/* There is a special case which arises due to rounding, to fix here. */
				289	if (fixed_arg == 0xffffffffffffffffLL)
				290	fixed_arg = 0;
				291
				292	exponent = -1;
				293	exp2 = -1;
				294
				295	/* A shift is needed here only for a narrow range of arguments,
				296	i.e. for fixed_arg approx 2^-32, but we pick up more... */
				297	if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
				298	fixed_arg <<= 16;
				299	exponent -= 16;
				300	exp2 -= 16;
				301	}
				302
				303	XSIG_LL(argSqrd) = fixed_arg;
				304	argSqrd.lsw = 0;
				305	mul64_Xsig(&argSqrd, &fixed_arg);
				306
				307	if (exponent < -1) {
				308	/* shift the argument right by the required places */
				309	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
				310	}
				311
				312	argTo4.msw = argSqrd.msw;
				313	argTo4.midw = argSqrd.midw;
				314	argTo4.lsw = argSqrd.lsw;
				315	mul_Xsig_Xsig(&argTo4, &argTo4);
				316
				317	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
				318	N_COEFF_N - 1);
				319	mul_Xsig_Xsig(&accumulator, &argSqrd);
				320	negate_Xsig(&accumulator);
				321
				322	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
				323	N_COEFF_P - 1);
				324
				325	shr_Xsig(&accumulator, 2); /* Divide by four */
				326	accumulator.msw \|= 0x80000000; /* Add 1.0 */
				327
				328	mul64_Xsig(&accumulator, &fixed_arg);
				329	mul64_Xsig(&accumulator, &fixed_arg);
				330	mul64_Xsig(&accumulator, &fixed_arg);
				331
				332	/* Divide by four, FPU_REG compatible, etc */
				333	exponent = 3 * exponent;
				334
				335	/* The minimum exponent difference is 3 */
				336	shr_Xsig(&accumulator, exp2 - exponent);
				337
				338	negate_Xsig(&accumulator);
				339	XSIG_LL(accumulator) += fixed_arg;
				340
				341	/* The basic computation is complete. Now fix the answer to
				342	compensate for the error due to the approximation used for
				343	pi/2
				344	*/
				345
				346	/* This has an exponent of -65 */
				347	XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
				348	fix_up.lsw = 0;
				349
				350	/* The fix-up needs to be improved for larger args */
				351	if (argSqrd.msw & 0xffc00000) {
				352	/* Get about 32 bit precision in these: */
				353	fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
				354	fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
				355	}
				356
				357	exp2 += norm_Xsig(&accumulator);
				358	shr_Xsig(&accumulator, 1); /* Prevent overflow */
				359	exp2++;
				360	shr_Xsig(&fix_up, 65 + exp2);
				361
				362	add_Xsig_Xsig(&accumulator, &fix_up);
				363
				364	echange = round_Xsig(&accumulator);
				365
				366	setexponentpos(&result, exp2 + echange);
				367	significand(&result) = XSIG_LL(accumulator);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	368	}
				369
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	370	FPU_copy_to_reg0(&result, TAG_Valid);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	371
				372	#ifdef PARANOID
Ingo Molnar	3d0d14f	2008-01-30 13:30:11 +0100	[diff] [blame]	373	if ((exponent(&result) >= 0)
				374	&& (significand(&result) > 0x8000000000000000LL)) {
				375	EXCEPTION(EX_INTERNAL \| 0x151);
				376	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	377	#endif /* PARANOID */
				378
				379	}