Blame - arch/powerpc/kernel/vecemu.c - SHIFTPHONES/mainline/linux

blob: 604d0947cb20cd87dcad9f7e512dd5a63ddd05d4 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Routines to emulate some Altivec/VMX instructions, specifically
				3	* those that can trap when given denormalized operands in Java mode.
				4	*/
				5	#include <linux/kernel.h>
				6	#include <linux/errno.h>
				7	#include <linux/sched.h>
				8	#include <asm/ptrace.h>
				9	#include <asm/processor.h>
				10	#include <asm/uaccess.h>
				11
				12	/* Functions in vector.S */
				13	extern void vaddfp(vector128 dst, vector128 a, vector128 *b);
				14	extern void vsubfp(vector128 dst, vector128 a, vector128 *b);
				15	extern void vmaddfp(vector128 dst, vector128 a, vector128 b, vector128 c);
				16	extern void vnmsubfp(vector128 dst, vector128 a, vector128 b, vector128 c);
				17	extern void vrefp(vector128 dst, vector128 src);
				18	extern void vrsqrtefp(vector128 dst, vector128 src);
				19	extern void vexptep(vector128 dst, vector128 src);
				20
				21	static unsigned int exp2s[8] = {
				22	0x800000,
				23	0x8b95c2,
				24	0x9837f0,
				25	0xa5fed7,
				26	0xb504f3,
				27	0xc5672a,
				28	0xd744fd,
				29	0xeac0c7
				30	};
				31
				32	/*
				33	* Computes an estimate of 2^x. The `s' argument is the 32-bit
				34	* single-precision floating-point representation of x.
				35	*/
				36	static unsigned int eexp2(unsigned int s)
				37	{
				38	int exp, pwr;
				39	unsigned int mant, frac;
				40
				41	/* extract exponent field from input */
				42	exp = ((s >> 23) & 0xff) - 127;
				43	if (exp > 7) {
				44	/* check for NaN input */
				45	if (exp == 128 && (s & 0x7fffff) != 0)
				46	return s \| 0x400000; /* return QNaN */
				47	/* 2^-big = 0, 2^+big = +Inf */
				48	return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
				49	}
				50	if (exp < -23)
				51	return 0x3f800000; /* 1.0 */
				52
				53	/* convert to fixed point integer in 9.23 representation */
				54	pwr = (s & 0x7fffff) \| 0x800000;
				55	if (exp > 0)
				56	pwr <<= exp;
				57	else
				58	pwr >>= -exp;
				59	if (s & 0x80000000)
				60	pwr = -pwr;
				61
				62	/* extract integer part, which becomes exponent part of result */
				63	exp = (pwr >> 23) + 126;
				64	if (exp >= 254)
				65	return 0x7f800000;
				66	if (exp < -23)
				67	return 0;
				68
				69	/* table lookup on top 3 bits of fraction to get mantissa */
				70	mant = exp2s[(pwr >> 20) & 7];
				71
				72	/* linear interpolation using remaining 20 bits of fraction */
				73	asm("mulhwu %0,%1,%2" : "=r" (frac)
				74	: "r" (pwr << 12), "r" (0x172b83ff));
				75	asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
				76	mant += frac;
				77
				78	if (exp >= 0)
				79	return mant + (exp << 23);
				80
				81	/* denormalized result */
				82	exp = -exp;
				83	mant += 1 << (exp - 1);
				84	return mant >> exp;
				85	}
				86
				87	/*
				88	* Computes an estimate of log_2(x). The `s' argument is the 32-bit
				89	* single-precision floating-point representation of x.
				90	*/
				91	static unsigned int elog2(unsigned int s)
				92	{
				93	int exp, mant, lz, frac;
				94
				95	exp = s & 0x7f800000;
				96	mant = s & 0x7fffff;
				97	if (exp == 0x7f800000) { /* Inf or NaN */
				98	if (mant != 0)
				99	s \|= 0x400000; /* turn NaN into QNaN */
				100	return s;
				101	}
				102	if ((exp \| mant) == 0) /* +0 or -0 */
				103	return 0xff800000; /* return -Inf */
				104
				105	if (exp == 0) {
				106	/* denormalized */
				107	asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
				108	mant <<= lz - 8;
				109	exp = (-118 - lz) << 23;
				110	} else {
				111	mant \|= 0x800000;
				112	exp -= 127 << 23;
				113	}
				114
				115	if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */
				116	exp \|= 0x400000; /* 0.5 * 2^23 */
				117	asm("mulhwu %0,%1,%2" : "=r" (mant)
				118	: "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */
				119	}
				120	if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */
				121	exp \|= 0x200000; /* 0.25 * 2^23 */
				122	asm("mulhwu %0,%1,%2" : "=r" (mant)
				123	: "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */
				124	}
				125	if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */
				126	exp \|= 0x100000; /* 0.125 * 2^23 */
				127	asm("mulhwu %0,%1,%2" : "=r" (mant)
				128	: "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */
				129	}
				130	if (mant > 0x800000) { /* 1.0 * 2^23 */
				131	/* calculate (mant - 1) * 1.381097463 */
				132	/* 1.381097463 == 0.125 / (2^0.125 - 1) */
				133	asm("mulhwu %0,%1,%2" : "=r" (frac)
				134	: "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
				135	exp += frac;
				136	}
				137	s = exp & 0x80000000;
				138	if (exp != 0) {
				139	if (s)
				140	exp = -exp;
				141	asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
				142	lz = 8 - lz;
				143	if (lz > 0)
				144	exp >>= lz;
				145	else if (lz < 0)
				146	exp <<= -lz;
				147	s += ((lz + 126) << 23) + exp;
				148	}
				149	return s;
				150	}
				151
				152	#define VSCR_SAT 1
				153
				154	static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
				155	{
				156	int exp, mant;
				157
				158	exp = (x >> 23) & 0xff;
				159	mant = x & 0x7fffff;
				160	if (exp == 255 && mant != 0)
				161	return 0; /* NaN -> 0 */
				162	exp = exp - 127 + scale;
				163	if (exp < 0)
				164	return 0; /* round towards zero */
				165	if (exp >= 31) {
				166	/* saturate, unless the result would be -2^31 */
				167	if (x + (scale << 23) != 0xcf000000)
				168	*vscrp \|= VSCR_SAT;
				169	return (x & 0x80000000)? 0x80000000: 0x7fffffff;
				170	}
				171	mant \|= 0x800000;
				172	mant = (mant << 7) >> (30 - exp);
				173	return (x & 0x80000000)? -mant: mant;
				174	}
				175
				176	static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
				177	{
				178	int exp;
				179	unsigned int mant;
				180
				181	exp = (x >> 23) & 0xff;
				182	mant = x & 0x7fffff;
				183	if (exp == 255 && mant != 0)
				184	return 0; /* NaN -> 0 */
				185	exp = exp - 127 + scale;
				186	if (exp < 0)
				187	return 0; /* round towards zero */
				188	if (x & 0x80000000) {
				189	/* negative => saturate to 0 */
				190	*vscrp \|= VSCR_SAT;
				191	return 0;
				192	}
				193	if (exp >= 32) {
				194	/* saturate */
				195	*vscrp \|= VSCR_SAT;
				196	return 0xffffffff;
				197	}
				198	mant \|= 0x800000;
				199	mant = (mant << 8) >> (31 - exp);
				200	return mant;
				201	}
				202
				203	/* Round to floating integer, towards 0 */
				204	static unsigned int rfiz(unsigned int x)
				205	{
				206	int exp;
				207
				208	exp = ((x >> 23) & 0xff) - 127;
				209	if (exp == 128 && (x & 0x7fffff) != 0)
				210	return x \| 0x400000; /* NaN -> make it a QNaN */
				211	if (exp >= 23)
				212	return x; /* it's an integer already (or Inf) */
				213	if (exp < 0)
				214	return x & 0x80000000; /* \|x\| < 1.0 rounds to 0 */
				215	return x & ~(0x7fffff >> exp);
				216	}
				217
				218	/* Round to floating integer, towards +/- Inf */
				219	static unsigned int rfii(unsigned int x)
				220	{
				221	int exp, mask;
				222
				223	exp = ((x >> 23) & 0xff) - 127;
				224	if (exp == 128 && (x & 0x7fffff) != 0)
				225	return x \| 0x400000; /* NaN -> make it a QNaN */
				226	if (exp >= 23)
				227	return x; /* it's an integer already (or Inf) */
				228	if ((x & 0x7fffffff) == 0)
				229	return x; /* +/-0 -> +/-0 */
				230	if (exp < 0)
				231	/* 0 < \|x\| < 1.0 rounds to +/- 1.0 */
				232	return (x & 0x80000000) \| 0x3f800000;
				233	mask = 0x7fffff >> exp;
				234	/* mantissa overflows into exponent - that's OK,
				235	it can't overflow into the sign bit */
				236	return (x + mask) & ~mask;
				237	}
				238
				239	/* Round to floating integer, to nearest */
				240	static unsigned int rfin(unsigned int x)
				241	{
				242	int exp, half;
				243
				244	exp = ((x >> 23) & 0xff) - 127;
				245	if (exp == 128 && (x & 0x7fffff) != 0)
				246	return x \| 0x400000; /* NaN -> make it a QNaN */
				247	if (exp >= 23)
				248	return x; /* it's an integer already (or Inf) */
				249	if (exp < -1)
				250	return x & 0x80000000; /* \|x\| < 0.5 -> +/-0 */
				251	if (exp == -1)
				252	/* 0.5 <= \|x\| < 1.0 rounds to +/- 1.0 */
				253	return (x & 0x80000000) \| 0x3f800000;
				254	half = 0x400000 >> exp;
				255	/* add 0.5 to the magnitude and chop off the fraction bits */
				256	return (x + half) & ~(0x7fffff >> exp);
				257	}
				258
				259	int emulate_altivec(struct pt_regs *regs)
				260	{
				261	unsigned int instr, i;
				262	unsigned int va, vb, vc, vd;
				263	vector128 *vrs;
				264
				265	if (get_user(instr, (unsigned int __user *) regs->nip))
				266	return -EFAULT;
				267	if ((instr >> 26) != 4)
				268	return -EINVAL; /* not an altivec instruction */
				269	vd = (instr >> 21) & 0x1f;
				270	va = (instr >> 16) & 0x1f;
				271	vb = (instr >> 11) & 0x1f;
				272	vc = (instr >> 6) & 0x1f;
				273
				274	vrs = current->thread.vr;
				275	switch (instr & 0x3f) {
				276	case 10:
				277	switch (vc) {
				278	case 0: /* vaddfp */
				279	vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
				280	break;
				281	case 1: /* vsubfp */
				282	vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
				283	break;
				284	case 4: /* vrefp */
				285	vrefp(&vrs[vd], &vrs[vb]);
				286	break;
				287	case 5: /* vrsqrtefp */
				288	vrsqrtefp(&vrs[vd], &vrs[vb]);
				289	break;
				290	case 6: /* vexptefp */
				291	for (i = 0; i < 4; ++i)
				292	vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
				293	break;
				294	case 7: /* vlogefp */
				295	for (i = 0; i < 4; ++i)
				296	vrs[vd].u[i] = elog2(vrs[vb].u[i]);
				297	break;
				298	case 8: /* vrfin */
				299	for (i = 0; i < 4; ++i)
				300	vrs[vd].u[i] = rfin(vrs[vb].u[i]);
				301	break;
				302	case 9: /* vrfiz */
				303	for (i = 0; i < 4; ++i)
				304	vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
				305	break;
				306	case 10: /* vrfip */
				307	for (i = 0; i < 4; ++i) {
				308	u32 x = vrs[vb].u[i];
				309	x = (x & 0x80000000)? rfiz(x): rfii(x);
				310	vrs[vd].u[i] = x;
				311	}
				312	break;
				313	case 11: /* vrfim */
				314	for (i = 0; i < 4; ++i) {
				315	u32 x = vrs[vb].u[i];
				316	x = (x & 0x80000000)? rfii(x): rfiz(x);
				317	vrs[vd].u[i] = x;
				318	}
				319	break;
				320	case 14: /* vctuxs */
				321	for (i = 0; i < 4; ++i)
				322	vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
				323	&current->thread.vscr.u[3]);
				324	break;
				325	case 15: /* vctsxs */
				326	for (i = 0; i < 4; ++i)
				327	vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
				328	&current->thread.vscr.u[3]);
				329	break;
				330	default:
				331	return -EINVAL;
				332	}
				333	break;
				334	case 46: /* vmaddfp */
				335	vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
				336	break;
				337	case 47: /* vnmsubfp */
				338	vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
				339	break;
				340	default:
				341	return -EINVAL;
				342	}
				343
				344	return 0;
				345	}