Blame - arch/ia64/kernel/unaligned.c - SHIFTPHONES/android_kernel_shift_sdm845

blob: 1e357550c776c20d7ffa7ac4a57e59baee6adfd7 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Architecture-specific unaligned trap handling.
				3	*
				4	* Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
				5	* Stephane Eranian <eranian@hpl.hp.com>
				6	* David Mosberger-Tang <davidm@hpl.hp.com>
				7	*
				8	* 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
				9	* get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
				10	* stacked register returns an undefined value; it does NOT trigger a
				11	* "rsvd register fault").
				12	* 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
				13	* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
				14	* 2001/01/17 Add support emulation of unaligned kernel accesses.
				15	*/
				16	#include <linux/kernel.h>
				17	#include <linux/sched.h>
				18	#include <linux/smp_lock.h>
				19	#include <linux/tty.h>
				20
				21	#include <asm/intrinsics.h>
				22	#include <asm/processor.h>
				23	#include <asm/rse.h>
				24	#include <asm/uaccess.h>
				25	#include <asm/unaligned.h>
				26
/* Defined outside this file; called below with a message string, the trap-time registers and an error code. */
extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28
				29	#undef DEBUG_UNALIGNED_TRAP
				30
				31	#ifdef DEBUG_UNALIGNED_TRAP
				32	# define DPRINT(a...) do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
				33	# define DDUMP(str,vp,len) dump(str, vp, len)
				34
				/*
				 * Debug helper (only built when DEBUG_UNALIGNED_TRAP is defined):
				 * print STR, then the LEN bytes starting at VP as two-digit hex
				 * values, then a newline.
				 */
				static void
				dump (const char *str, void *vp, size_t len)
				{
					unsigned char *cp = vp;
					size_t i;	/* same type as len, so the loop compare is not signed/unsigned */

					printk("%s", str);
					for (i = 0; i < len; ++i)
						printk (" %02x", *cp++);
					printk("\n");
				}
				46	#else
				47	# define DPRINT(a...)
				48	# define DDUMP(str,vp,len)
				49	#endif
				50
				51	#define IA64_FIRST_STACKED_GR 32
				52	#define IA64_FIRST_ROTATING_FR 32
				53	#define SIGN_EXT9 0xffffffffffffff00ul
				54
				55	/*
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame^]	56	* sysctl settable hook which tells the kernel whether to honor the
				57	* IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want
				58	* to allow the super user to enable/disable this for security reasons
				59	* (i.e. don't allow attacker to fill up logs with unaligned accesses).
				60	*/
				61	int no_unaligned_warning;
				62	static int noprint_warning;
				63
				64	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	65	* For M-unit:
				66	*
				67	*  opcode  |   m  |   x6    |
				68	* --------|------|---------|
				69	* [40-37] | [36] | [35:30] |
				70	* --------|------|---------|
				71	*     4   |   1  |    6    | = 11 bits
				72	* --------------------------
				73	* However bits [31:30] are not directly useful to distinguish between
				74	* load/store so we can use [35:32] instead, which gives the following
				75	* mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
				76	* checking the m-bit until later in the load/store emulation.
				77	*/
				78	#define IA64_OPCODE_MASK 0x1ef
				79	#define IA64_OPCODE_SHIFT 32
				80
				81	/*
				82	* Table C-28 Integer Load/Store
				83	*
				84	* We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
				85	*
				86	* ld8.fill, st8.fill MUST be aligned because the RNATs are based on
				87	* the address (bits [8:3]), so we must fail.
				88	*/
				89	#define LD_OP 0x080
				90	#define LDS_OP 0x081
				91	#define LDA_OP 0x082
				92	#define LDSA_OP 0x083
				93	#define LDBIAS_OP 0x084
				94	#define LDACQ_OP 0x085
				95	/* 0x086, 0x087 are not relevant */
				96	#define LDCCLR_OP 0x088
				97	#define LDCNC_OP 0x089
				98	#define LDCCLRACQ_OP 0x08a
				99	#define ST_OP 0x08c
				100	#define STREL_OP 0x08d
				101	/* 0x08e,0x8f are not relevant */
				102
				103	/*
				104	* Table C-29 Integer Load +Reg
				105	*
				106	* we use the ld->m (bit [36:36]) field to determine whether or not we have
				107	* a load/store of this form.
				108	*/
				109
				110	/*
				111	* Table C-30 Integer Load/Store +Imm
				112	*
				113	* We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
				114	*
				115	* ld8.fill, st8.fill must be aligned because the NaT registers are based on
				116	* the address, so we must fail and the program must be fixed.
				117	*/
				118	#define LD_IMM_OP 0x0a0
				119	#define LDS_IMM_OP 0x0a1
				120	#define LDA_IMM_OP 0x0a2
				121	#define LDSA_IMM_OP 0x0a3
				122	#define LDBIAS_IMM_OP 0x0a4
				123	#define LDACQ_IMM_OP 0x0a5
				124	/* 0x0a6, 0xa7 are not relevant */
				125	#define LDCCLR_IMM_OP 0x0a8
				126	#define LDCNC_IMM_OP 0x0a9
				127	#define LDCCLRACQ_IMM_OP 0x0aa
				128	#define ST_IMM_OP 0x0ac
				129	#define STREL_IMM_OP 0x0ad
				130	/* 0x0ae,0xaf are not relevant */
				131
				132	/*
				133	* Table C-32 Floating-point Load/Store
				134	*/
				135	#define LDF_OP 0x0c0
				136	#define LDFS_OP 0x0c1
				137	#define LDFA_OP 0x0c2
				138	#define LDFSA_OP 0x0c3
				139	/* 0x0c6 is irrelevant */
				140	#define LDFCCLR_OP 0x0c8
				141	#define LDFCNC_OP 0x0c9
				142	/* 0x0cb is irrelevant */
				143	#define STF_OP 0x0cc
				144
				145	/*
				146	* Table C-33 Floating-point Load +Reg
				147	*
				148	* we use the ld->m (bit [36:36]) field to determine whether or not we have
				149	* a load/store of this form.
				150	*/
				151
				152	/*
				153	* Table C-34 Floating-point Load/Store +Imm
				154	*/
				155	#define LDF_IMM_OP 0x0e0
				156	#define LDFS_IMM_OP 0x0e1
				157	#define LDFA_IMM_OP 0x0e2
				158	#define LDFSA_IMM_OP 0x0e3
				159	/* 0x0e6 is irrelevant */
				160	#define LDFCCLR_IMM_OP 0x0e8
				161	#define LDFCNC_IMM_OP 0x0e9
				162	#define STF_IMM_OP 0x0ec
				163
				/*
				 * Bit-field view of a load/store instruction word.  Fields are listed
				 * from bit 0 upwards; x6_sz and x6_op together form the 6-bit x6 field
				 * (x6_sz is the low two bits, x6_op the high four).
				 */
				typedef struct {
					unsigned long qp:6;	/* bits  0-5  */
					unsigned long r1:7;	/* bits  6-12 */
					unsigned long imm:7;	/* bits 13-19 */
					unsigned long r3:7;	/* bits 20-26 */
					unsigned long x:1;	/* bit  27    */
					unsigned long hint:2;	/* bits 28-29 */
					unsigned long x6_sz:2;	/* bits 30-31, low part of x6 */
					unsigned long x6_op:4;	/* bits 32-35, high part of x6 */
					unsigned long m:1;	/* bit  36    */
					unsigned long op:4;	/* bits 37-40 */
					unsigned long pad:23;	/* bits 41-63 */
				} load_store_t;
				177
				178
				/* How the base register of a load is post-incremented. */
				typedef enum {
					UPD_IMMEDIATE = 0,	/* by a 9-bit immediate: ldXZ r1=[r3],imm(9) */
					UPD_REG = 1		/* by a register:        ldXZ r1=[r3],r2 */
				} update_t;
				183
				184	/*
				185	* We use tables to keep track of the offsets of registers in the saved state.
				186	* This way we save having big switch/case statements.
				187	*
				188	* We use bit 0 to indicate switch_stack or pt_regs.
				189	* The offset is simply shifted by 1 bit.
				190	* A 2-byte value should be enough to hold any kind of offset
				191	*
				192	* In case the calling convention changes (and thus pt_regs/switch_stack)
				193	* simply use RSW instead of RPT or vice-versa.
				194	*/
				195
				/* Byte offset of member x inside struct pt_regs / struct switch_stack. */
				#define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
				#define RSO(x) ((size_t) &((struct switch_stack *)0)->x)

				/*
				 * Table encoding: the offset is shifted left by one and bit 0 says which
				 * structure holds the register (0 = pt_regs, 1 = switch_stack).
				 */
				#define RPT(x) (RPO(x) << 1)
				#define RSW(x) (1 | (RSO(x) << 1))

				/* Decode a gr_info[] entry: byte offset, and "lives in switch_stack" flag. */
				#define GR_OFFS(x) (gr_info[(x)] >> 1)
				#define GR_IN_SW(x) (gr_info[(x)] & 0x1)

				/* Decode an fr_info[] entry: byte offset, and "lives in switch_stack" flag. */
				#define FR_OFFS(x) (fr_info[(x)] >> 1)
				#define FR_IN_SW(x) (fr_info[(x)] & 0x1)
				207
				208	static u16 gr_info[32]={
				209	0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
				210
				211	RPT(r1), RPT(r2), RPT(r3),
				212
				213	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
				214
				215	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
				216	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
				217
				218	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
				219	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
				220	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
				221	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
				222	};
				223
				224	static u16 fr_info[32]={
				225	0, /* constant : WE SHOULD NEVER GET THIS */
				226	0, /* constant : WE SHOULD NEVER GET THIS */
				227
				228	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
				229
				230	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
				231	RPT(f10), RPT(f11),
				232
				233	RSW(f12), RSW(f13), RSW(f14),
				234	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
				235	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
				236	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
				237	RSW(f30), RSW(f31)
				238	};
				239
				/*
				 * Drop the ALAT entry that tracks general register REGNO.
				 *
				 * ia64_invala_gr() is handed a literal register number in every case,
				 * hence one explicit case per register 0..127.  Any other REGNO does
				 * nothing.
				 */
				static void
				invala_gr (int regno)
				{
				# define INVALA(r)	case r: ia64_invala_gr(r); break

					switch (regno) {
						INVALA(  0); INVALA(  1); INVALA(  2); INVALA(  3); INVALA(  4); INVALA(  5); INVALA(  6); INVALA(  7);
						INVALA(  8); INVALA(  9); INVALA( 10); INVALA( 11); INVALA( 12); INVALA( 13); INVALA( 14); INVALA( 15);
						INVALA( 16); INVALA( 17); INVALA( 18); INVALA( 19); INVALA( 20); INVALA( 21); INVALA( 22); INVALA( 23);
						INVALA( 24); INVALA( 25); INVALA( 26); INVALA( 27); INVALA( 28); INVALA( 29); INVALA( 30); INVALA( 31);
						INVALA( 32); INVALA( 33); INVALA( 34); INVALA( 35); INVALA( 36); INVALA( 37); INVALA( 38); INVALA( 39);
						INVALA( 40); INVALA( 41); INVALA( 42); INVALA( 43); INVALA( 44); INVALA( 45); INVALA( 46); INVALA( 47);
						INVALA( 48); INVALA( 49); INVALA( 50); INVALA( 51); INVALA( 52); INVALA( 53); INVALA( 54); INVALA( 55);
						INVALA( 56); INVALA( 57); INVALA( 58); INVALA( 59); INVALA( 60); INVALA( 61); INVALA( 62); INVALA( 63);
						INVALA( 64); INVALA( 65); INVALA( 66); INVALA( 67); INVALA( 68); INVALA( 69); INVALA( 70); INVALA( 71);
						INVALA( 72); INVALA( 73); INVALA( 74); INVALA( 75); INVALA( 76); INVALA( 77); INVALA( 78); INVALA( 79);
						INVALA( 80); INVALA( 81); INVALA( 82); INVALA( 83); INVALA( 84); INVALA( 85); INVALA( 86); INVALA( 87);
						INVALA( 88); INVALA( 89); INVALA( 90); INVALA( 91); INVALA( 92); INVALA( 93); INVALA( 94); INVALA( 95);
						INVALA( 96); INVALA( 97); INVALA( 98); INVALA( 99); INVALA(100); INVALA(101); INVALA(102); INVALA(103);
						INVALA(104); INVALA(105); INVALA(106); INVALA(107); INVALA(108); INVALA(109); INVALA(110); INVALA(111);
						INVALA(112); INVALA(113); INVALA(114); INVALA(115); INVALA(116); INVALA(117); INVALA(118); INVALA(119);
						INVALA(120); INVALA(121); INVALA(122); INVALA(123); INVALA(124); INVALA(125); INVALA(126); INVALA(127);
					default:
						break;
					}
				# undef INVALA
				}
				266
				/*
				 * Drop the ALAT entry that tracks floating-point register REGNO.
				 *
				 * ia64_invala_fr() is handed a literal register number in every case,
				 * hence one explicit case per register 0..127.  Any other REGNO does
				 * nothing.
				 */
				static void
				invala_fr (int regno)
				{
				# define INVALA(r)	case r: ia64_invala_fr(r); break

					switch (regno) {
						INVALA(  0); INVALA(  1); INVALA(  2); INVALA(  3); INVALA(  4); INVALA(  5); INVALA(  6); INVALA(  7);
						INVALA(  8); INVALA(  9); INVALA( 10); INVALA( 11); INVALA( 12); INVALA( 13); INVALA( 14); INVALA( 15);
						INVALA( 16); INVALA( 17); INVALA( 18); INVALA( 19); INVALA( 20); INVALA( 21); INVALA( 22); INVALA( 23);
						INVALA( 24); INVALA( 25); INVALA( 26); INVALA( 27); INVALA( 28); INVALA( 29); INVALA( 30); INVALA( 31);
						INVALA( 32); INVALA( 33); INVALA( 34); INVALA( 35); INVALA( 36); INVALA( 37); INVALA( 38); INVALA( 39);
						INVALA( 40); INVALA( 41); INVALA( 42); INVALA( 43); INVALA( 44); INVALA( 45); INVALA( 46); INVALA( 47);
						INVALA( 48); INVALA( 49); INVALA( 50); INVALA( 51); INVALA( 52); INVALA( 53); INVALA( 54); INVALA( 55);
						INVALA( 56); INVALA( 57); INVALA( 58); INVALA( 59); INVALA( 60); INVALA( 61); INVALA( 62); INVALA( 63);
						INVALA( 64); INVALA( 65); INVALA( 66); INVALA( 67); INVALA( 68); INVALA( 69); INVALA( 70); INVALA( 71);
						INVALA( 72); INVALA( 73); INVALA( 74); INVALA( 75); INVALA( 76); INVALA( 77); INVALA( 78); INVALA( 79);
						INVALA( 80); INVALA( 81); INVALA( 82); INVALA( 83); INVALA( 84); INVALA( 85); INVALA( 86); INVALA( 87);
						INVALA( 88); INVALA( 89); INVALA( 90); INVALA( 91); INVALA( 92); INVALA( 93); INVALA( 94); INVALA( 95);
						INVALA( 96); INVALA( 97); INVALA( 98); INVALA( 99); INVALA(100); INVALA(101); INVALA(102); INVALA(103);
						INVALA(104); INVALA(105); INVALA(106); INVALA(107); INVALA(108); INVALA(109); INVALA(110); INVALA(111);
						INVALA(112); INVALA(113); INVALA(114); INVALA(115); INVALA(116); INVALA(117); INVALA(118); INVALA(119);
						INVALA(120); INVALA(121); INVALA(122); INVALA(123); INVALA(124); INVALA(125); INVALA(126); INVALA(127);
					default:
						break;
					}
				# undef INVALA
				}
				293
				/*
				 * Add rotating-register base RRB to register index REG and wrap the sum
				 * around once if it reaches SOR, the size of the rotating region.
				 */
				static inline unsigned long
				rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
				{
					unsigned long rotated = reg + rrb;

					return (rotated >= sor) ? rotated - sor : rotated;
				}
				302
				303	static void
				304	set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
				305	{
				306	struct switch_stack sw = (struct switch_stack ) regs - 1;
				307	unsigned long bsp, bspstore, addr, rnat_addr, *ubs_end;
				308	unsigned long kbs = (void ) current + IA64_RBS_OFFSET;
				309	unsigned long rnats, nat_mask;
				310	unsigned long on_kbs;
				311	long sof = (regs->cr_ifs) & 0x7f;
				312	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
				313	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
				314	long ridx = r1 - 32;
				315
				316	if (ridx >= sof) {
				317	/* this should never happen, as the "rsvd register fault" has higher priority */
				318	DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
				319	return;
				320	}
				321
				322	if (ridx < sor)
				323	ridx = rotate_reg(sor, rrb_gr, ridx);
				324
				325	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
				326	r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
				327
				328	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
				329	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
				330	if (addr >= kbs) {
				331	/* the register is on the kernel backing store: easy... */
				332	rnat_addr = ia64_rse_rnat_addr(addr);
				333	if ((unsigned long) rnat_addr >= sw->ar_bspstore)
				334	rnat_addr = &sw->ar_rnat;
				335	nat_mask = 1UL << ia64_rse_slot_num(addr);
				336
				337	*addr = val;
				338	if (nat)
				339	*rnat_addr \|= nat_mask;
				340	else
				341	*rnat_addr &= ~nat_mask;
				342	return;
				343	}
				344
				345	if (!user_stack(current, regs)) {
				346	DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
				347	return;
				348	}
				349
				350	bspstore = (unsigned long *)regs->ar_bspstore;
				351	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
				352	bsp = ia64_rse_skip_regs(ubs_end, -sof);
				353	addr = ia64_rse_skip_regs(bsp, ridx);
				354
				355	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void ) ubs_end, (void ) bsp, (void *) addr);
				356
				357	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
				358
				359	rnat_addr = ia64_rse_rnat_addr(addr);
				360
				361	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
				362	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
				363	(void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
				364
				365	nat_mask = 1UL << ia64_rse_slot_num(addr);
				366	if (nat)
				367	rnats \|= nat_mask;
				368	else
				369	rnats &= ~nat_mask;
				370	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
				371
				372	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
				373	}
				374
				375
				376	static void
				377	get_rse_reg (struct pt_regs regs, unsigned long r1, unsigned long val, int *nat)
				378	{
				379	struct switch_stack sw = (struct switch_stack ) regs - 1;
				380	unsigned long bsp, addr, rnat_addr, ubs_end, *bspstore;
				381	unsigned long kbs = (void ) current + IA64_RBS_OFFSET;
				382	unsigned long rnats, nat_mask;
				383	unsigned long on_kbs;
				384	long sof = (regs->cr_ifs) & 0x7f;
				385	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
				386	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
				387	long ridx = r1 - 32;
				388
				389	if (ridx >= sof) {
				390	/* read of out-of-frame register returns an undefined value; 0 in our case. */
				391	DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
				392	goto fail;
				393	}
				394
				395	if (ridx < sor)
				396	ridx = rotate_reg(sor, rrb_gr, ridx);
				397
				398	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
				399	r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
				400
				401	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
				402	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
				403	if (addr >= kbs) {
				404	/* the register is on the kernel backing store: easy... */
				405	val = addr;
				406	if (nat) {
				407	rnat_addr = ia64_rse_rnat_addr(addr);
				408	if ((unsigned long) rnat_addr >= sw->ar_bspstore)
				409	rnat_addr = &sw->ar_rnat;
				410	nat_mask = 1UL << ia64_rse_slot_num(addr);
				411	nat = (rnat_addr & nat_mask) != 0;
				412	}
				413	return;
				414	}
				415
				416	if (!user_stack(current, regs)) {
				417	DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
				418	goto fail;
				419	}
				420
				421	bspstore = (unsigned long *)regs->ar_bspstore;
				422	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
				423	bsp = ia64_rse_skip_regs(ubs_end, -sof);
				424	addr = ia64_rse_skip_regs(bsp, ridx);
				425
				426	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void ) ubs_end, (void ) bsp, (void *) addr);
				427
				428	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
				429
				430	if (nat) {
				431	rnat_addr = ia64_rse_rnat_addr(addr);
				432	nat_mask = 1UL << ia64_rse_slot_num(addr);
				433
				434	DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
				435
				436	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
				437	*nat = (rnats & nat_mask) != 0;
				438	}
				439	return;
				440
				441	fail:
				442	*val = 0;
				443	if (nat)
				444	*nat = 0;
				445	return;
				446	}
				447
				448
				449	static void
				450	setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
				451	{
				452	struct switch_stack sw = (struct switch_stack ) regs - 1;
				453	unsigned long addr;
				454	unsigned long bitmask;
				455	unsigned long *unat;
				456
				457	/*
				458	* First takes care of stacked registers
				459	*/
				460	if (regnum >= IA64_FIRST_STACKED_GR) {
				461	set_rse_reg(regs, regnum, val, nat);
				462	return;
				463	}
				464
				465	/*
				466	* Using r0 as a target raises a General Exception fault which has higher priority
				467	* than the Unaligned Reference fault.
				468	*/
				469
				470	/*
				471	* Now look at registers in [0-31] range and init correct UNAT
				472	*/
				473	if (GR_IN_SW(regnum)) {
				474	addr = (unsigned long)sw;
				475	unat = &sw->ar_unat;
				476	} else {
				477	addr = (unsigned long)regs;
				478	unat = &sw->caller_unat;
				479	}
				480	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
				481	addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
				482	/*
				483	* add offset from base of struct
				484	* and do it !
				485	*/
				486	addr += GR_OFFS(regnum);
				487
				488	(unsigned long )addr = val;
				489
				490	/*
				491	* We need to clear the corresponding UNAT bit to fully emulate the load
				492	* UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
				493	*/
				494	bitmask = 1UL << (addr >> 3 & 0x3f);
				495	DPRINT("0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void ) unat, *unat);
				496	if (nat) {
				497	*unat \|= bitmask;
				498	} else {
				499	*unat &= ~bitmask;
				500	}
				501	DPRINT("0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void ) unat,*unat);
				502	}
				503
				504	/*
				505	* Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
				506	* range from 32-127, result is in the range from 0-95.
				507	*/
				508	static inline unsigned long
				509	fph_index (struct pt_regs *regs, long regnum)
				510	{
				511	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
				512	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
				513	}
				514
				515	static void
				516	setfpreg (unsigned long regnum, struct ia64_fpreg fpval, struct pt_regs regs)
				517	{
				518	struct switch_stack sw = (struct switch_stack )regs - 1;
				519	unsigned long addr;
				520
				521	/*
				522	* From EAS-2.5: FPDisableFault has higher priority than Unaligned
				523	* Fault. Thus, when we get here, we know the partition is enabled.
				524	* To update f32-f127, there are three choices:
				525	*
				526	* (1) save f32-f127 to thread.fph and update the values there
				527	* (2) use a gigantic switch statement to directly access the registers
				528	* (3) generate code on the fly to update the desired register
				529	*
				530	* For now, we are using approach (1).
				531	*/
				532	if (regnum >= IA64_FIRST_ROTATING_FR) {
				533	ia64_sync_fph(current);
				534	current->thread.fph[fph_index(regs, regnum)] = *fpval;
				535	} else {
				536	/*
				537	* pt_regs or switch_stack ?
				538	*/
				539	if (FR_IN_SW(regnum)) {
				540	addr = (unsigned long)sw;
				541	} else {
				542	addr = (unsigned long)regs;
				543	}
				544
				545	DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
				546
				547	addr += FR_OFFS(regnum);
				548	(struct ia64_fpreg )addr = *fpval;
				549
				550	/*
				551	* mark the low partition as being used now
				552	*
				553	* It is highly unlikely that this bit is not already set, but
				554	* let's do it for safety.
				555	*/
				556	regs->cr_ipsr \|= IA64_PSR_MFL;
				557	}
				558	}
				559
				/*
				 * f0 and f1 are constants, so there is no saved copy to read.  These two
				 * helpers write the spilled image of each one to *FINAL so that it can be
				 * used with stfX.
				 */

				/* Spill constant register f0 to *FINAL. */
				static inline void
				float_spill_f0 (struct ia64_fpreg *final)
				{
					ia64_stf_spill(final, 0);
				}

				/* Spill constant register f1 to *FINAL. */
				static inline void
				float_spill_f1 (struct ia64_fpreg *final)
				{
					ia64_stf_spill(final, 1);
				}
				575
				576	static void
				577	getfpreg (unsigned long regnum, struct ia64_fpreg fpval, struct pt_regs regs)
				578	{
				579	struct switch_stack sw = (struct switch_stack ) regs - 1;
				580	unsigned long addr;
				581
				582	/*
				583	* From EAS-2.5: FPDisableFault has higher priority than
				584	* Unaligned Fault. Thus, when we get here, we know the partition is
				585	* enabled.
				586	*
				587	* When regnum > 31, the register is still live and we need to force a save
				588	* to current->thread.fph to get access to it. See discussion in setfpreg()
				589	* for reasons and other ways of doing this.
				590	*/
				591	if (regnum >= IA64_FIRST_ROTATING_FR) {
				592	ia64_flush_fph(current);
				593	*fpval = current->thread.fph[fph_index(regs, regnum)];
				594	} else {
				595	/*
				596	* f0 = 0.0, f1= 1.0. Those registers are constant and are thus
				597	* not saved, we must generate their spilled form on the fly
				598	*/
				599	switch(regnum) {
				600	case 0:
				601	float_spill_f0(fpval);
				602	break;
				603	case 1:
				604	float_spill_f1(fpval);
				605	break;
				606	default:
				607	/*
				608	* pt_regs or switch_stack ?
				609	*/
				610	addr = FR_IN_SW(regnum) ? (unsigned long)sw
				611	: (unsigned long)regs;
				612
				613	DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
				614	FR_IN_SW(regnum), addr, FR_OFFS(regnum));
				615
				616	addr += FR_OFFS(regnum);
				617	fpval = (struct ia64_fpreg *)addr;
				618	}
				619	}
				620	}
				621
				622
				623	static void
				624	getreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
				625	{
				626	struct switch_stack sw = (struct switch_stack ) regs - 1;
				627	unsigned long addr, *unat;
				628
				629	if (regnum >= IA64_FIRST_STACKED_GR) {
				630	get_rse_reg(regs, regnum, val, nat);
				631	return;
				632	}
				633
				634	/*
				635	* take care of r0 (read-only always evaluate to 0)
				636	*/
				637	if (regnum == 0) {
				638	*val = 0;
				639	if (nat)
				640	*nat = 0;
				641	return;
				642	}
				643
				644	/*
				645	* Now look at registers in [0-31] range and init correct UNAT
				646	*/
				647	if (GR_IN_SW(regnum)) {
				648	addr = (unsigned long)sw;
				649	unat = &sw->ar_unat;
				650	} else {
				651	addr = (unsigned long)regs;
				652	unat = &sw->caller_unat;
				653	}
				654
				655	DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
				656
				657	addr += GR_OFFS(regnum);
				658
				659	val = (unsigned long *)addr;
				660
				661	/*
				662	* do it only when requested
				663	*/
				664	if (nat)
				665	nat = (unat >> (addr >> 3 & 0x3f)) & 0x1UL;
				666	}
				667
				668	static void
				669	emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
				670	{
				671	/*
				672	* IMPORTANT:
				673	* Given the way we handle unaligned speculative loads, we should
				674	* not get to this point in the code but we keep this sanity check,
				675	* just in case.
				676	*/
				677	if (ld.x6_op == 1 \|\| ld.x6_op == 3) {
				678	printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
				679	die_if_kernel("unaligned reference on speculative load with register update\n",
				680	regs, 30);
				681	}
				682
				683
				684	/*
				685	* at this point, we know that the base register to update is valid i.e.,
				686	* it's not r0
				687	*/
				688	if (type == UPD_IMMEDIATE) {
				689	unsigned long imm;
				690
				691	/*
				692	* Load +Imm: ldXZ r1=[r3],imm(9)
				693	*
				694	*
				695	* form imm9: [13:19] contain the first 7 bits
				696	*/
				697	imm = ld.x << 7 \| ld.imm;
				698
				699	/*
				700	* sign extend (1+8bits) if m set
				701	*/
				702	if (ld.m) imm \|= SIGN_EXT9;
				703
				704	/*
				705	* ifa == r3 and we know that the NaT bit on r3 was clear so
				706	* we can directly use ifa.
				707	*/
				708	ifa += imm;
				709
				710	setreg(ld.r3, ifa, 0, regs);
				711
				712	DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
				713
				714	} else if (ld.m) {
				715	unsigned long r2;
				716	int nat_r2;
				717
				718	/*
				719	* Load +Reg Opcode: ldXZ r1=[r3],r2
				720	*
				721	* Note: that we update r3 even in the case of ldfX.a
				722	* (where the load does not happen)
				723	*
				724	* The way the load algorithm works, we know that r3 does not
				725	* have its NaT bit set (would have gotten NaT consumption
				726	* before getting the unaligned fault). So we can use ifa
				727	* which equals r3 at this point.
				728	*
				729	* IMPORTANT:
				730	* The above statement holds ONLY because we know that we
				731	* never reach this code when trying to do a ldX.s.
				732	* If we ever make it to here on an ldfX.s then
				733	*/
				734	getreg(ld.imm, &r2, &nat_r2, regs);
				735
				736	ifa += r2;
				737
				738	/*
				739	* propagate Nat r2 -> r3
				740	*/
				741	setreg(ld.r3, ifa, nat_r2, regs);
				742
				743	DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
				744	}
				745	}
				746
				747
				748	static int
				749	emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				750	{
				751	unsigned int len = 1 << ld.x6_sz;
				752	unsigned long val = 0;
				753
				754	/*
				755	* r0, as target, doesn't need to be checked because Illegal Instruction
				756	* faults have higher priority than unaligned faults.
				757	*
				758	* r0 cannot be found as the base as it would never generate an
				759	* unaligned reference.
				760	*/
				761
				762	/*
				763	* ldX.a we will emulate load and also invalidate the ALAT entry.
				764	* See comment below for explanation on how we handle ldX.a
				765	*/
				766
				767	if (len != 2 && len != 4 && len != 8) {
				768	DPRINT("unknown size: x6=%d\n", ld.x6_sz);
				769	return -1;
				770	}
				771	/* this assumes little-endian byte-order: */
				772	if (copy_from_user(&val, (void __user *) ifa, len))
				773	return -1;
				774	setreg(ld.r1, val, 0, regs);
				775
				776	/*
				777	* check for updates on any kind of loads
				778	*/
				779	if (ld.op == 0x5 \|\| ld.m)
				780	emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
				781
				782	/*
				783	* handling of various loads (based on EAS2.4):
				784	*
				785	* ldX.acq (ordered load):
				786	* - acquire semantics would have been used, so force fence instead.
				787	*
				788	* ldX.c.clr (check load and clear):
				789	* - if we get to this handler, it's because the entry was not in the ALAT.
				790	* Therefore the operation reverts to a normal load
				791	*
				792	* ldX.c.nc (check load no clear):
				793	* - same as previous one
				794	*
				795	* ldX.c.clr.acq (ordered check load and clear):
				796	* - same as above for c.clr part. The load needs to have acquire semantics. So
				797	* we use the fence semantics which is stronger and thus ensures correctness.
				798	*
				799	* ldX.a (advanced load):
				800	* - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
				801	* address doesn't match requested size alignment. This means that we would
				802	* possibly need more than one load to get the result.
				803	*
				804	* The load part can be handled just like a normal load, however the difficult
				805	* part is to get the right thing into the ALAT. The critical piece of information
				806	* in the base address of the load & size. To do that, a ld.a must be executed,
				807	* clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
				808	* if we use the same target register, we will be okay for the check.a instruction.
				809	* If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
				810	* which would overlap within [r3,r3+X] (the size of the load was store in the
				811	* ALAT). If such an entry is found the entry is invalidated. But this is not good
				812	* enough, take the following example:
				813	* r3=3
				814	* ld4.a r1=[r3]
				815	*
				816	* Could be emulated by doing:
				817	* ld1.a r1=[r3],1
				818	* store to temporary;
				819	* ld1.a r1=[r3],1
				820	* store & shift to temporary;
				821	* ld1.a r1=[r3],1
				822	* store & shift to temporary;
				823	* ld1.a r1=[r3]
				824	* store & shift to temporary;
				825	* r1=temporary
				826	*
				827	* So in this case, you would get the right value is r1 but the wrong info in
				828	* the ALAT. Notice that you could do it in reverse to finish with address 3
				829	* but you would still get the size wrong. To get the size right, one needs to
				830	* execute exactly the same kind of load. You could do it from a aligned
				831	* temporary location, but you would get the address wrong.
				832	*
				833	* So no matter what, it is not possible to emulate an advanced load
				834	* correctly. But is that really critical ?
				835	*
				836	* We will always convert ld.a into a normal load with ALAT invalidated. This
				837	* will enable compiler to do optimization where certain code path after ld.a
				838	* is not required to have ld.c/chk.a, e.g., code path with no intervening stores.
				839	*
				840	* If there is a store after the advanced load, one must either do a ld.c.* or
				841	* chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
				842	* entry found in ALAT), and that's perfectly ok because:
				843	*
				844	* - ld.c.*, if the entry is not present a normal load is executed
				845	* - chk.a.*, if the entry is not present, execution jumps to recovery code
				846	*
				847	* In either case, the load can be potentially retried in another form.
				848	*
				849	* ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
				850	* up a stale entry later). The register base update MUST also be performed.
				851	*/
				852
				853	/*
				854	* when the load has the .acq completer then
				855	* use ordering fence.
				856	*/
				857	if (ld.x6_op == 0x5 \|\| ld.x6_op == 0xa)
				858	mb();
				859
				860	/*
				861	* invalidate ALAT entry in case of advanced load
				862	*/
				863	if (ld.x6_op == 0x2)
				864	invala_gr(ld.r1);
				865
				866	return 0;
				867	}
				868
				869	static int
				870	emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				871	{
				872	unsigned long r2;
				873	unsigned int len = 1 << ld.x6_sz;
				874
				875	/*
				876	* if we get to this handler, Nat bits on both r3 and r2 have already
				877	* been checked. so we don't need to do it
				878	*
				879	* extract the value to be stored
				880	*/
				881	getreg(ld.imm, &r2, NULL, regs);
				882
				883	/*
				884	* we rely on the macros in unaligned.h for now i.e.,
				885	* we let the compiler figure out how to read memory gracefully.
				886	*
				887	* We need this switch/case because the way the inline function
				888	* works. The code is optimized by the compiler and looks like
				889	* a single switch/case.
				890	*/
				891	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
				892
				893	if (len != 2 && len != 4 && len != 8) {
				894	DPRINT("unknown size: x6=%d\n", ld.x6_sz);
				895	return -1;
				896	}
				897
				898	/* this assumes little-endian byte-order: */
				899	if (copy_to_user((void __user *) ifa, &r2, len))
				900	return -1;
				901
				902	/*
				903	* stX [r3]=r2,imm(9)
				904	*
				905	* NOTE:
				906	* ld.r3 can never be r0, because r0 would not generate an
				907	* unaligned access.
				908	*/
				909	if (ld.op == 0x5) {
				910	unsigned long imm;
				911
				912	/*
				913	* form imm9: [12:6] contain first 7bits
				914	*/
				915	imm = ld.x << 7 \| ld.r1;
				916	/*
				917	* sign extend (8bits) if m set
				918	*/
				919	if (ld.m) imm \|= SIGN_EXT9;
				920	/*
				921	* ifa == r3 (NaT is necessarily cleared)
				922	*/
				923	ifa += imm;
				924
				925	DPRINT("imm=%lx r3=%lx\n", imm, ifa);
				926
				927	setreg(ld.r3, ifa, 0, regs);
				928	}
				929	/*
				930	* we don't have alat_invalidate_multiple() so we need
				931	* to do the complete flush :-<<
				932	*/
				933	ia64_invala();
				934
				935	/*
				936	* stX.rel: use fence instead of release
				937	*/
				938	if (ld.x6_op == 0xd)
				939	mb();
				940
				941	return 0;
				942	}
				943
				944	/*
				945	* floating point operations sizes in bytes
				946	*/
				947	static const unsigned char float_fsz[4]={
				948	10, /* extended precision (e) */
				949	8, /* integer (8) */
				950	4, /* single precision (s) */
				951	8 /* double precision (d) */
				952	};
				953
				954	static inline void
				955	mem2float_extended (struct ia64_fpreg init, struct ia64_fpreg final)
				956	{
				957	ia64_ldfe(6, init);
				958	ia64_stop();
				959	ia64_stf_spill(final, 6);
				960	}
				961
				962	static inline void
				963	mem2float_integer (struct ia64_fpreg init, struct ia64_fpreg final)
				964	{
				965	ia64_ldf8(6, init);
				966	ia64_stop();
				967	ia64_stf_spill(final, 6);
				968	}
				969
				970	static inline void
				971	mem2float_single (struct ia64_fpreg init, struct ia64_fpreg final)
				972	{
				973	ia64_ldfs(6, init);
				974	ia64_stop();
				975	ia64_stf_spill(final, 6);
				976	}
				977
				978	static inline void
				979	mem2float_double (struct ia64_fpreg init, struct ia64_fpreg final)
				980	{
				981	ia64_ldfd(6, init);
				982	ia64_stop();
				983	ia64_stf_spill(final, 6);
				984	}
				985
				986	static inline void
				987	float2mem_extended (struct ia64_fpreg init, struct ia64_fpreg final)
				988	{
				989	ia64_ldf_fill(6, init);
				990	ia64_stop();
				991	ia64_stfe(final, 6);
				992	}
				993
				994	static inline void
				995	float2mem_integer (struct ia64_fpreg init, struct ia64_fpreg final)
				996	{
				997	ia64_ldf_fill(6, init);
				998	ia64_stop();
				999	ia64_stf8(final, 6);
				1000	}
				1001
				1002	static inline void
				1003	float2mem_single (struct ia64_fpreg init, struct ia64_fpreg final)
				1004	{
				1005	ia64_ldf_fill(6, init);
				1006	ia64_stop();
				1007	ia64_stfs(final, 6);
				1008	}
				1009
				1010	static inline void
				1011	float2mem_double (struct ia64_fpreg init, struct ia64_fpreg final)
				1012	{
				1013	ia64_ldf_fill(6, init);
				1014	ia64_stop();
				1015	ia64_stfd(final, 6);
				1016	}
				1017
				1018	static int
				1019	emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1020	{
				1021	struct ia64_fpreg fpr_init[2];
				1022	struct ia64_fpreg fpr_final[2];
				1023	unsigned long len = float_fsz[ld.x6_sz];
				1024
				1025	/*
				1026	* fr0 & fr1 don't need to be checked because Illegal Instruction faults have
				1027	* higher priority than unaligned faults.
				1028	*
				1029	* r0 cannot be found as the base as it would never generate an unaligned
				1030	* reference.
				1031	*/
				1032
				1033	/*
				1034	* make sure we get clean buffers
				1035	*/
				1036	memset(&fpr_init, 0, sizeof(fpr_init));
				1037	memset(&fpr_final, 0, sizeof(fpr_final));
				1038
				1039	/*
				1040	* ldfpX.a: we don't try to emulate anything but we must
				1041	* invalidate the ALAT entry and execute updates, if any.
				1042	*/
				1043	if (ld.x6_op != 0x2) {
				1044	/*
				1045	* This assumes little-endian byte-order. Note that there is no "ldfpe"
				1046	* instruction:
				1047	*/
				1048	if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
				1049	\|\| copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
				1050	return -1;
				1051
				1052	DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
				1053	DDUMP("frp_init =", &fpr_init, 2*len);
				1054	/*
				1055	* XXX fixme
				1056	* Could optimize inlines by using ldfpX & 2 spills
				1057	*/
				1058	switch( ld.x6_sz ) {
				1059	case 0:
				1060	mem2float_extended(&fpr_init[0], &fpr_final[0]);
				1061	mem2float_extended(&fpr_init[1], &fpr_final[1]);
				1062	break;
				1063	case 1:
				1064	mem2float_integer(&fpr_init[0], &fpr_final[0]);
				1065	mem2float_integer(&fpr_init[1], &fpr_final[1]);
				1066	break;
				1067	case 2:
				1068	mem2float_single(&fpr_init[0], &fpr_final[0]);
				1069	mem2float_single(&fpr_init[1], &fpr_final[1]);
				1070	break;
				1071	case 3:
				1072	mem2float_double(&fpr_init[0], &fpr_final[0]);
				1073	mem2float_double(&fpr_init[1], &fpr_final[1]);
				1074	break;
				1075	}
				1076	DDUMP("fpr_final =", &fpr_final, 2*len);
				1077	/*
				1078	* XXX fixme
				1079	*
				1080	* A possible optimization would be to drop fpr_final and directly
				1081	* use the storage from the saved context i.e., the actual final
				1082	* destination (pt_regs, switch_stack or thread structure).
				1083	*/
				1084	setfpreg(ld.r1, &fpr_final[0], regs);
				1085	setfpreg(ld.imm, &fpr_final[1], regs);
				1086	}
				1087
				1088	/*
				1089	* Check for updates: only immediate updates are available for this
				1090	* instruction.
				1091	*/
				1092	if (ld.m) {
				1093	/*
				1094	* the immediate is implicit given the ldsz of the operation:
				1095	* single: 8 (2x4) and for all others it's 16 (2x8)
				1096	*/
				1097	ifa += len<<1;
				1098
				1099	/*
				1100	* IMPORTANT:
				1101	* the fact that we force the NaT of r3 to zero is ONLY valid
				1102	* as long as we don't come here with a ldfpX.s.
				1103	* For this reason we keep this sanity check
				1104	*/
				1105	if (ld.x6_op == 1 \|\| ld.x6_op == 3)
				1106	printk(KERN_ERR "%s: register update on speculative load pair, error\n",
				1107	__FUNCTION__);
				1108
				1109	setreg(ld.r3, ifa, 0, regs);
				1110	}
				1111
				1112	/*
				1113	* Invalidate ALAT entries, if any, for both registers.
				1114	*/
				1115	if (ld.x6_op == 0x2) {
				1116	invala_fr(ld.r1);
				1117	invala_fr(ld.imm);
				1118	}
				1119	return 0;
				1120	}
				1121
				1122
				1123	static int
				1124	emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1125	{
				1126	struct ia64_fpreg fpr_init;
				1127	struct ia64_fpreg fpr_final;
				1128	unsigned long len = float_fsz[ld.x6_sz];
				1129
				1130	/*
				1131	* fr0 & fr1 don't need to be checked because Illegal Instruction
				1132	* faults have higher priority than unaligned faults.
				1133	*
				1134	* r0 cannot be found as the base as it would never generate an
				1135	* unaligned reference.
				1136	*/
				1137
				1138	/*
				1139	* make sure we get clean buffers
				1140	*/
				1141	memset(&fpr_init,0, sizeof(fpr_init));
				1142	memset(&fpr_final,0, sizeof(fpr_final));
				1143
				1144	/*
				1145	* ldfX.a we don't try to emulate anything but we must
				1146	* invalidate the ALAT entry.
				1147	* See comments in ldX for descriptions on how the various loads are handled.
				1148	*/
				1149	if (ld.x6_op != 0x2) {
				1150	if (copy_from_user(&fpr_init, (void __user *) ifa, len))
				1151	return -1;
				1152
				1153	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
				1154	DDUMP("fpr_init =", &fpr_init, len);
				1155	/*
				1156	* we only do something for x6_op={0,8,9}
				1157	*/
				1158	switch( ld.x6_sz ) {
				1159	case 0:
				1160	mem2float_extended(&fpr_init, &fpr_final);
				1161	break;
				1162	case 1:
				1163	mem2float_integer(&fpr_init, &fpr_final);
				1164	break;
				1165	case 2:
				1166	mem2float_single(&fpr_init, &fpr_final);
				1167	break;
				1168	case 3:
				1169	mem2float_double(&fpr_init, &fpr_final);
				1170	break;
				1171	}
				1172	DDUMP("fpr_final =", &fpr_final, len);
				1173	/*
				1174	* XXX fixme
				1175	*
				1176	* A possible optimization would be to drop fpr_final and directly
				1177	* use the storage from the saved context i.e., the actual final
				1178	* destination (pt_regs, switch_stack or thread structure).
				1179	*/
				1180	setfpreg(ld.r1, &fpr_final, regs);
				1181	}
				1182
				1183	/*
				1184	* check for updates on any loads
				1185	*/
				1186	if (ld.op == 0x7 \|\| ld.m)
				1187	emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
				1188
				1189	/*
				1190	* invalidate ALAT entry in case of advanced floating point loads
				1191	*/
				1192	if (ld.x6_op == 0x2)
				1193	invala_fr(ld.r1);
				1194
				1195	return 0;
				1196	}
				1197
				1198
				1199	static int
				1200	emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1201	{
				1202	struct ia64_fpreg fpr_init;
				1203	struct ia64_fpreg fpr_final;
				1204	unsigned long len = float_fsz[ld.x6_sz];
				1205
				1206	/*
				1207	* make sure we get clean buffers
				1208	*/
				1209	memset(&fpr_init,0, sizeof(fpr_init));
				1210	memset(&fpr_final,0, sizeof(fpr_final));
				1211
				1212	/*
				1213	* if we get to this handler, Nat bits on both r3 and r2 have already
				1214	* been checked. so we don't need to do it
				1215	*
				1216	* extract the value to be stored
				1217	*/
				1218	getfpreg(ld.imm, &fpr_init, regs);
				1219	/*
				1220	* during this step, we extract the spilled registers from the saved
				1221	* context i.e., we refill. Then we store (no spill) to temporary
				1222	* aligned location
				1223	*/
				1224	switch( ld.x6_sz ) {
				1225	case 0:
				1226	float2mem_extended(&fpr_init, &fpr_final);
				1227	break;
				1228	case 1:
				1229	float2mem_integer(&fpr_init, &fpr_final);
				1230	break;
				1231	case 2:
				1232	float2mem_single(&fpr_init, &fpr_final);
				1233	break;
				1234	case 3:
				1235	float2mem_double(&fpr_init, &fpr_final);
				1236	break;
				1237	}
				1238	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
				1239	DDUMP("fpr_init =", &fpr_init, len);
				1240	DDUMP("fpr_final =", &fpr_final, len);
				1241
				1242	if (copy_to_user((void __user *) ifa, &fpr_final, len))
				1243	return -1;
				1244
				1245	/*
				1246	* stfX [r3]=r2,imm(9)
				1247	*
				1248	* NOTE:
				1249	* ld.r3 can never be r0, because r0 would not generate an
				1250	* unaligned access.
				1251	*/
				1252	if (ld.op == 0x7) {
				1253	unsigned long imm;
				1254
				1255	/*
				1256	* form imm9: [12:6] contain first 7bits
				1257	*/
				1258	imm = ld.x << 7 \| ld.r1;
				1259	/*
				1260	* sign extend (8bits) if m set
				1261	*/
				1262	if (ld.m)
				1263	imm \|= SIGN_EXT9;
				1264	/*
				1265	* ifa == r3 (NaT is necessarily cleared)
				1266	*/
				1267	ifa += imm;
				1268
				1269	DPRINT("imm=%lx r3=%lx\n", imm, ifa);
				1270
				1271	setreg(ld.r3, ifa, 0, regs);
				1272	}
				1273	/*
				1274	* we don't have alat_invalidate_multiple() so we need
				1275	* to do the complete flush :-<<
				1276	*/
				1277	ia64_invala();
				1278
				1279	return 0;
				1280	}
				1281
				1282	/*
				1283	* Make sure we log the unaligned access, so that user/sysadmin can notice it and
				1284	* eventually fix the program. However, we don't want to do that for every access so we
				1285	* pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
				1286	* either...
				1287	*/
				1288	static int
				1289	within_logging_rate_limit (void)
				1290	{
				1291	static unsigned long count, last_time;
				1292
				1293	if (jiffies - last_time > 5*HZ)
				1294	count = 0;
Jack Steiner	79c83bd	2006-01-24 16:32:11 -0600	[diff] [blame]	1295	if (count < 5) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1296	last_time = jiffies;
Jack Steiner	79c83bd	2006-01-24 16:32:11 -0600	[diff] [blame]	1297	count++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1298	return 1;
				1299	}
				1300	return 0;
				1301
				1302	}
				1303
				1304	void
				1305	ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
				1306	{
				1307	struct ia64_psr *ipsr = ia64_psr(regs);
				1308	mm_segment_t old_fs = get_fs();
				1309	unsigned long bundle[2];
				1310	unsigned long opcode;
				1311	struct siginfo si;
				1312	const struct exception_table_entry *eh = NULL;
				1313	union {
				1314	unsigned long l;
				1315	load_store_t insn;
				1316	} u;
				1317	int ret = -1;
				1318
				1319	if (ia64_psr(regs)->be) {
				1320	/* we don't support big-endian accesses */
				1321	die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
				1322	goto force_sigbus;
				1323	}
				1324
				1325	/*
				1326	* Treat kernel accesses for which there is an exception handler entry the same as
				1327	* user-level unaligned accesses. Otherwise, a clever program could trick this
				1328	* handler into reading an arbitrary kernel addresses...
				1329	*/
				1330	if (!user_mode(regs))
				1331	eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
				1332	if (user_mode(regs) \|\| eh) {
				1333	if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
				1334	goto force_sigbus;
				1335
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame^]	1336	if (!no_unaligned_warning &&
				1337	!(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
				1338	within_logging_rate_limit())
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1339	{
				1340	char buf[200]; /* comm[] is at most 16 bytes... */
				1341	size_t len;
				1342
				1343	len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
				1344	"ip=0x%016lx\n\r", current->comm, current->pid,
				1345	ifa, regs->cr_iip + ipsr->ri);
				1346	/*
				1347	* Don't call tty_write_message() if we're in the kernel; we might
				1348	* be holding locks...
				1349	*/
				1350	if (user_mode(regs))
				1351	tty_write_message(current->signal->tty, buf);
				1352	buf[len-1] = '\0'; /* drop '\r' */
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame^]	1353	/* watch for command names containing %s */
				1354	printk(KERN_WARNING "%s", buf);
				1355	} else {
				1356	if (no_unaligned_warning && !noprint_warning) {
				1357	noprint_warning = 1;
				1358	printk(KERN_WARNING "%s(%d) encountered an "
				1359	"unaligned exception which required\n"
				1360	"kernel assistance, which degrades "
				1361	"the performance of the application.\n"
				1362	"Unaligned exception warnings have "
				1363	"been disabled by the system "
				1364	"administrator\n"
				1365	"echo 0 > /proc/sys/kernel/ignore-"
				1366	"unaligned-usertrap to re-enable\n",
				1367	current->comm, current->pid);
				1368	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1369	}
				1370	} else {
				1371	if (within_logging_rate_limit())
				1372	printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
				1373	ifa, regs->cr_iip + ipsr->ri);
				1374	set_fs(KERNEL_DS);
				1375	}
				1376
				1377	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
				1378	regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
				1379
				1380	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
				1381	goto failure;
				1382
				1383	/*
				1384	* extract the instruction from the bundle given the slot number
				1385	*/
				1386	switch (ipsr->ri) {
				1387	case 0: u.l = (bundle[0] >> 5); break;
				1388	case 1: u.l = (bundle[0] >> 46) \| (bundle[1] << 18); break;
				1389	case 2: u.l = (bundle[1] >> 23); break;
				1390	}
				1391	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
				1392
				1393	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
				1394	"ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
				1395	u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
				1396
				1397	/*
				1398	* IMPORTANT:
				1399	* Notice that the switch statement DOES not cover all possible instructions
				1400	* that DO generate unaligned references. This is made on purpose because for some
				1401	* instructions it DOES NOT make sense to try and emulate the access. Sometimes it
				1402	* is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
				1403	* the program will get a signal and die:
				1404	*
				1405	* load/store:
				1406	* - ldX.spill
				1407	* - stX.spill
				1408	* Reason: RNATs are based on addresses
				1409	* - ld16
				1410	* - st16
				1411	* Reason: ld16 and st16 are supposed to occur in a single
				1412	* memory op
				1413	*
				1414	* synchronization:
				1415	* - cmpxchg
				1416	* - fetchadd
				1417	* - xchg
				1418	* Reason: ATOMIC operations cannot be emulated properly using multiple
				1419	* instructions.
				1420	*
				1421	* speculative loads:
				1422	* - ldX.sZ
				1423	* Reason: side effects, code must be ready to deal with failure so simpler
				1424	* to let the load fail.
				1425	* ---------------------------------------------------------------------------------
				1426	* XXX fixme
				1427	*
				1428	* I would like to get rid of this switch case and do something
				1429	* more elegant.
				1430	*/
				1431	switch (opcode) {
				1432	case LDS_OP:
				1433	case LDSA_OP:
				1434	if (u.insn.x)
				1435	/* oops, really a semaphore op (cmpxchg, etc) */
				1436	goto failure;
				1437	/* no break */
				1438	case LDS_IMM_OP:
				1439	case LDSA_IMM_OP:
				1440	case LDFS_OP:
				1441	case LDFSA_OP:
				1442	case LDFS_IMM_OP:
				1443	/*
				1444	* The instruction will be retried with deferred exceptions turned on, and
				1445	* we should get Nat bit installed
				1446	*
				1447	* IMPORTANT: When PSR_ED is set, the register & immediate update forms
				1448	* are actually executed even though the operation failed. So we don't
				1449	* need to take care of this.
				1450	*/
				1451	DPRINT("forcing PSR_ED\n");
				1452	regs->cr_ipsr \|= IA64_PSR_ED;
				1453	goto done;
				1454
				1455	case LD_OP:
				1456	case LDA_OP:
				1457	case LDBIAS_OP:
				1458	case LDACQ_OP:
				1459	case LDCCLR_OP:
				1460	case LDCNC_OP:
				1461	case LDCCLRACQ_OP:
				1462	if (u.insn.x)
				1463	/* oops, really a semaphore op (cmpxchg, etc) */
				1464	goto failure;
				1465	/* no break */
				1466	case LD_IMM_OP:
				1467	case LDA_IMM_OP:
				1468	case LDBIAS_IMM_OP:
				1469	case LDACQ_IMM_OP:
				1470	case LDCCLR_IMM_OP:
				1471	case LDCNC_IMM_OP:
				1472	case LDCCLRACQ_IMM_OP:
				1473	ret = emulate_load_int(ifa, u.insn, regs);
				1474	break;
				1475
				1476	case ST_OP:
				1477	case STREL_OP:
				1478	if (u.insn.x)
				1479	/* oops, really a semaphore op (cmpxchg, etc) */
				1480	goto failure;
				1481	/* no break */
				1482	case ST_IMM_OP:
				1483	case STREL_IMM_OP:
				1484	ret = emulate_store_int(ifa, u.insn, regs);
				1485	break;
				1486
				1487	case LDF_OP:
				1488	case LDFA_OP:
				1489	case LDFCCLR_OP:
				1490	case LDFCNC_OP:
				1491	case LDF_IMM_OP:
				1492	case LDFA_IMM_OP:
				1493	case LDFCCLR_IMM_OP:
				1494	case LDFCNC_IMM_OP:
				1495	if (u.insn.x)
				1496	ret = emulate_load_floatpair(ifa, u.insn, regs);
				1497	else
				1498	ret = emulate_load_float(ifa, u.insn, regs);
				1499	break;
				1500
				1501	case STF_OP:
				1502	case STF_IMM_OP:
				1503	ret = emulate_store_float(ifa, u.insn, regs);
				1504	break;
				1505
				1506	default:
				1507	goto failure;
				1508	}
				1509	DPRINT("ret=%d\n", ret);
				1510	if (ret)
				1511	goto failure;
				1512
				1513	if (ipsr->ri == 2)
				1514	/*
				1515	* given today's architecture this case is not likely to happen because a
				1516	* memory access instruction (M) can never be in the last slot of a
				1517	* bundle. But let's keep it for now.
				1518	*/
				1519	regs->cr_iip += 16;
				1520	ipsr->ri = (ipsr->ri + 1) & 0x3;
				1521
				1522	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
				1523	done:
				1524	set_fs(old_fs); /* restore original address limit */
				1525	return;
				1526
				1527	failure:
				1528	/* something went wrong... */
				1529	if (!user_mode(regs)) {
				1530	if (eh) {
				1531	ia64_handle_exception(regs, eh);
				1532	goto done;
				1533	}
				1534	die_if_kernel("error during unaligned kernel access\n", regs, ret);
				1535	/* NOT_REACHED */
				1536	}
				1537	force_sigbus:
				1538	si.si_signo = SIGBUS;
				1539	si.si_errno = 0;
				1540	si.si_code = BUS_ADRALN;
				1541	si.si_addr = (void __user *) ifa;
				1542	si.si_flags = 0;
				1543	si.si_isr = 0;
				1544	si.si_imm = 0;
				1545	force_sig_info(SIGBUS, &si, current);
				1546	goto done;
				1547	}