Blame - arch/ia64/kernel/unaligned.c - SHIFTPHONES/android_kernel_shift_sdm845

blob: 7f0d31656b4d9d2069c904ae474ef46a29e6f966 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Architecture-specific unaligned trap handling.
				3	*
				4	* Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
				5	* Stephane Eranian <eranian@hpl.hp.com>
				6	* David Mosberger-Tang <davidm@hpl.hp.com>
				7	*
				8	* 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
				9	* get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
				10	* stacked register returns an undefined value; it does NOT trigger a
				11	* "rsvd register fault").
				12	* 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
				13	* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
				14	* 2001/01/17 Add support emulation of unaligned kernel accesses.
				15	*/
S.Caglar Onur	5cf1f7c	2008-03-28 14:27:05 -0700	[diff] [blame]	16	#include <linux/jiffies.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	#include <linux/kernel.h>
				18	#include <linux/sched.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	19	#include <linux/tty.h>
Akinobu Mita	7683a3f	2010-02-28 19:58:14 +0900	[diff] [blame]	20	#include <linux/ratelimit.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21
				22	#include <asm/intrinsics.h>
				23	#include <asm/processor.h>
				24	#include <asm/rse.h>
				25	#include <asm/uaccess.h>
				26	#include <asm/unaligned.h>
				27
/*
 * Defined outside this file.  The caller in this file passes a message
 * string, the trap-time register frame and an error code, and returns
 * early when the result is non-zero.
 */
extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	29
				30	#undef DEBUG_UNALIGNED_TRAP
				31
				32	#ifdef DEBUG_UNALIGNED_TRAP
Harvey Harrison	d4ed808	2008-03-04 15:15:00 -0800	[diff] [blame]	33	# define DPRINT(a...) do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	34	# define DDUMP(str,vp,len) dump(str, vp, len)
				35
				36	static void
				37	dump (const char str, void vp, size_t len)
				38	{
				39	unsigned char *cp = vp;
				40	int i;
				41
				42	printk("%s", str);
				43	for (i = 0; i < len; ++i)
				44	printk (" %02x", *cp++);
				45	printk("\n");
				46	}
				47	#else
				48	# define DPRINT(a...)
				49	# define DDUMP(str,vp,len)
				50	#endif
				51
				52	#define IA64_FIRST_STACKED_GR 32
				53	#define IA64_FIRST_ROTATING_FR 32
				54	#define SIGN_EXT9 0xffffffffffffff00ul
				55
				56	/*
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	57	* sysctl settable hook which tells the kernel whether to honor the
				58	* IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want
				59	* to allow the super user to enable/disable this for security reasons
				60	* (i.e. don't allow attacker to fill up logs with unaligned accesses).
				61	*/
				62	int no_unaligned_warning;
Doug Chapman	88fc241	2009-01-15 10:38:56 -0800	[diff] [blame]	63	int unaligned_dump_stack;
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	64
				65	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	66	* For M-unit:
				67	*
				68	* opcode \| m \| x6 \|
				69	* --------\|------\|---------\|
				70	* [40-37] \| [36] \| [35:30] \|
				71	* --------\|------\|---------\|
				72	* 4 \| 1 \| 6 \| = 11 bits
				73	* --------------------------
				74	* However bits [31:30] are not directly useful to distinguish between
				75	* load/store so we can use [35:32] instead, which gives the following
				76	* mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
				77	* checking the m-bit until later in the load/store emulation.
				78	*/
				79	#define IA64_OPCODE_MASK 0x1ef
				80	#define IA64_OPCODE_SHIFT 32
				81
				82	/*
				83	* Table C-28 Integer Load/Store
				84	*
				85	* We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
				86	*
				87	* ld8.fill, st8.fill MUST be aligned because the RNATs are based on
				88	* the address (bits [8:3]), so we must failed.
				89	*/
				90	#define LD_OP 0x080
				91	#define LDS_OP 0x081
				92	#define LDA_OP 0x082
				93	#define LDSA_OP 0x083
				94	#define LDBIAS_OP 0x084
				95	#define LDACQ_OP 0x085
				96	/* 0x086, 0x087 are not relevant */
				97	#define LDCCLR_OP 0x088
				98	#define LDCNC_OP 0x089
				99	#define LDCCLRACQ_OP 0x08a
				100	#define ST_OP 0x08c
				101	#define STREL_OP 0x08d
				102	/* 0x08e,0x8f are not relevant */
				103
				104	/*
				105	* Table C-29 Integer Load +Reg
				106	*
				107	* we use the ld->m (bit [36:36]) field to determine whether or not we have
				108	* a load/store of this form.
				109	*/
				110
				111	/*
				112	* Table C-30 Integer Load/Store +Imm
				113	*
				114	* We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
				115	*
				116	* ld8.fill, st8.fill must be aligned because the Nat register are based on
				117	* the address, so we must fail and the program must be fixed.
				118	*/
				119	#define LD_IMM_OP 0x0a0
				120	#define LDS_IMM_OP 0x0a1
				121	#define LDA_IMM_OP 0x0a2
				122	#define LDSA_IMM_OP 0x0a3
				123	#define LDBIAS_IMM_OP 0x0a4
				124	#define LDACQ_IMM_OP 0x0a5
				125	/* 0x0a6, 0xa7 are not relevant */
				126	#define LDCCLR_IMM_OP 0x0a8
				127	#define LDCNC_IMM_OP 0x0a9
				128	#define LDCCLRACQ_IMM_OP 0x0aa
				129	#define ST_IMM_OP 0x0ac
				130	#define STREL_IMM_OP 0x0ad
				131	/* 0x0ae,0xaf are not relevant */
				132
				133	/*
				134	* Table C-32 Floating-point Load/Store
				135	*/
				136	#define LDF_OP 0x0c0
				137	#define LDFS_OP 0x0c1
				138	#define LDFA_OP 0x0c2
				139	#define LDFSA_OP 0x0c3
				140	/* 0x0c6 is irrelevant */
				141	#define LDFCCLR_OP 0x0c8
				142	#define LDFCNC_OP 0x0c9
				143	/* 0x0cb is irrelevant */
				144	#define STF_OP 0x0cc
				145
				146	/*
				147	* Table C-33 Floating-point Load +Reg
				148	*
				149	* we use the ld->m (bit [36:36]) field to determine whether or not we have
				150	* a load/store of this form.
				151	*/
				152
				153	/*
				154	* Table C-34 Floating-point Load/Store +Imm
				155	*/
				156	#define LDF_IMM_OP 0x0e0
				157	#define LDFS_IMM_OP 0x0e1
				158	#define LDFA_IMM_OP 0x0e2
				159	#define LDFSA_IMM_OP 0x0e3
				160	/* 0x0e6 is irrelevant */
				161	#define LDFCCLR_IMM_OP 0x0e8
				162	#define LDFCNC_IMM_OP 0x0e9
				163	#define STF_IMM_OP 0x0ec
				164
				165	typedef struct {
				166	unsigned long qp:6; /* [0:5] */
				167	unsigned long r1:7; /* [6:12] */
				168	unsigned long imm:7; /* [13:19] */
				169	unsigned long r3:7; /* [20:26] */
				170	unsigned long x:1; /* [27:27] */
				171	unsigned long hint:2; /* [28:29] */
				172	unsigned long x6_sz:2; /* [30:31] */
				173	unsigned long x6_op:4; /* [32:35], x6 = x6_sz\|x6_op */
				174	unsigned long m:1; /* [36:36] */
				175	unsigned long op:4; /* [37:40] */
				176	unsigned long pad:23; /* [41:63] */
				177	} load_store_t;
				178
				179
				180	typedef enum {
				181	UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
				182	UPD_REG /* ldXZ r1=[r3],r2 */
				183	} update_t;
				184
				185	/*
				186	* We use tables to keep track of the offsets of registers in the saved state.
				187	* This way we save having big switch/case statements.
				188	*
				189	* We use bit 0 to indicate switch_stack or pt_regs.
				190	* The offset is simply shifted by 1 bit.
				191	* A 2-byte value should be enough to hold any kind of offset
				192	*
				193	* In case the calling convention changes (and thus pt_regs/switch_stack)
				194	* simply use RSW instead of RPT or vice-versa.
				195	*/
				196
				197	#define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
				198	#define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
				199
				200	#define RPT(x) (RPO(x) << 1)
				201	#define RSW(x) (1\| RSO(x)<<1)
				202
				203	#define GR_OFFS(x) (gr_info[x]>>1)
				204	#define GR_IN_SW(x) (gr_info[x] & 0x1)
				205
				206	#define FR_OFFS(x) (fr_info[x]>>1)
				207	#define FR_IN_SW(x) (fr_info[x] & 0x1)
				208
				209	static u16 gr_info[32]={
				210	0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
				211
				212	RPT(r1), RPT(r2), RPT(r3),
				213
				214	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
				215
				216	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
				217	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
				218
				219	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
				220	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
				221	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
				222	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
				223	};
				224
				225	static u16 fr_info[32]={
				226	0, /* constant : WE SHOULD NEVER GET THIS */
				227	0, /* constant : WE SHOULD NEVER GET THIS */
				228
				229	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
				230
				231	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
				232	RPT(f10), RPT(f11),
				233
				234	RSW(f12), RSW(f13), RSW(f14),
				235	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
				236	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
				237	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
				238	RSW(f30), RSW(f31)
				239	};
				240
				241	/* Invalidate ALAT entry for integer register REGNO. */
				242	static void
				243	invala_gr (int regno)
				244	{
				245	# define F(reg) case reg: ia64_invala_gr(reg); break
				246
				247	switch (regno) {
				248	F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
				249	F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
				250	F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
				251	F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
				252	F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
				253	F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
				254	F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
				255	F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
				256	F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
				257	F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
				258	F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
				259	F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
				260	F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
				261	F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
				262	F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
				263	F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
				264	}
				265	# undef F
				266	}
				267
				268	/* Invalidate ALAT entry for floating-point register REGNO. */
				269	static void
				270	invala_fr (int regno)
				271	{
				272	# define F(reg) case reg: ia64_invala_fr(reg); break
				273
				274	switch (regno) {
				275	F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
				276	F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
				277	F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
				278	F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
				279	F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
				280	F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
				281	F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
				282	F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
				283	F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
				284	F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
				285	F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
				286	F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
				287	F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
				288	F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
				289	F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
				290	F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
				291	}
				292	# undef F
				293	}
				294
				295	static inline unsigned long
				296	rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
				297	{
				298	reg += rrb;
				299	if (reg >= sor)
				300	reg -= sor;
				301	return reg;
				302	}
				303
				304	static void
				305	set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
				306	{
				307	struct switch_stack sw = (struct switch_stack ) regs - 1;
				308	unsigned long bsp, bspstore, addr, rnat_addr, *ubs_end;
				309	unsigned long kbs = (void ) current + IA64_RBS_OFFSET;
				310	unsigned long rnats, nat_mask;
				311	unsigned long on_kbs;
				312	long sof = (regs->cr_ifs) & 0x7f;
				313	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
				314	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
				315	long ridx = r1 - 32;
				316
				317	if (ridx >= sof) {
				318	/* this should never happen, as the "rsvd register fault" has higher priority */
				319	DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
				320	return;
				321	}
				322
				323	if (ridx < sor)
				324	ridx = rotate_reg(sor, rrb_gr, ridx);
				325
				326	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
				327	r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
				328
				329	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
				330	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
				331	if (addr >= kbs) {
				332	/* the register is on the kernel backing store: easy... */
				333	rnat_addr = ia64_rse_rnat_addr(addr);
				334	if ((unsigned long) rnat_addr >= sw->ar_bspstore)
				335	rnat_addr = &sw->ar_rnat;
				336	nat_mask = 1UL << ia64_rse_slot_num(addr);
				337
				338	*addr = val;
				339	if (nat)
				340	*rnat_addr \|= nat_mask;
				341	else
				342	*rnat_addr &= ~nat_mask;
				343	return;
				344	}
				345
				346	if (!user_stack(current, regs)) {
				347	DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
				348	return;
				349	}
				350
				351	bspstore = (unsigned long *)regs->ar_bspstore;
				352	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
				353	bsp = ia64_rse_skip_regs(ubs_end, -sof);
				354	addr = ia64_rse_skip_regs(bsp, ridx);
				355
				356	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void ) ubs_end, (void ) bsp, (void *) addr);
				357
				358	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
				359
				360	rnat_addr = ia64_rse_rnat_addr(addr);
				361
				362	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
				363	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
				364	(void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
				365
				366	nat_mask = 1UL << ia64_rse_slot_num(addr);
				367	if (nat)
				368	rnats \|= nat_mask;
				369	else
				370	rnats &= ~nat_mask;
				371	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
				372
				373	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
				374	}
				375
				376
				377	static void
				378	get_rse_reg (struct pt_regs regs, unsigned long r1, unsigned long val, int *nat)
				379	{
				380	struct switch_stack sw = (struct switch_stack ) regs - 1;
				381	unsigned long bsp, addr, rnat_addr, ubs_end, *bspstore;
				382	unsigned long kbs = (void ) current + IA64_RBS_OFFSET;
				383	unsigned long rnats, nat_mask;
				384	unsigned long on_kbs;
				385	long sof = (regs->cr_ifs) & 0x7f;
				386	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
				387	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
				388	long ridx = r1 - 32;
				389
				390	if (ridx >= sof) {
				391	/* read of out-of-frame register returns an undefined value; 0 in our case. */
				392	DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
				393	goto fail;
				394	}
				395
				396	if (ridx < sor)
				397	ridx = rotate_reg(sor, rrb_gr, ridx);
				398
				399	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
				400	r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
				401
				402	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
				403	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
				404	if (addr >= kbs) {
				405	/* the register is on the kernel backing store: easy... */
				406	val = addr;
				407	if (nat) {
				408	rnat_addr = ia64_rse_rnat_addr(addr);
				409	if ((unsigned long) rnat_addr >= sw->ar_bspstore)
				410	rnat_addr = &sw->ar_rnat;
				411	nat_mask = 1UL << ia64_rse_slot_num(addr);
				412	nat = (rnat_addr & nat_mask) != 0;
				413	}
				414	return;
				415	}
				416
				417	if (!user_stack(current, regs)) {
				418	DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
				419	goto fail;
				420	}
				421
				422	bspstore = (unsigned long *)regs->ar_bspstore;
				423	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
				424	bsp = ia64_rse_skip_regs(ubs_end, -sof);
				425	addr = ia64_rse_skip_regs(bsp, ridx);
				426
				427	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void ) ubs_end, (void ) bsp, (void *) addr);
				428
				429	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
				430
				431	if (nat) {
				432	rnat_addr = ia64_rse_rnat_addr(addr);
				433	nat_mask = 1UL << ia64_rse_slot_num(addr);
				434
				435	DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
				436
				437	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
				438	*nat = (rnats & nat_mask) != 0;
				439	}
				440	return;
				441
				442	fail:
				443	*val = 0;
				444	if (nat)
				445	*nat = 0;
				446	return;
				447	}
				448
				449
				450	static void
				451	setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
				452	{
				453	struct switch_stack sw = (struct switch_stack ) regs - 1;
				454	unsigned long addr;
				455	unsigned long bitmask;
				456	unsigned long *unat;
				457
				458	/*
				459	* First takes care of stacked registers
				460	*/
				461	if (regnum >= IA64_FIRST_STACKED_GR) {
				462	set_rse_reg(regs, regnum, val, nat);
				463	return;
				464	}
				465
				466	/*
				467	* Using r0 as a target raises a General Exception fault which has higher priority
				468	* than the Unaligned Reference fault.
				469	*/
				470
				471	/*
				472	* Now look at registers in [0-31] range and init correct UNAT
				473	*/
				474	if (GR_IN_SW(regnum)) {
				475	addr = (unsigned long)sw;
				476	unat = &sw->ar_unat;
				477	} else {
				478	addr = (unsigned long)regs;
				479	unat = &sw->caller_unat;
				480	}
				481	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
				482	addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
				483	/*
				484	* add offset from base of struct
				485	* and do it !
				486	*/
				487	addr += GR_OFFS(regnum);
				488
				489	(unsigned long )addr = val;
				490
				491	/*
				492	* We need to clear the corresponding UNAT bit to fully emulate the load
				493	* UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
				494	*/
				495	bitmask = 1UL << (addr >> 3 & 0x3f);
				496	DPRINT("0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void ) unat, *unat);
				497	if (nat) {
				498	*unat \|= bitmask;
				499	} else {
				500	*unat &= ~bitmask;
				501	}
				502	DPRINT("0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void ) unat,*unat);
				503	}
				504
				505	/*
				506	* Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
				507	* range from 32-127, result is in the range from 0-95.
				508	*/
				509	static inline unsigned long
				510	fph_index (struct pt_regs *regs, long regnum)
				511	{
				512	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
				513	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
				514	}
				515
				516	static void
				517	setfpreg (unsigned long regnum, struct ia64_fpreg fpval, struct pt_regs regs)
				518	{
				519	struct switch_stack sw = (struct switch_stack )regs - 1;
				520	unsigned long addr;
				521
				522	/*
				523	* From EAS-2.5: FPDisableFault has higher priority than Unaligned
				524	* Fault. Thus, when we get here, we know the partition is enabled.
				525	* To update f32-f127, there are three choices:
				526	*
				527	* (1) save f32-f127 to thread.fph and update the values there
				528	* (2) use a gigantic switch statement to directly access the registers
				529	* (3) generate code on the fly to update the desired register
				530	*
				531	* For now, we are using approach (1).
				532	*/
				533	if (regnum >= IA64_FIRST_ROTATING_FR) {
				534	ia64_sync_fph(current);
				535	current->thread.fph[fph_index(regs, regnum)] = *fpval;
				536	} else {
				537	/*
				538	* pt_regs or switch_stack ?
				539	*/
				540	if (FR_IN_SW(regnum)) {
				541	addr = (unsigned long)sw;
				542	} else {
				543	addr = (unsigned long)regs;
				544	}
				545
				546	DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
				547
				548	addr += FR_OFFS(regnum);
				549	(struct ia64_fpreg )addr = *fpval;
				550
				551	/*
				552	* mark the low partition as being used now
				553	*
				554	* It is highly unlikely that this bit is not already set, but
				555	* let's do it for safety.
				556	*/
				557	regs->cr_ipsr \|= IA64_PSR_MFL;
				558	}
				559	}
				560
				561	/*
				562	* Those 2 inline functions generate the spilled versions of the constant floating point
				563	* registers which can be used with stfX
				564	*/
				565	static inline void
				566	float_spill_f0 (struct ia64_fpreg *final)
				567	{
				568	ia64_stf_spill(final, 0);
				569	}
				570
				571	static inline void
				572	float_spill_f1 (struct ia64_fpreg *final)
				573	{
				574	ia64_stf_spill(final, 1);
				575	}
				576
				577	static void
				578	getfpreg (unsigned long regnum, struct ia64_fpreg fpval, struct pt_regs regs)
				579	{
				580	struct switch_stack sw = (struct switch_stack ) regs - 1;
				581	unsigned long addr;
				582
				583	/*
				584	* From EAS-2.5: FPDisableFault has higher priority than
				585	* Unaligned Fault. Thus, when we get here, we know the partition is
				586	* enabled.
				587	*
				588	* When regnum > 31, the register is still live and we need to force a save
				589	* to current->thread.fph to get access to it. See discussion in setfpreg()
				590	* for reasons and other ways of doing this.
				591	*/
				592	if (regnum >= IA64_FIRST_ROTATING_FR) {
				593	ia64_flush_fph(current);
				594	*fpval = current->thread.fph[fph_index(regs, regnum)];
				595	} else {
				596	/*
				597	* f0 = 0.0, f1= 1.0. Those registers are constant and are thus
				598	* not saved, we must generate their spilled form on the fly
				599	*/
				600	switch(regnum) {
				601	case 0:
				602	float_spill_f0(fpval);
				603	break;
				604	case 1:
				605	float_spill_f1(fpval);
				606	break;
				607	default:
				608	/*
				609	* pt_regs or switch_stack ?
				610	*/
				611	addr = FR_IN_SW(regnum) ? (unsigned long)sw
				612	: (unsigned long)regs;
				613
				614	DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
				615	FR_IN_SW(regnum), addr, FR_OFFS(regnum));
				616
				617	addr += FR_OFFS(regnum);
				618	fpval = (struct ia64_fpreg *)addr;
				619	}
				620	}
				621	}
				622
				623
				624	static void
				625	getreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
				626	{
				627	struct switch_stack sw = (struct switch_stack ) regs - 1;
				628	unsigned long addr, *unat;
				629
				630	if (regnum >= IA64_FIRST_STACKED_GR) {
				631	get_rse_reg(regs, regnum, val, nat);
				632	return;
				633	}
				634
				635	/*
				636	* take care of r0 (read-only always evaluate to 0)
				637	*/
				638	if (regnum == 0) {
				639	*val = 0;
				640	if (nat)
				641	*nat = 0;
				642	return;
				643	}
				644
				645	/*
				646	* Now look at registers in [0-31] range and init correct UNAT
				647	*/
				648	if (GR_IN_SW(regnum)) {
				649	addr = (unsigned long)sw;
				650	unat = &sw->ar_unat;
				651	} else {
				652	addr = (unsigned long)regs;
				653	unat = &sw->caller_unat;
				654	}
				655
				656	DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
				657
				658	addr += GR_OFFS(regnum);
				659
				660	val = (unsigned long *)addr;
				661
				662	/*
				663	* do it only when requested
				664	*/
				665	if (nat)
				666	nat = (unat >> (addr >> 3 & 0x3f)) & 0x1UL;
				667	}
				668
				669	static void
				670	emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
				671	{
				672	/*
				673	* IMPORTANT:
				674	* Given the way we handle unaligned speculative loads, we should
				675	* not get to this point in the code but we keep this sanity check,
				676	* just in case.
				677	*/
				678	if (ld.x6_op == 1 \|\| ld.x6_op == 3) {
Harvey Harrison	d4ed808	2008-03-04 15:15:00 -0800	[diff] [blame]	679	printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
Jan Beulich	620de2f	2008-02-04 23:43:03 -0800	[diff] [blame]	680	if (die_if_kernel("unaligned reference on speculative load with register update\n",
				681	regs, 30))
				682	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	683	}
				684
				685
				686	/*
				687	* at this point, we know that the base register to update is valid i.e.,
				688	* it's not r0
				689	*/
				690	if (type == UPD_IMMEDIATE) {
				691	unsigned long imm;
				692
				693	/*
				694	* Load +Imm: ldXZ r1=[r3],imm(9)
				695	*
				696	*
				697	* form imm9: [13:19] contain the first 7 bits
				698	*/
				699	imm = ld.x << 7 \| ld.imm;
				700
				701	/*
				702	* sign extend (1+8bits) if m set
				703	*/
				704	if (ld.m) imm \|= SIGN_EXT9;
				705
				706	/*
				707	* ifa == r3 and we know that the NaT bit on r3 was clear so
				708	* we can directly use ifa.
				709	*/
				710	ifa += imm;
				711
				712	setreg(ld.r3, ifa, 0, regs);
				713
				714	DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
				715
				716	} else if (ld.m) {
				717	unsigned long r2;
				718	int nat_r2;
				719
				720	/*
				721	* Load +Reg Opcode: ldXZ r1=[r3],r2
				722	*
				723	* Note: that we update r3 even in the case of ldfX.a
				724	* (where the load does not happen)
				725	*
				726	* The way the load algorithm works, we know that r3 does not
				727	* have its NaT bit set (would have gotten NaT consumption
				728	* before getting the unaligned fault). So we can use ifa
				729	* which equals r3 at this point.
				730	*
				731	* IMPORTANT:
				732	* The above statement holds ONLY because we know that we
				733	* never reach this code when trying to do a ldX.s.
				734	* If we ever make it to here on an ldfX.s then
				735	*/
				736	getreg(ld.imm, &r2, &nat_r2, regs);
				737
				738	ifa += r2;
				739
				740	/*
				741	* propagate Nat r2 -> r3
				742	*/
				743	setreg(ld.r3, ifa, nat_r2, regs);
				744
				745	DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
				746	}
				747	}
				748
				749
				750	static int
				751	emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				752	{
				753	unsigned int len = 1 << ld.x6_sz;
				754	unsigned long val = 0;
				755
				756	/*
				757	* r0, as target, doesn't need to be checked because Illegal Instruction
				758	* faults have higher priority than unaligned faults.
				759	*
				760	* r0 cannot be found as the base as it would never generate an
				761	* unaligned reference.
				762	*/
				763
				764	/*
				765	* ldX.a we will emulate load and also invalidate the ALAT entry.
				766	* See comment below for explanation on how we handle ldX.a
				767	*/
				768
				769	if (len != 2 && len != 4 && len != 8) {
				770	DPRINT("unknown size: x6=%d\n", ld.x6_sz);
				771	return -1;
				772	}
				773	/* this assumes little-endian byte-order: */
				774	if (copy_from_user(&val, (void __user *) ifa, len))
				775	return -1;
				776	setreg(ld.r1, val, 0, regs);
				777
				778	/*
				779	* check for updates on any kind of loads
				780	*/
				781	if (ld.op == 0x5 \|\| ld.m)
				782	emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
				783
				784	/*
				785	* handling of various loads (based on EAS2.4):
				786	*
				787	* ldX.acq (ordered load):
				788	* - acquire semantics would have been used, so force fence instead.
				789	*
				790	* ldX.c.clr (check load and clear):
				791	* - if we get to this handler, it's because the entry was not in the ALAT.
				792	* Therefore the operation reverts to a normal load
				793	*
				794	* ldX.c.nc (check load no clear):
				795	* - same as previous one
				796	*
				797	* ldX.c.clr.acq (ordered check load and clear):
				798	* - same as above for c.clr part. The load needs to have acquire semantics. So
				799	* we use the fence semantics which is stronger and thus ensures correctness.
				800	*
				801	* ldX.a (advanced load):
				802	* - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
				803	* address doesn't match requested size alignment. This means that we would
				804	* possibly need more than one load to get the result.
				805	*
				806	* The load part can be handled just like a normal load, however the difficult
				807	* part is to get the right thing into the ALAT. The critical piece of information
				808	* in the base address of the load & size. To do that, a ld.a must be executed,
				809	* clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
				810	* if we use the same target register, we will be okay for the check.a instruction.
				811	* If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
				812	* which would overlap within [r3,r3+X] (the size of the load was store in the
				813	* ALAT). If such an entry is found the entry is invalidated. But this is not good
				814	* enough, take the following example:
				815	* r3=3
				816	* ld4.a r1=[r3]
				817	*
				818	* Could be emulated by doing:
				819	* ld1.a r1=[r3],1
				820	* store to temporary;
				821	* ld1.a r1=[r3],1
				822	* store & shift to temporary;
				823	* ld1.a r1=[r3],1
				824	* store & shift to temporary;
				825	* ld1.a r1=[r3]
				826	* store & shift to temporary;
				827	* r1=temporary
				828	*
				829	* So in this case, you would get the right value is r1 but the wrong info in
				830	* the ALAT. Notice that you could do it in reverse to finish with address 3
				831	* but you would still get the size wrong. To get the size right, one needs to
				832	* execute exactly the same kind of load. You could do it from a aligned
				833	* temporary location, but you would get the address wrong.
				834	*
				835	* So no matter what, it is not possible to emulate an advanced load
				836	* correctly. But is that really critical ?
				837	*
				838	* We will always convert ld.a into a normal load with ALAT invalidated. This
				839	* will enable compiler to do optimization where certain code path after ld.a
				840	* is not required to have ld.c/chk.a, e.g., code path with no intervening stores.
				841	*
				842	* If there is a store after the advanced load, one must either do a ld.c.* or
				843	* chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
				844	* entry found in ALAT), and that's perfectly ok because:
				845	*
				846	* - ld.c.*, if the entry is not present a normal load is executed
				847	* - chk.a.*, if the entry is not present, execution jumps to recovery code
				848	*
				849	* In either case, the load can be potentially retried in another form.
				850	*
				851	* ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
				852	* up a stale entry later). The register base update MUST also be performed.
				853	*/
				854
				855	/*
				856	* when the load has the .acq completer then
				857	* use ordering fence.
				858	*/
				859	if (ld.x6_op == 0x5 \|\| ld.x6_op == 0xa)
				860	mb();
				861
				862	/*
				863	* invalidate ALAT entry in case of advanced load
				864	*/
				865	if (ld.x6_op == 0x2)
				866	invala_gr(ld.r1);
				867
				868	return 0;
				869	}
				870
				871	static int
				872	emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				873	{
				874	unsigned long r2;
				875	unsigned int len = 1 << ld.x6_sz;
				876
				877	/*
				878	* if we get to this handler, Nat bits on both r3 and r2 have already
				879	* been checked. so we don't need to do it
				880	*
				881	* extract the value to be stored
				882	*/
				883	getreg(ld.imm, &r2, NULL, regs);
				884
				885	/*
				886	* we rely on the macros in unaligned.h for now i.e.,
				887	* we let the compiler figure out how to read memory gracefully.
				888	*
				889	* We need this switch/case because the way the inline function
				890	* works. The code is optimized by the compiler and looks like
				891	* a single switch/case.
				892	*/
				893	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
				894
				895	if (len != 2 && len != 4 && len != 8) {
				896	DPRINT("unknown size: x6=%d\n", ld.x6_sz);
				897	return -1;
				898	}
				899
				900	/* this assumes little-endian byte-order: */
				901	if (copy_to_user((void __user *) ifa, &r2, len))
				902	return -1;
				903
				904	/*
				905	* stX [r3]=r2,imm(9)
				906	*
				907	* NOTE:
				908	* ld.r3 can never be r0, because r0 would not generate an
				909	* unaligned access.
				910	*/
				911	if (ld.op == 0x5) {
				912	unsigned long imm;
				913
				914	/*
				915	* form imm9: [12:6] contain first 7bits
				916	*/
				917	imm = ld.x << 7 \| ld.r1;
				918	/*
				919	* sign extend (8bits) if m set
				920	*/
				921	if (ld.m) imm \|= SIGN_EXT9;
				922	/*
				923	* ifa == r3 (NaT is necessarily cleared)
				924	*/
				925	ifa += imm;
				926
				927	DPRINT("imm=%lx r3=%lx\n", imm, ifa);
				928
				929	setreg(ld.r3, ifa, 0, regs);
				930	}
				931	/*
				932	* we don't have alat_invalidate_multiple() so we need
				933	* to do the complete flush :-<<
				934	*/
				935	ia64_invala();
				936
				937	/*
				938	* stX.rel: use fence instead of release
				939	*/
				940	if (ld.x6_op == 0xd)
				941	mb();
				942
				943	return 0;
				944	}
				945
				946	/*
				947	* floating point operations sizes in bytes
				948	*/
				949	static const unsigned char float_fsz[4]={
				950	10, /* extended precision (e) */
				951	8, /* integer (8) */
				952	4, /* single precision (s) */
				953	8 /* double precision (d) */
				954	};
				955
				956	static inline void
				957	mem2float_extended (struct ia64_fpreg init, struct ia64_fpreg final)
				958	{
				959	ia64_ldfe(6, init);
				960	ia64_stop();
				961	ia64_stf_spill(final, 6);
				962	}
				963
				964	static inline void
				965	mem2float_integer (struct ia64_fpreg init, struct ia64_fpreg final)
				966	{
				967	ia64_ldf8(6, init);
				968	ia64_stop();
				969	ia64_stf_spill(final, 6);
				970	}
				971
				972	static inline void
				973	mem2float_single (struct ia64_fpreg init, struct ia64_fpreg final)
				974	{
				975	ia64_ldfs(6, init);
				976	ia64_stop();
				977	ia64_stf_spill(final, 6);
				978	}
				979
				980	static inline void
				981	mem2float_double (struct ia64_fpreg init, struct ia64_fpreg final)
				982	{
				983	ia64_ldfd(6, init);
				984	ia64_stop();
				985	ia64_stf_spill(final, 6);
				986	}
				987
				988	static inline void
				989	float2mem_extended (struct ia64_fpreg init, struct ia64_fpreg final)
				990	{
				991	ia64_ldf_fill(6, init);
				992	ia64_stop();
				993	ia64_stfe(final, 6);
				994	}
				995
				996	static inline void
				997	float2mem_integer (struct ia64_fpreg init, struct ia64_fpreg final)
				998	{
				999	ia64_ldf_fill(6, init);
				1000	ia64_stop();
				1001	ia64_stf8(final, 6);
				1002	}
				1003
				1004	static inline void
				1005	float2mem_single (struct ia64_fpreg init, struct ia64_fpreg final)
				1006	{
				1007	ia64_ldf_fill(6, init);
				1008	ia64_stop();
				1009	ia64_stfs(final, 6);
				1010	}
				1011
				1012	static inline void
				1013	float2mem_double (struct ia64_fpreg init, struct ia64_fpreg final)
				1014	{
				1015	ia64_ldf_fill(6, init);
				1016	ia64_stop();
				1017	ia64_stfd(final, 6);
				1018	}
				1019
				1020	static int
				1021	emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1022	{
				1023	struct ia64_fpreg fpr_init[2];
				1024	struct ia64_fpreg fpr_final[2];
				1025	unsigned long len = float_fsz[ld.x6_sz];
				1026
				1027	/*
				1028	* fr0 & fr1 don't need to be checked because Illegal Instruction faults have
				1029	* higher priority than unaligned faults.
				1030	*
				1031	* r0 cannot be found as the base as it would never generate an unaligned
				1032	* reference.
				1033	*/
				1034
				1035	/*
				1036	* make sure we get clean buffers
				1037	*/
				1038	memset(&fpr_init, 0, sizeof(fpr_init));
				1039	memset(&fpr_final, 0, sizeof(fpr_final));
				1040
				1041	/*
				1042	* ldfpX.a: we don't try to emulate anything but we must
				1043	* invalidate the ALAT entry and execute updates, if any.
				1044	*/
				1045	if (ld.x6_op != 0x2) {
				1046	/*
				1047	* This assumes little-endian byte-order. Note that there is no "ldfpe"
				1048	* instruction:
				1049	*/
				1050	if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
				1051	\|\| copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
				1052	return -1;
				1053
				1054	DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
				1055	DDUMP("frp_init =", &fpr_init, 2*len);
				1056	/*
				1057	* XXX fixme
				1058	* Could optimize inlines by using ldfpX & 2 spills
				1059	*/
				1060	switch( ld.x6_sz ) {
				1061	case 0:
				1062	mem2float_extended(&fpr_init[0], &fpr_final[0]);
				1063	mem2float_extended(&fpr_init[1], &fpr_final[1]);
				1064	break;
				1065	case 1:
				1066	mem2float_integer(&fpr_init[0], &fpr_final[0]);
				1067	mem2float_integer(&fpr_init[1], &fpr_final[1]);
				1068	break;
				1069	case 2:
				1070	mem2float_single(&fpr_init[0], &fpr_final[0]);
				1071	mem2float_single(&fpr_init[1], &fpr_final[1]);
				1072	break;
				1073	case 3:
				1074	mem2float_double(&fpr_init[0], &fpr_final[0]);
				1075	mem2float_double(&fpr_init[1], &fpr_final[1]);
				1076	break;
				1077	}
				1078	DDUMP("fpr_final =", &fpr_final, 2*len);
				1079	/*
				1080	* XXX fixme
				1081	*
				1082	* A possible optimization would be to drop fpr_final and directly
				1083	* use the storage from the saved context i.e., the actual final
				1084	* destination (pt_regs, switch_stack or thread structure).
				1085	*/
				1086	setfpreg(ld.r1, &fpr_final[0], regs);
				1087	setfpreg(ld.imm, &fpr_final[1], regs);
				1088	}
				1089
				1090	/*
				1091	* Check for updates: only immediate updates are available for this
				1092	* instruction.
				1093	*/
				1094	if (ld.m) {
				1095	/*
				1096	* the immediate is implicit given the ldsz of the operation:
				1097	* single: 8 (2x4) and for all others it's 16 (2x8)
				1098	*/
				1099	ifa += len<<1;
				1100
				1101	/*
				1102	* IMPORTANT:
				1103	* the fact that we force the NaT of r3 to zero is ONLY valid
				1104	* as long as we don't come here with a ldfpX.s.
				1105	* For this reason we keep this sanity check
				1106	*/
				1107	if (ld.x6_op == 1 \|\| ld.x6_op == 3)
				1108	printk(KERN_ERR "%s: register update on speculative load pair, error\n",
Harvey Harrison	d4ed808	2008-03-04 15:15:00 -0800	[diff] [blame]	1109	__func__);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1110
				1111	setreg(ld.r3, ifa, 0, regs);
				1112	}
				1113
				1114	/*
				1115	* Invalidate ALAT entries, if any, for both registers.
				1116	*/
				1117	if (ld.x6_op == 0x2) {
				1118	invala_fr(ld.r1);
				1119	invala_fr(ld.imm);
				1120	}
				1121	return 0;
				1122	}
				1123
				1124
				1125	static int
				1126	emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1127	{
				1128	struct ia64_fpreg fpr_init;
				1129	struct ia64_fpreg fpr_final;
				1130	unsigned long len = float_fsz[ld.x6_sz];
				1131
				1132	/*
				1133	* fr0 & fr1 don't need to be checked because Illegal Instruction
				1134	* faults have higher priority than unaligned faults.
				1135	*
				1136	* r0 cannot be found as the base as it would never generate an
				1137	* unaligned reference.
				1138	*/
				1139
				1140	/*
				1141	* make sure we get clean buffers
				1142	*/
				1143	memset(&fpr_init,0, sizeof(fpr_init));
				1144	memset(&fpr_final,0, sizeof(fpr_final));
				1145
				1146	/*
				1147	* ldfX.a we don't try to emulate anything but we must
				1148	* invalidate the ALAT entry.
				1149	* See comments in ldX for descriptions on how the various loads are handled.
				1150	*/
				1151	if (ld.x6_op != 0x2) {
				1152	if (copy_from_user(&fpr_init, (void __user *) ifa, len))
				1153	return -1;
				1154
				1155	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
				1156	DDUMP("fpr_init =", &fpr_init, len);
				1157	/*
				1158	* we only do something for x6_op={0,8,9}
				1159	*/
				1160	switch( ld.x6_sz ) {
				1161	case 0:
				1162	mem2float_extended(&fpr_init, &fpr_final);
				1163	break;
				1164	case 1:
				1165	mem2float_integer(&fpr_init, &fpr_final);
				1166	break;
				1167	case 2:
				1168	mem2float_single(&fpr_init, &fpr_final);
				1169	break;
				1170	case 3:
				1171	mem2float_double(&fpr_init, &fpr_final);
				1172	break;
				1173	}
				1174	DDUMP("fpr_final =", &fpr_final, len);
				1175	/*
				1176	* XXX fixme
				1177	*
				1178	* A possible optimization would be to drop fpr_final and directly
				1179	* use the storage from the saved context i.e., the actual final
				1180	* destination (pt_regs, switch_stack or thread structure).
				1181	*/
				1182	setfpreg(ld.r1, &fpr_final, regs);
				1183	}
				1184
				1185	/*
				1186	* check for updates on any loads
				1187	*/
				1188	if (ld.op == 0x7 \|\| ld.m)
				1189	emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
				1190
				1191	/*
				1192	* invalidate ALAT entry in case of advanced floating point loads
				1193	*/
				1194	if (ld.x6_op == 0x2)
				1195	invala_fr(ld.r1);
				1196
				1197	return 0;
				1198	}
				1199
				1200
				1201	static int
				1202	emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1203	{
				1204	struct ia64_fpreg fpr_init;
				1205	struct ia64_fpreg fpr_final;
				1206	unsigned long len = float_fsz[ld.x6_sz];
				1207
				1208	/*
				1209	* make sure we get clean buffers
				1210	*/
				1211	memset(&fpr_init,0, sizeof(fpr_init));
				1212	memset(&fpr_final,0, sizeof(fpr_final));
				1213
				1214	/*
				1215	* if we get to this handler, Nat bits on both r3 and r2 have already
				1216	* been checked. so we don't need to do it
				1217	*
				1218	* extract the value to be stored
				1219	*/
				1220	getfpreg(ld.imm, &fpr_init, regs);
				1221	/*
				1222	* during this step, we extract the spilled registers from the saved
				1223	* context i.e., we refill. Then we store (no spill) to temporary
				1224	* aligned location
				1225	*/
				1226	switch( ld.x6_sz ) {
				1227	case 0:
				1228	float2mem_extended(&fpr_init, &fpr_final);
				1229	break;
				1230	case 1:
				1231	float2mem_integer(&fpr_init, &fpr_final);
				1232	break;
				1233	case 2:
				1234	float2mem_single(&fpr_init, &fpr_final);
				1235	break;
				1236	case 3:
				1237	float2mem_double(&fpr_init, &fpr_final);
				1238	break;
				1239	}
				1240	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
				1241	DDUMP("fpr_init =", &fpr_init, len);
				1242	DDUMP("fpr_final =", &fpr_final, len);
				1243
				1244	if (copy_to_user((void __user *) ifa, &fpr_final, len))
				1245	return -1;
				1246
				1247	/*
				1248	* stfX [r3]=r2,imm(9)
				1249	*
				1250	* NOTE:
				1251	* ld.r3 can never be r0, because r0 would not generate an
				1252	* unaligned access.
				1253	*/
				1254	if (ld.op == 0x7) {
				1255	unsigned long imm;
				1256
				1257	/*
				1258	* form imm9: [12:6] contain first 7bits
				1259	*/
				1260	imm = ld.x << 7 \| ld.r1;
				1261	/*
				1262	* sign extend (8bits) if m set
				1263	*/
				1264	if (ld.m)
				1265	imm \|= SIGN_EXT9;
				1266	/*
				1267	* ifa == r3 (NaT is necessarily cleared)
				1268	*/
				1269	ifa += imm;
				1270
				1271	DPRINT("imm=%lx r3=%lx\n", imm, ifa);
				1272
				1273	setreg(ld.r3, ifa, 0, regs);
				1274	}
				1275	/*
				1276	* we don't have alat_invalidate_multiple() so we need
				1277	* to do the complete flush :-<<
				1278	*/
				1279	ia64_invala();
				1280
				1281	return 0;
				1282	}
				1283
				1284	/*
				1285	* Make sure we log the unaligned access, so that user/sysadmin can notice it and
				1286	* eventually fix the program. However, we don't want to do that for every access so we
Akinobu Mita	7683a3f	2010-02-28 19:58:14 +0900	[diff] [blame]	1287	* pace it with jiffies.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1288	*/
Akinobu Mita	7683a3f	2010-02-28 19:58:14 +0900	[diff] [blame]	1289	static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1290
				1291	void
				1292	ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
				1293	{
				1294	struct ia64_psr *ipsr = ia64_psr(regs);
				1295	mm_segment_t old_fs = get_fs();
				1296	unsigned long bundle[2];
				1297	unsigned long opcode;
				1298	struct siginfo si;
				1299	const struct exception_table_entry *eh = NULL;
				1300	union {
				1301	unsigned long l;
				1302	load_store_t insn;
				1303	} u;
				1304	int ret = -1;
				1305
				1306	if (ia64_psr(regs)->be) {
				1307	/* we don't support big-endian accesses */
Jan Beulich	620de2f	2008-02-04 23:43:03 -0800	[diff] [blame]	1308	if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
				1309	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1310	goto force_sigbus;
				1311	}
				1312
				1313	/*
				1314	* Treat kernel accesses for which there is an exception handler entry the same as
				1315	* user-level unaligned accesses. Otherwise, a clever program could trick this
				1316	* handler into reading an arbitrary kernel addresses...
				1317	*/
				1318	if (!user_mode(regs))
				1319	eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
				1320	if (user_mode(regs) \|\| eh) {
				1321	if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
				1322	goto force_sigbus;
				1323
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	1324	if (!no_unaligned_warning &&
				1325	!(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
Akinobu Mita	7683a3f	2010-02-28 19:58:14 +0900	[diff] [blame]	1326	__ratelimit(&logging_rate_limit))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1327	{
				1328	char buf[200]; /* comm[] is at most 16 bytes... */
				1329	size_t len;
				1330
				1331	len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
Alexey Dobriyan	19c5870	2007-10-18 23:40:41 -0700	[diff] [blame]	1332	"ip=0x%016lx\n\r", current->comm,
				1333	task_pid_nr(current),
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1334	ifa, regs->cr_iip + ipsr->ri);
				1335	/*
				1336	* Don't call tty_write_message() if we're in the kernel; we might
				1337	* be holding locks...
				1338	*/
Peter Hurley	02f14c7	2016-01-10 20:36:10 -0800	[diff] [blame]	1339	if (user_mode(regs)) {
				1340	struct tty_struct *tty = get_current_tty();
				1341	tty_write_message(tty, buf);
				1342	tty_kref_put(tty);
				1343	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1344	buf[len-1] = '\0'; /* drop '\r' */
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	1345	/* watch for command names containing %s */
				1346	printk(KERN_WARNING "%s", buf);
				1347	} else {
Marcin Slusarz	54f8dd3	2009-09-18 12:48:12 -0700	[diff] [blame]	1348	if (no_unaligned_warning) {
				1349	printk_once(KERN_WARNING "%s(%d) encountered an "
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	1350	"unaligned exception which required\n"
				1351	"kernel assistance, which degrades "
				1352	"the performance of the application.\n"
				1353	"Unaligned exception warnings have "
				1354	"been disabled by the system "
				1355	"administrator\n"
				1356	"echo 0 > /proc/sys/kernel/ignore-"
				1357	"unaligned-usertrap to re-enable\n",
Alexey Dobriyan	19c5870	2007-10-18 23:40:41 -0700	[diff] [blame]	1358	current->comm, task_pid_nr(current));
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	1359	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1360	}
				1361	} else {
Akinobu Mita	7683a3f	2010-02-28 19:58:14 +0900	[diff] [blame]	1362	if (__ratelimit(&logging_rate_limit)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1363	printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
				1364	ifa, regs->cr_iip + ipsr->ri);
Doug Chapman	88fc241	2009-01-15 10:38:56 -0800	[diff] [blame]	1365	if (unaligned_dump_stack)
				1366	dump_stack();
				1367	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1368	set_fs(KERNEL_DS);
				1369	}
				1370
				1371	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
				1372	regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
				1373
				1374	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
				1375	goto failure;
				1376
				1377	/*
				1378	* extract the instruction from the bundle given the slot number
				1379	*/
				1380	switch (ipsr->ri) {
Matt Fleming	787ca32	2016-05-04 12:17:50 +0100	[diff] [blame]	1381	default:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1382	case 0: u.l = (bundle[0] >> 5); break;
				1383	case 1: u.l = (bundle[0] >> 46) \| (bundle[1] << 18); break;
				1384	case 2: u.l = (bundle[1] >> 23); break;
				1385	}
				1386	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
				1387
				1388	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
				1389	"ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
				1390	u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
				1391
				1392	/*
				1393	* IMPORTANT:
				1394	* Notice that the switch statement DOES not cover all possible instructions
				1395	* that DO generate unaligned references. This is made on purpose because for some
				1396	* instructions it DOES NOT make sense to try and emulate the access. Sometimes it
				1397	* is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
				1398	* the program will get a signal and die:
				1399	*
				1400	* load/store:
				1401	* - ldX.spill
				1402	* - stX.spill
				1403	* Reason: RNATs are based on addresses
				1404	* - ld16
				1405	* - st16
				1406	* Reason: ld16 and st16 are supposed to occur in a single
				1407	* memory op
				1408	*
				1409	* synchronization:
				1410	* - cmpxchg
				1411	* - fetchadd
				1412	* - xchg
				1413	* Reason: ATOMIC operations cannot be emulated properly using multiple
				1414	* instructions.
				1415	*
				1416	* speculative loads:
				1417	* - ldX.sZ
				1418	* Reason: side effects, code must be ready to deal with failure so simpler
				1419	* to let the load fail.
				1420	* ---------------------------------------------------------------------------------
				1421	* XXX fixme
				1422	*
				1423	* I would like to get rid of this switch case and do something
				1424	* more elegant.
				1425	*/
				1426	switch (opcode) {
				1427	case LDS_OP:
				1428	case LDSA_OP:
				1429	if (u.insn.x)
				1430	/* oops, really a semaphore op (cmpxchg, etc) */
				1431	goto failure;
				1432	/* no break */
				1433	case LDS_IMM_OP:
				1434	case LDSA_IMM_OP:
				1435	case LDFS_OP:
				1436	case LDFSA_OP:
				1437	case LDFS_IMM_OP:
				1438	/*
				1439	* The instruction will be retried with deferred exceptions turned on, and
				1440	* we should get Nat bit installed
				1441	*
				1442	* IMPORTANT: When PSR_ED is set, the register & immediate update forms
				1443	* are actually executed even though the operation failed. So we don't
				1444	* need to take care of this.
				1445	*/
				1446	DPRINT("forcing PSR_ED\n");
				1447	regs->cr_ipsr \|= IA64_PSR_ED;
				1448	goto done;
				1449
				1450	case LD_OP:
				1451	case LDA_OP:
				1452	case LDBIAS_OP:
				1453	case LDACQ_OP:
				1454	case LDCCLR_OP:
				1455	case LDCNC_OP:
				1456	case LDCCLRACQ_OP:
				1457	if (u.insn.x)
				1458	/* oops, really a semaphore op (cmpxchg, etc) */
				1459	goto failure;
				1460	/* no break */
				1461	case LD_IMM_OP:
				1462	case LDA_IMM_OP:
				1463	case LDBIAS_IMM_OP:
				1464	case LDACQ_IMM_OP:
				1465	case LDCCLR_IMM_OP:
				1466	case LDCNC_IMM_OP:
				1467	case LDCCLRACQ_IMM_OP:
				1468	ret = emulate_load_int(ifa, u.insn, regs);
				1469	break;
				1470
				1471	case ST_OP:
				1472	case STREL_OP:
				1473	if (u.insn.x)
				1474	/* oops, really a semaphore op (cmpxchg, etc) */
				1475	goto failure;
				1476	/* no break */
				1477	case ST_IMM_OP:
				1478	case STREL_IMM_OP:
				1479	ret = emulate_store_int(ifa, u.insn, regs);
				1480	break;
				1481
				1482	case LDF_OP:
				1483	case LDFA_OP:
				1484	case LDFCCLR_OP:
				1485	case LDFCNC_OP:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1486	if (u.insn.x)
				1487	ret = emulate_load_floatpair(ifa, u.insn, regs);
				1488	else
				1489	ret = emulate_load_float(ifa, u.insn, regs);
				1490	break;
				1491
Luck, Tony	1a49915	2008-01-14 09:59:24 -0800	[diff] [blame]	1492	case LDF_IMM_OP:
				1493	case LDFA_IMM_OP:
				1494	case LDFCCLR_IMM_OP:
				1495	case LDFCNC_IMM_OP:
				1496	ret = emulate_load_float(ifa, u.insn, regs);
				1497	break;
				1498
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1499	case STF_OP:
				1500	case STF_IMM_OP:
				1501	ret = emulate_store_float(ifa, u.insn, regs);
				1502	break;
				1503
				1504	default:
				1505	goto failure;
				1506	}
				1507	DPRINT("ret=%d\n", ret);
				1508	if (ret)
				1509	goto failure;
				1510
				1511	if (ipsr->ri == 2)
				1512	/*
				1513	* given today's architecture this case is not likely to happen because a
				1514	* memory access instruction (M) can never be in the last slot of a
				1515	* bundle. But let's keep it for now.
				1516	*/
				1517	regs->cr_iip += 16;
				1518	ipsr->ri = (ipsr->ri + 1) & 0x3;
				1519
				1520	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
				1521	done:
				1522	set_fs(old_fs); /* restore original address limit */
				1523	return;
				1524
				1525	failure:
				1526	/* something went wrong... */
				1527	if (!user_mode(regs)) {
				1528	if (eh) {
				1529	ia64_handle_exception(regs, eh);
				1530	goto done;
				1531	}
Jan Beulich	620de2f	2008-02-04 23:43:03 -0800	[diff] [blame]	1532	if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
				1533	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1534	/* NOT_REACHED */
				1535	}
				1536	force_sigbus:
				1537	si.si_signo = SIGBUS;
				1538	si.si_errno = 0;
				1539	si.si_code = BUS_ADRALN;
				1540	si.si_addr = (void __user *) ifa;
				1541	si.si_flags = 0;
				1542	si.si_isr = 0;
				1543	si.si_imm = 0;
				1544	force_sig_info(SIGBUS, &si, current);
				1545	goto done;
				1546	}