blob: f9e0ae936d1a9e278ea56bb9af1d35ae8159df26 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Architecture-specific unaligned trap handling.
3 *
4 * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 *
8 * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
9 * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
10 * stacked register returns an undefined value; it does NOT trigger a
11 * "rsvd register fault").
12 * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
13 * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
14 * 2001/01/17 Add support emulation of unaligned kernel accesses.
15 */
16#include <linux/kernel.h>
17#include <linux/sched.h>
18#include <linux/smp_lock.h>
19#include <linux/tty.h>
20
21#include <asm/intrinsics.h>
22#include <asm/processor.h>
23#include <asm/rse.h>
24#include <asm/uaccess.h>
25#include <asm/unaligned.h>
26
27extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
28
29#undef DEBUG_UNALIGNED_TRAP
30
31#ifdef DEBUG_UNALIGNED_TRAP
32# define DPRINT(a...) do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
33# define DDUMP(str,vp,len) dump(str, vp, len)
34
/*
 * Debug helper (only built when DEBUG_UNALIGNED_TRAP is defined): print STR,
 * then the LEN bytes starting at VP as space-separated two-digit hex values,
 * then a newline.
 */
static void
dump (const char *str, void *vp, size_t len)
{
	const unsigned char *cp = vp;	/* the buffer is only read */
	size_t i;			/* same type as len: no signed/unsigned comparison */

	printk("%s", str);
	for (i = 0; i < len; ++i)
		printk (" %02x", cp[i]);
	printk("\n");
}
46#else
47# define DPRINT(a...)
48# define DDUMP(str,vp,len)
49#endif
50
51#define IA64_FIRST_STACKED_GR 32
52#define IA64_FIRST_ROTATING_FR 32
53#define SIGN_EXT9 0xffffffffffffff00ul
54
55/*
56 * For M-unit:
57 *
58 * opcode | m | x6 |
59 * --------|------|---------|
60 * [40-37] | [36] | [35:30] |
61 * --------|------|---------|
62 * 4 | 1 | 6 | = 11 bits
63 * --------------------------
 * However bits [31:30] are not directly useful to distinguish between
 * load/store so we can use [35:32] instead, which gives the following
 * mask ([40:32]) using 9 bits. The 'e' digit in the mask value (0x1ef) comes
 * from the fact that the m-bit (bit 36) is masked out here: we defer checking
 * it until later in the load/store emulation.
68 */
69#define IA64_OPCODE_MASK 0x1ef
70#define IA64_OPCODE_SHIFT 32
71
72/*
73 * Table C-28 Integer Load/Store
74 *
75 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
76 *
 * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
 * the address (bits [8:3]), so we must fail.
79 */
80#define LD_OP 0x080
81#define LDS_OP 0x081
82#define LDA_OP 0x082
83#define LDSA_OP 0x083
84#define LDBIAS_OP 0x084
85#define LDACQ_OP 0x085
86/* 0x086, 0x087 are not relevant */
87#define LDCCLR_OP 0x088
88#define LDCNC_OP 0x089
89#define LDCCLRACQ_OP 0x08a
90#define ST_OP 0x08c
91#define STREL_OP 0x08d
92/* 0x08e,0x8f are not relevant */
93
94/*
95 * Table C-29 Integer Load +Reg
96 *
97 * we use the ld->m (bit [36:36]) field to determine whether or not we have
98 * a load/store of this form.
99 */
100
101/*
102 * Table C-30 Integer Load/Store +Imm
103 *
104 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
105 *
 * ld8.fill, st8.fill must be aligned because the NaT bits are based on
 * the address, so we must fail and the program must be fixed.
108 */
109#define LD_IMM_OP 0x0a0
110#define LDS_IMM_OP 0x0a1
111#define LDA_IMM_OP 0x0a2
112#define LDSA_IMM_OP 0x0a3
113#define LDBIAS_IMM_OP 0x0a4
114#define LDACQ_IMM_OP 0x0a5
115/* 0x0a6, 0xa7 are not relevant */
116#define LDCCLR_IMM_OP 0x0a8
117#define LDCNC_IMM_OP 0x0a9
118#define LDCCLRACQ_IMM_OP 0x0aa
119#define ST_IMM_OP 0x0ac
120#define STREL_IMM_OP 0x0ad
121/* 0x0ae,0xaf are not relevant */
122
123/*
124 * Table C-32 Floating-point Load/Store
125 */
126#define LDF_OP 0x0c0
127#define LDFS_OP 0x0c1
128#define LDFA_OP 0x0c2
129#define LDFSA_OP 0x0c3
130/* 0x0c6 is irrelevant */
131#define LDFCCLR_OP 0x0c8
132#define LDFCNC_OP 0x0c9
133/* 0x0cb is irrelevant */
134#define STF_OP 0x0cc
135
136/*
137 * Table C-33 Floating-point Load +Reg
138 *
139 * we use the ld->m (bit [36:36]) field to determine whether or not we have
140 * a load/store of this form.
141 */
142
143/*
144 * Table C-34 Floating-point Load/Store +Imm
145 */
146#define LDF_IMM_OP 0x0e0
147#define LDFS_IMM_OP 0x0e1
148#define LDFA_IMM_OP 0x0e2
149#define LDFSA_IMM_OP 0x0e3
150/* 0x0e6 is irrelevant */
151#define LDFCCLR_IMM_OP 0x0e8
152#define LDFCNC_IMM_OP 0x0e9
153#define STF_IMM_OP 0x0ec
154
/*
 * Decoded view of a 64-bit word holding an M-unit load/store instruction.
 * Fields are listed from the least significant bit upwards; the widths add
 * up to 64 bits.
 */
typedef struct {
	unsigned long qp:6;	/* bits  0..5  */
	unsigned long r1:7;	/* bits  6..12 */
	unsigned long imm:7;	/* bits 13..19 */
	unsigned long r3:7;	/* bits 20..26 */
	unsigned long x:1;	/* bit  27     */
	unsigned long hint:2;	/* bits 28..29 */
	unsigned long x6_sz:2;	/* bits 30..31: low half of x6 */
	unsigned long x6_op:4;	/* bits 32..35: high half of x6 (x6 = x6_sz|x6_op) */
	unsigned long m:1;	/* bit  36     */
	unsigned long op:4;	/* bits 37..40 */
	unsigned long pad:23;	/* bits 41..63 */
} load_store_t;
168
169
/* Which form of base-register update an emulated load carries. */
typedef enum {
	UPD_IMMEDIATE = 0,	/* ldXZ r1=[r3],imm(9) */
	UPD_REG = 1		/* ldXZ r1=[r3],r2 */
} update_t;
174
175/*
176 * We use tables to keep track of the offsets of registers in the saved state.
177 * This way we save having big switch/case statements.
178 *
179 * We use bit 0 to indicate switch_stack or pt_regs.
180 * The offset is simply shifted by 1 bit.
181 * A 2-byte value should be enough to hold any kind of offset
182 *
183 * In case the calling convention changes (and thus pt_regs/switch_stack)
184 * simply use RSW instead of RPT or vice-versa.
185 */
186
/* Byte offset of member x inside struct pt_regs / struct switch_stack. */
#define RPO(x)		((size_t) &((struct pt_regs *) 0)->x)
#define RSO(x)		((size_t) &((struct switch_stack *) 0)->x)

/*
 * Table entry encoding: the offset is stored shifted left by one; bit 0 is
 * clear for a pt_regs slot (RPT) and set for a switch_stack slot (RSW).
 */
#define RPT(x)		(RPO(x) << 1)
#define RSW(x)		(RSO(x) << 1 | 1)

/* Decode a general-register table entry. */
#define GR_OFFS(x)	(gr_info[x] >> 1)
#define GR_IN_SW(x)	(gr_info[x] & 0x1)

/* Decode a floating-point-register table entry. */
#define FR_OFFS(x)	(fr_info[x] >> 1)
#define FR_IN_SW(x)	(fr_info[x] & 0x1)
198
199static u16 gr_info[32]={
200 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
201
202 RPT(r1), RPT(r2), RPT(r3),
203
204 RSW(r4), RSW(r5), RSW(r6), RSW(r7),
205
206 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
207 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
208
209 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
210 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
211 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
212 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
213};
214
215static u16 fr_info[32]={
216 0, /* constant : WE SHOULD NEVER GET THIS */
217 0, /* constant : WE SHOULD NEVER GET THIS */
218
219 RSW(f2), RSW(f3), RSW(f4), RSW(f5),
220
221 RPT(f6), RPT(f7), RPT(f8), RPT(f9),
222 RPT(f10), RPT(f11),
223
224 RSW(f12), RSW(f13), RSW(f14),
225 RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
226 RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
227 RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
228 RSW(f30), RSW(f31)
229};
230
/*
 * Invalidate the ALAT entry for integer register REGNO (0-127).  Any other
 * value is silently ignored.
 *
 * NOTE(review): the one-case-per-register switch presumably exists because
 * ia64_invala_gr() needs a literal register number -- confirm against the
 * intrinsic's definition.
 */
static void
invala_gr (int regno)
{
# define INVALA_GR(n)	case n: ia64_invala_gr(n); break

	switch (regno) {
	INVALA_GR(0);   INVALA_GR(1);   INVALA_GR(2);   INVALA_GR(3);
	INVALA_GR(4);   INVALA_GR(5);   INVALA_GR(6);   INVALA_GR(7);
	INVALA_GR(8);   INVALA_GR(9);   INVALA_GR(10);  INVALA_GR(11);
	INVALA_GR(12);  INVALA_GR(13);  INVALA_GR(14);  INVALA_GR(15);
	INVALA_GR(16);  INVALA_GR(17);  INVALA_GR(18);  INVALA_GR(19);
	INVALA_GR(20);  INVALA_GR(21);  INVALA_GR(22);  INVALA_GR(23);
	INVALA_GR(24);  INVALA_GR(25);  INVALA_GR(26);  INVALA_GR(27);
	INVALA_GR(28);  INVALA_GR(29);  INVALA_GR(30);  INVALA_GR(31);
	INVALA_GR(32);  INVALA_GR(33);  INVALA_GR(34);  INVALA_GR(35);
	INVALA_GR(36);  INVALA_GR(37);  INVALA_GR(38);  INVALA_GR(39);
	INVALA_GR(40);  INVALA_GR(41);  INVALA_GR(42);  INVALA_GR(43);
	INVALA_GR(44);  INVALA_GR(45);  INVALA_GR(46);  INVALA_GR(47);
	INVALA_GR(48);  INVALA_GR(49);  INVALA_GR(50);  INVALA_GR(51);
	INVALA_GR(52);  INVALA_GR(53);  INVALA_GR(54);  INVALA_GR(55);
	INVALA_GR(56);  INVALA_GR(57);  INVALA_GR(58);  INVALA_GR(59);
	INVALA_GR(60);  INVALA_GR(61);  INVALA_GR(62);  INVALA_GR(63);
	INVALA_GR(64);  INVALA_GR(65);  INVALA_GR(66);  INVALA_GR(67);
	INVALA_GR(68);  INVALA_GR(69);  INVALA_GR(70);  INVALA_GR(71);
	INVALA_GR(72);  INVALA_GR(73);  INVALA_GR(74);  INVALA_GR(75);
	INVALA_GR(76);  INVALA_GR(77);  INVALA_GR(78);  INVALA_GR(79);
	INVALA_GR(80);  INVALA_GR(81);  INVALA_GR(82);  INVALA_GR(83);
	INVALA_GR(84);  INVALA_GR(85);  INVALA_GR(86);  INVALA_GR(87);
	INVALA_GR(88);  INVALA_GR(89);  INVALA_GR(90);  INVALA_GR(91);
	INVALA_GR(92);  INVALA_GR(93);  INVALA_GR(94);  INVALA_GR(95);
	INVALA_GR(96);  INVALA_GR(97);  INVALA_GR(98);  INVALA_GR(99);
	INVALA_GR(100); INVALA_GR(101); INVALA_GR(102); INVALA_GR(103);
	INVALA_GR(104); INVALA_GR(105); INVALA_GR(106); INVALA_GR(107);
	INVALA_GR(108); INVALA_GR(109); INVALA_GR(110); INVALA_GR(111);
	INVALA_GR(112); INVALA_GR(113); INVALA_GR(114); INVALA_GR(115);
	INVALA_GR(116); INVALA_GR(117); INVALA_GR(118); INVALA_GR(119);
	INVALA_GR(120); INVALA_GR(121); INVALA_GR(122); INVALA_GR(123);
	INVALA_GR(124); INVALA_GR(125); INVALA_GR(126); INVALA_GR(127);
	default:
		break;
	}
# undef INVALA_GR
}
257
/*
 * Invalidate the ALAT entry for floating-point register REGNO (0-127).  Any
 * other value is silently ignored.
 *
 * NOTE(review): the one-case-per-register switch presumably exists because
 * ia64_invala_fr() needs a literal register number -- confirm against the
 * intrinsic's definition.
 */
static void
invala_fr (int regno)
{
# define INVALA_FR(n)	case n: ia64_invala_fr(n); break

	switch (regno) {
	INVALA_FR(0);   INVALA_FR(1);   INVALA_FR(2);   INVALA_FR(3);
	INVALA_FR(4);   INVALA_FR(5);   INVALA_FR(6);   INVALA_FR(7);
	INVALA_FR(8);   INVALA_FR(9);   INVALA_FR(10);  INVALA_FR(11);
	INVALA_FR(12);  INVALA_FR(13);  INVALA_FR(14);  INVALA_FR(15);
	INVALA_FR(16);  INVALA_FR(17);  INVALA_FR(18);  INVALA_FR(19);
	INVALA_FR(20);  INVALA_FR(21);  INVALA_FR(22);  INVALA_FR(23);
	INVALA_FR(24);  INVALA_FR(25);  INVALA_FR(26);  INVALA_FR(27);
	INVALA_FR(28);  INVALA_FR(29);  INVALA_FR(30);  INVALA_FR(31);
	INVALA_FR(32);  INVALA_FR(33);  INVALA_FR(34);  INVALA_FR(35);
	INVALA_FR(36);  INVALA_FR(37);  INVALA_FR(38);  INVALA_FR(39);
	INVALA_FR(40);  INVALA_FR(41);  INVALA_FR(42);  INVALA_FR(43);
	INVALA_FR(44);  INVALA_FR(45);  INVALA_FR(46);  INVALA_FR(47);
	INVALA_FR(48);  INVALA_FR(49);  INVALA_FR(50);  INVALA_FR(51);
	INVALA_FR(52);  INVALA_FR(53);  INVALA_FR(54);  INVALA_FR(55);
	INVALA_FR(56);  INVALA_FR(57);  INVALA_FR(58);  INVALA_FR(59);
	INVALA_FR(60);  INVALA_FR(61);  INVALA_FR(62);  INVALA_FR(63);
	INVALA_FR(64);  INVALA_FR(65);  INVALA_FR(66);  INVALA_FR(67);
	INVALA_FR(68);  INVALA_FR(69);  INVALA_FR(70);  INVALA_FR(71);
	INVALA_FR(72);  INVALA_FR(73);  INVALA_FR(74);  INVALA_FR(75);
	INVALA_FR(76);  INVALA_FR(77);  INVALA_FR(78);  INVALA_FR(79);
	INVALA_FR(80);  INVALA_FR(81);  INVALA_FR(82);  INVALA_FR(83);
	INVALA_FR(84);  INVALA_FR(85);  INVALA_FR(86);  INVALA_FR(87);
	INVALA_FR(88);  INVALA_FR(89);  INVALA_FR(90);  INVALA_FR(91);
	INVALA_FR(92);  INVALA_FR(93);  INVALA_FR(94);  INVALA_FR(95);
	INVALA_FR(96);  INVALA_FR(97);  INVALA_FR(98);  INVALA_FR(99);
	INVALA_FR(100); INVALA_FR(101); INVALA_FR(102); INVALA_FR(103);
	INVALA_FR(104); INVALA_FR(105); INVALA_FR(106); INVALA_FR(107);
	INVALA_FR(108); INVALA_FR(109); INVALA_FR(110); INVALA_FR(111);
	INVALA_FR(112); INVALA_FR(113); INVALA_FR(114); INVALA_FR(115);
	INVALA_FR(116); INVALA_FR(117); INVALA_FR(118); INVALA_FR(119);
	INVALA_FR(120); INVALA_FR(121); INVALA_FR(122); INVALA_FR(123);
	INVALA_FR(124); INVALA_FR(125); INVALA_FR(126); INVALA_FR(127);
	default:
		break;
	}
# undef INVALA_FR
}
284
/*
 * Map register index REG (relative to the start of a rotating region of SOR
 * registers) to its rotated index, given rotating register base RRB.
 *
 * The result is (REG + RRB) reduced modulo SOR, so it is always below SOR
 * when SOR is non-zero.  A single conditional subtraction is not enough:
 * RRB is extracted by callers as a 7-bit field and may be as large as 127,
 * so REG + RRB can reach or exceed 2*SOR, and the unreduced value would then
 * be used as an index outside the rotating region.
 *
 * With SOR == 0 there is no rotating region; REG + RRB is returned as is.
 */
static inline unsigned long
rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
{
	reg += rrb;
	if (sor != 0 && reg >= sor)
		reg %= sor;
	return reg;
}
293
294static void
295set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
296{
297 struct switch_stack *sw = (struct switch_stack *) regs - 1;
298 unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
299 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
300 unsigned long rnats, nat_mask;
301 unsigned long on_kbs;
302 long sof = (regs->cr_ifs) & 0x7f;
303 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
304 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
305 long ridx = r1 - 32;
306
307 if (ridx >= sof) {
308 /* this should never happen, as the "rsvd register fault" has higher priority */
309 DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
310 return;
311 }
312
313 if (ridx < sor)
314 ridx = rotate_reg(sor, rrb_gr, ridx);
315
316 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
317 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
318
319 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
320 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
321 if (addr >= kbs) {
322 /* the register is on the kernel backing store: easy... */
323 rnat_addr = ia64_rse_rnat_addr(addr);
324 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
325 rnat_addr = &sw->ar_rnat;
326 nat_mask = 1UL << ia64_rse_slot_num(addr);
327
328 *addr = val;
329 if (nat)
330 *rnat_addr |= nat_mask;
331 else
332 *rnat_addr &= ~nat_mask;
333 return;
334 }
335
336 if (!user_stack(current, regs)) {
337 DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
338 return;
339 }
340
341 bspstore = (unsigned long *)regs->ar_bspstore;
342 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
343 bsp = ia64_rse_skip_regs(ubs_end, -sof);
344 addr = ia64_rse_skip_regs(bsp, ridx);
345
346 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
347
348 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
349
350 rnat_addr = ia64_rse_rnat_addr(addr);
351
352 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
353 DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
354 (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
355
356 nat_mask = 1UL << ia64_rse_slot_num(addr);
357 if (nat)
358 rnats |= nat_mask;
359 else
360 rnats &= ~nat_mask;
361 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
362
363 DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
364}
365
366
367static void
368get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
369{
370 struct switch_stack *sw = (struct switch_stack *) regs - 1;
371 unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
372 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
373 unsigned long rnats, nat_mask;
374 unsigned long on_kbs;
375 long sof = (regs->cr_ifs) & 0x7f;
376 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
377 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
378 long ridx = r1 - 32;
379
380 if (ridx >= sof) {
381 /* read of out-of-frame register returns an undefined value; 0 in our case. */
382 DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
383 goto fail;
384 }
385
386 if (ridx < sor)
387 ridx = rotate_reg(sor, rrb_gr, ridx);
388
389 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
390 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
391
392 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
393 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
394 if (addr >= kbs) {
395 /* the register is on the kernel backing store: easy... */
396 *val = *addr;
397 if (nat) {
398 rnat_addr = ia64_rse_rnat_addr(addr);
399 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
400 rnat_addr = &sw->ar_rnat;
401 nat_mask = 1UL << ia64_rse_slot_num(addr);
402 *nat = (*rnat_addr & nat_mask) != 0;
403 }
404 return;
405 }
406
407 if (!user_stack(current, regs)) {
408 DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
409 goto fail;
410 }
411
412 bspstore = (unsigned long *)regs->ar_bspstore;
413 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
414 bsp = ia64_rse_skip_regs(ubs_end, -sof);
415 addr = ia64_rse_skip_regs(bsp, ridx);
416
417 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
418
419 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
420
421 if (nat) {
422 rnat_addr = ia64_rse_rnat_addr(addr);
423 nat_mask = 1UL << ia64_rse_slot_num(addr);
424
425 DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
426
427 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
428 *nat = (rnats & nat_mask) != 0;
429 }
430 return;
431
432 fail:
433 *val = 0;
434 if (nat)
435 *nat = 0;
436 return;
437}
438
439
440static void
441setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
442{
443 struct switch_stack *sw = (struct switch_stack *) regs - 1;
444 unsigned long addr;
445 unsigned long bitmask;
446 unsigned long *unat;
447
448 /*
449 * First takes care of stacked registers
450 */
451 if (regnum >= IA64_FIRST_STACKED_GR) {
452 set_rse_reg(regs, regnum, val, nat);
453 return;
454 }
455
456 /*
457 * Using r0 as a target raises a General Exception fault which has higher priority
458 * than the Unaligned Reference fault.
459 */
460
461 /*
462 * Now look at registers in [0-31] range and init correct UNAT
463 */
464 if (GR_IN_SW(regnum)) {
465 addr = (unsigned long)sw;
466 unat = &sw->ar_unat;
467 } else {
468 addr = (unsigned long)regs;
469 unat = &sw->caller_unat;
470 }
471 DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
472 addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
473 /*
474 * add offset from base of struct
475 * and do it !
476 */
477 addr += GR_OFFS(regnum);
478
479 *(unsigned long *)addr = val;
480
481 /*
482 * We need to clear the corresponding UNAT bit to fully emulate the load
483 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
484 */
485 bitmask = 1UL << (addr >> 3 & 0x3f);
486 DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
487 if (nat) {
488 *unat |= bitmask;
489 } else {
490 *unat &= ~bitmask;
491 }
492 DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
493}
494
495/*
496 * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
497 * range from 32-127, result is in the range from 0-95.
498 */
499static inline unsigned long
500fph_index (struct pt_regs *regs, long regnum)
501{
502 unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
503 return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
504}
505
506static void
507setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
508{
509 struct switch_stack *sw = (struct switch_stack *)regs - 1;
510 unsigned long addr;
511
512 /*
513 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
514 * Fault. Thus, when we get here, we know the partition is enabled.
515 * To update f32-f127, there are three choices:
516 *
517 * (1) save f32-f127 to thread.fph and update the values there
518 * (2) use a gigantic switch statement to directly access the registers
519 * (3) generate code on the fly to update the desired register
520 *
521 * For now, we are using approach (1).
522 */
523 if (regnum >= IA64_FIRST_ROTATING_FR) {
524 ia64_sync_fph(current);
525 current->thread.fph[fph_index(regs, regnum)] = *fpval;
526 } else {
527 /*
528 * pt_regs or switch_stack ?
529 */
530 if (FR_IN_SW(regnum)) {
531 addr = (unsigned long)sw;
532 } else {
533 addr = (unsigned long)regs;
534 }
535
536 DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
537
538 addr += FR_OFFS(regnum);
539 *(struct ia64_fpreg *)addr = *fpval;
540
541 /*
542 * mark the low partition as being used now
543 *
544 * It is highly unlikely that this bit is not already set, but
545 * let's do it for safety.
546 */
547 regs->cr_ipsr |= IA64_PSR_MFL;
548 }
549}
550
551/*
552 * Those 2 inline functions generate the spilled versions of the constant floating point
553 * registers which can be used with stfX
554 */
/*
 * Store the spilled form of constant register f0 into *final via
 * ia64_stf_spill(); the result can be used as the source of an emulated stfX.
 */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
	ia64_stf_spill(final, 0);	/* 0 selects f0 */
}
560
/*
 * Store the spilled form of constant register f1 into *final via
 * ia64_stf_spill(); the result can be used as the source of an emulated stfX.
 */
static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
	ia64_stf_spill(final, 1);	/* 1 selects f1 */
}
566
567static void
568getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
569{
570 struct switch_stack *sw = (struct switch_stack *) regs - 1;
571 unsigned long addr;
572
573 /*
574 * From EAS-2.5: FPDisableFault has higher priority than
575 * Unaligned Fault. Thus, when we get here, we know the partition is
576 * enabled.
577 *
578 * When regnum > 31, the register is still live and we need to force a save
579 * to current->thread.fph to get access to it. See discussion in setfpreg()
580 * for reasons and other ways of doing this.
581 */
582 if (regnum >= IA64_FIRST_ROTATING_FR) {
583 ia64_flush_fph(current);
584 *fpval = current->thread.fph[fph_index(regs, regnum)];
585 } else {
586 /*
587 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
588 * not saved, we must generate their spilled form on the fly
589 */
590 switch(regnum) {
591 case 0:
592 float_spill_f0(fpval);
593 break;
594 case 1:
595 float_spill_f1(fpval);
596 break;
597 default:
598 /*
599 * pt_regs or switch_stack ?
600 */
601 addr = FR_IN_SW(regnum) ? (unsigned long)sw
602 : (unsigned long)regs;
603
604 DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
605 FR_IN_SW(regnum), addr, FR_OFFS(regnum));
606
607 addr += FR_OFFS(regnum);
608 *fpval = *(struct ia64_fpreg *)addr;
609 }
610 }
611}
612
613
614static void
615getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
616{
617 struct switch_stack *sw = (struct switch_stack *) regs - 1;
618 unsigned long addr, *unat;
619
620 if (regnum >= IA64_FIRST_STACKED_GR) {
621 get_rse_reg(regs, regnum, val, nat);
622 return;
623 }
624
625 /*
626 * take care of r0 (read-only always evaluate to 0)
627 */
628 if (regnum == 0) {
629 *val = 0;
630 if (nat)
631 *nat = 0;
632 return;
633 }
634
635 /*
636 * Now look at registers in [0-31] range and init correct UNAT
637 */
638 if (GR_IN_SW(regnum)) {
639 addr = (unsigned long)sw;
640 unat = &sw->ar_unat;
641 } else {
642 addr = (unsigned long)regs;
643 unat = &sw->caller_unat;
644 }
645
646 DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
647
648 addr += GR_OFFS(regnum);
649
650 *val = *(unsigned long *)addr;
651
652 /*
653 * do it only when requested
654 */
655 if (nat)
656 *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
657}
658
659static void
660emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
661{
662 /*
663 * IMPORTANT:
664 * Given the way we handle unaligned speculative loads, we should
665 * not get to this point in the code but we keep this sanity check,
666 * just in case.
667 */
668 if (ld.x6_op == 1 || ld.x6_op == 3) {
669 printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
670 die_if_kernel("unaligned reference on speculative load with register update\n",
671 regs, 30);
672 }
673
674
675 /*
676 * at this point, we know that the base register to update is valid i.e.,
677 * it's not r0
678 */
679 if (type == UPD_IMMEDIATE) {
680 unsigned long imm;
681
682 /*
683 * Load +Imm: ldXZ r1=[r3],imm(9)
684 *
685 *
686 * form imm9: [13:19] contain the first 7 bits
687 */
688 imm = ld.x << 7 | ld.imm;
689
690 /*
691 * sign extend (1+8bits) if m set
692 */
693 if (ld.m) imm |= SIGN_EXT9;
694
695 /*
696 * ifa == r3 and we know that the NaT bit on r3 was clear so
697 * we can directly use ifa.
698 */
699 ifa += imm;
700
701 setreg(ld.r3, ifa, 0, regs);
702
703 DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
704
705 } else if (ld.m) {
706 unsigned long r2;
707 int nat_r2;
708
709 /*
710 * Load +Reg Opcode: ldXZ r1=[r3],r2
711 *
712 * Note: that we update r3 even in the case of ldfX.a
713 * (where the load does not happen)
714 *
715 * The way the load algorithm works, we know that r3 does not
716 * have its NaT bit set (would have gotten NaT consumption
717 * before getting the unaligned fault). So we can use ifa
718 * which equals r3 at this point.
719 *
720 * IMPORTANT:
721 * The above statement holds ONLY because we know that we
722 * never reach this code when trying to do a ldX.s.
723 * If we ever make it to here on an ldfX.s then
724 */
725 getreg(ld.imm, &r2, &nat_r2, regs);
726
727 ifa += r2;
728
729 /*
730 * propagate Nat r2 -> r3
731 */
732 setreg(ld.r3, ifa, nat_r2, regs);
733
734 DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
735 }
736}
737
738
739static int
740emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
741{
742 unsigned int len = 1 << ld.x6_sz;
743 unsigned long val = 0;
744
745 /*
746 * r0, as target, doesn't need to be checked because Illegal Instruction
747 * faults have higher priority than unaligned faults.
748 *
749 * r0 cannot be found as the base as it would never generate an
750 * unaligned reference.
751 */
752
753 /*
754 * ldX.a we will emulate load and also invalidate the ALAT entry.
755 * See comment below for explanation on how we handle ldX.a
756 */
757
758 if (len != 2 && len != 4 && len != 8) {
759 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
760 return -1;
761 }
762 /* this assumes little-endian byte-order: */
763 if (copy_from_user(&val, (void __user *) ifa, len))
764 return -1;
765 setreg(ld.r1, val, 0, regs);
766
767 /*
768 * check for updates on any kind of loads
769 */
770 if (ld.op == 0x5 || ld.m)
771 emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
772
773 /*
774 * handling of various loads (based on EAS2.4):
775 *
776 * ldX.acq (ordered load):
777 * - acquire semantics would have been used, so force fence instead.
778 *
779 * ldX.c.clr (check load and clear):
780 * - if we get to this handler, it's because the entry was not in the ALAT.
781 * Therefore the operation reverts to a normal load
782 *
783 * ldX.c.nc (check load no clear):
784 * - same as previous one
785 *
786 * ldX.c.clr.acq (ordered check load and clear):
787 * - same as above for c.clr part. The load needs to have acquire semantics. So
788 * we use the fence semantics which is stronger and thus ensures correctness.
789 *
790 * ldX.a (advanced load):
791 * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
792 * address doesn't match requested size alignment. This means that we would
793 * possibly need more than one load to get the result.
794 *
795 * The load part can be handled just like a normal load, however the difficult
796 * part is to get the right thing into the ALAT. The critical piece of information
797 * in the base address of the load & size. To do that, a ld.a must be executed,
798 * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
799 * if we use the same target register, we will be okay for the check.a instruction.
800 * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
801 * which would overlap within [r3,r3+X] (the size of the load was store in the
802 * ALAT). If such an entry is found the entry is invalidated. But this is not good
803 * enough, take the following example:
804 * r3=3
805 * ld4.a r1=[r3]
806 *
807 * Could be emulated by doing:
808 * ld1.a r1=[r3],1
809 * store to temporary;
810 * ld1.a r1=[r3],1
811 * store & shift to temporary;
812 * ld1.a r1=[r3],1
813 * store & shift to temporary;
814 * ld1.a r1=[r3]
815 * store & shift to temporary;
816 * r1=temporary
817 *
818 * So in this case, you would get the right value is r1 but the wrong info in
819 * the ALAT. Notice that you could do it in reverse to finish with address 3
820 * but you would still get the size wrong. To get the size right, one needs to
821 * execute exactly the same kind of load. You could do it from a aligned
822 * temporary location, but you would get the address wrong.
823 *
824 * So no matter what, it is not possible to emulate an advanced load
825 * correctly. But is that really critical ?
826 *
827 * We will always convert ld.a into a normal load with ALAT invalidated. This
828 * will enable compiler to do optimization where certain code path after ld.a
829 * is not required to have ld.c/chk.a, e.g., code path with no intervening stores.
830 *
831 * If there is a store after the advanced load, one must either do a ld.c.* or
832 * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
833 * entry found in ALAT), and that's perfectly ok because:
834 *
835 * - ld.c.*, if the entry is not present a normal load is executed
836 * - chk.a.*, if the entry is not present, execution jumps to recovery code
837 *
838 * In either case, the load can be potentially retried in another form.
839 *
840 * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
841 * up a stale entry later). The register base update MUST also be performed.
842 */
843
844 /*
845 * when the load has the .acq completer then
846 * use ordering fence.
847 */
848 if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
849 mb();
850
851 /*
852 * invalidate ALAT entry in case of advanced load
853 */
854 if (ld.x6_op == 0x2)
855 invala_gr(ld.r1);
856
857 return 0;
858}
859
860static int
861emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
862{
863 unsigned long r2;
864 unsigned int len = 1 << ld.x6_sz;
865
866 /*
867 * if we get to this handler, Nat bits on both r3 and r2 have already
868 * been checked. so we don't need to do it
869 *
870 * extract the value to be stored
871 */
872 getreg(ld.imm, &r2, NULL, regs);
873
874 /*
875 * we rely on the macros in unaligned.h for now i.e.,
876 * we let the compiler figure out how to read memory gracefully.
877 *
878 * We need this switch/case because the way the inline function
879 * works. The code is optimized by the compiler and looks like
880 * a single switch/case.
881 */
882 DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
883
884 if (len != 2 && len != 4 && len != 8) {
885 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
886 return -1;
887 }
888
889 /* this assumes little-endian byte-order: */
890 if (copy_to_user((void __user *) ifa, &r2, len))
891 return -1;
892
893 /*
894 * stX [r3]=r2,imm(9)
895 *
896 * NOTE:
897 * ld.r3 can never be r0, because r0 would not generate an
898 * unaligned access.
899 */
900 if (ld.op == 0x5) {
901 unsigned long imm;
902
903 /*
904 * form imm9: [12:6] contain first 7bits
905 */
906 imm = ld.x << 7 | ld.r1;
907 /*
908 * sign extend (8bits) if m set
909 */
910 if (ld.m) imm |= SIGN_EXT9;
911 /*
912 * ifa == r3 (NaT is necessarily cleared)
913 */
914 ifa += imm;
915
916 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
917
918 setreg(ld.r3, ifa, 0, regs);
919 }
920 /*
921 * we don't have alat_invalidate_multiple() so we need
922 * to do the complete flush :-<<
923 */
924 ia64_invala();
925
926 /*
927 * stX.rel: use fence instead of release
928 */
929 if (ld.x6_op == 0xd)
930 mb();
931
932 return 0;
933}
934
/*
 * Size in bytes of a floating-point memory operand, indexed by the x6_sz
 * field of the instruction.
 */
static const unsigned char float_fsz[4] = {
	[0] = 10,	/* extended precision (e) */
	[1] = 8,	/* integer (8) */
	[2] = 4,	/* single precision (s) */
	[3] = 8		/* double precision (d) */
};
944
/*
 * Convert the extended-precision memory image at INIT to spilled register
 * format in *FINAL: load with ia64_ldfe(), then spill with ia64_stf_spill().
 * NOTE(review): the literal 6 is presumably the scratch FP register (f6) --
 * confirm against the intrinsic definitions.
 */
static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfe(6, init);
	ia64_stop();		/* separates the load from the dependent spill */
	ia64_stf_spill(final, 6);
}
951}
952
/*
 * Convert the 8-byte integer memory image at INIT to spilled register format
 * in *FINAL: load with ia64_ldf8(), then spill with ia64_stf_spill().
 * NOTE(review): the literal 6 is presumably the scratch FP register (f6) --
 * confirm against the intrinsic definitions.
 */
static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf8(6, init);
	ia64_stop();		/* separates the load from the dependent spill */
	ia64_stf_spill(final, 6);
}
959}
960
/*
 * Convert the single-precision memory image at INIT to spilled register
 * format in *FINAL: load with ia64_ldfs(), then spill with ia64_stf_spill().
 * NOTE(review): the literal 6 is presumably the scratch FP register (f6) --
 * confirm against the intrinsic definitions.
 */
static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfs(6, init);
	ia64_stop();		/* separates the load from the dependent spill */
	ia64_stf_spill(final, 6);
}
967}
968
/*
 * Convert the double-precision memory image at INIT to spilled register
 * format in *FINAL: load with ia64_ldfd(), then spill with ia64_stf_spill().
 * NOTE(review): the literal 6 is presumably the scratch FP register (f6) --
 * confirm against the intrinsic definitions.
 */
static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfd(6, init);
	ia64_stop();		/* separates the load from the dependent spill */
	ia64_stf_spill(final, 6);
}
975}
976
/*
 * Convert the spilled register image at INIT to its extended-precision
 * memory format in *FINAL: fill with ia64_ldf_fill(), then store with
 * ia64_stfe().
 * NOTE(review): the literal 6 is presumably the scratch FP register (f6) --
 * confirm against the intrinsic definitions.
 */
static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();		/* separates the fill from the dependent store */
	ia64_stfe(final, 6);
}
983}
984
/*
 * Convert the spilled register image at INIT to its 8-byte integer memory
 * format in *FINAL: fill with ia64_ldf_fill(), then store with ia64_stf8().
 * NOTE(review): the literal 6 is presumably the scratch FP register (f6) --
 * confirm against the intrinsic definitions.
 */
static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();		/* separates the fill from the dependent store */
	ia64_stf8(final, 6);
}
991}
992
/*
 * Convert the spilled register image at INIT to its single-precision memory
 * format in *FINAL: fill with ia64_ldf_fill(), then store with ia64_stfs().
 * NOTE(review): the literal 6 is presumably the scratch FP register (f6) --
 * confirm against the intrinsic definitions.
 */
static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();		/* separates the fill from the dependent store */
	ia64_stfs(final, 6);
}
999}
1000
/*
 * Convert the spilled register image at INIT to its double-precision memory
 * format in *FINAL: fill with ia64_ldf_fill(), then store with ia64_stfd().
 * NOTE(review): the literal 6 is presumably the scratch FP register (f6) --
 * confirm against the intrinsic definitions.
 */
static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	ia64_stop();		/* separates the fill from the dependent store */
	ia64_stfd(final, 6);
}
1007}
1008
1009static int
1010emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1011{
1012 struct ia64_fpreg fpr_init[2];
1013 struct ia64_fpreg fpr_final[2];
1014 unsigned long len = float_fsz[ld.x6_sz];
1015
1016 /*
1017 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1018 * higher priority than unaligned faults.
1019 *
1020 * r0 cannot be found as the base as it would never generate an unaligned
1021 * reference.
1022 */
1023
1024 /*
1025 * make sure we get clean buffers
1026 */
1027 memset(&fpr_init, 0, sizeof(fpr_init));
1028 memset(&fpr_final, 0, sizeof(fpr_final));
1029
1030 /*
1031 * ldfpX.a: we don't try to emulate anything but we must
1032 * invalidate the ALAT entry and execute updates, if any.
1033 */
1034 if (ld.x6_op != 0x2) {
1035 /*
1036 * This assumes little-endian byte-order. Note that there is no "ldfpe"
1037 * instruction:
1038 */
1039 if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
1040 || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
1041 return -1;
1042
1043 DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1044 DDUMP("frp_init =", &fpr_init, 2*len);
1045 /*
1046 * XXX fixme
1047 * Could optimize inlines by using ldfpX & 2 spills
1048 */
1049 switch( ld.x6_sz ) {
1050 case 0:
1051 mem2float_extended(&fpr_init[0], &fpr_final[0]);
1052 mem2float_extended(&fpr_init[1], &fpr_final[1]);
1053 break;
1054 case 1:
1055 mem2float_integer(&fpr_init[0], &fpr_final[0]);
1056 mem2float_integer(&fpr_init[1], &fpr_final[1]);
1057 break;
1058 case 2:
1059 mem2float_single(&fpr_init[0], &fpr_final[0]);
1060 mem2float_single(&fpr_init[1], &fpr_final[1]);
1061 break;
1062 case 3:
1063 mem2float_double(&fpr_init[0], &fpr_final[0]);
1064 mem2float_double(&fpr_init[1], &fpr_final[1]);
1065 break;
1066 }
1067 DDUMP("fpr_final =", &fpr_final, 2*len);
1068 /*
1069 * XXX fixme
1070 *
1071 * A possible optimization would be to drop fpr_final and directly
1072 * use the storage from the saved context i.e., the actual final
1073 * destination (pt_regs, switch_stack or thread structure).
1074 */
1075 setfpreg(ld.r1, &fpr_final[0], regs);
1076 setfpreg(ld.imm, &fpr_final[1], regs);
1077 }
1078
1079 /*
1080 * Check for updates: only immediate updates are available for this
1081 * instruction.
1082 */
1083 if (ld.m) {
1084 /*
1085 * the immediate is implicit given the ldsz of the operation:
1086 * single: 8 (2x4) and for all others it's 16 (2x8)
1087 */
1088 ifa += len<<1;
1089
1090 /*
1091 * IMPORTANT:
1092 * the fact that we force the NaT of r3 to zero is ONLY valid
1093 * as long as we don't come here with a ldfpX.s.
1094 * For this reason we keep this sanity check
1095 */
1096 if (ld.x6_op == 1 || ld.x6_op == 3)
1097 printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1098 __FUNCTION__);
1099
1100 setreg(ld.r3, ifa, 0, regs);
1101 }
1102
1103 /*
1104 * Invalidate ALAT entries, if any, for both registers.
1105 */
1106 if (ld.x6_op == 0x2) {
1107 invala_fr(ld.r1);
1108 invala_fr(ld.imm);
1109 }
1110 return 0;
1111}
1112
1113
1114static int
1115emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1116{
1117 struct ia64_fpreg fpr_init;
1118 struct ia64_fpreg fpr_final;
1119 unsigned long len = float_fsz[ld.x6_sz];
1120
1121 /*
1122 * fr0 & fr1 don't need to be checked because Illegal Instruction
1123 * faults have higher priority than unaligned faults.
1124 *
1125 * r0 cannot be found as the base as it would never generate an
1126 * unaligned reference.
1127 */
1128
1129 /*
1130 * make sure we get clean buffers
1131 */
1132 memset(&fpr_init,0, sizeof(fpr_init));
1133 memset(&fpr_final,0, sizeof(fpr_final));
1134
1135 /*
1136 * ldfX.a we don't try to emulate anything but we must
1137 * invalidate the ALAT entry.
1138 * See comments in ldX for descriptions on how the various loads are handled.
1139 */
1140 if (ld.x6_op != 0x2) {
1141 if (copy_from_user(&fpr_init, (void __user *) ifa, len))
1142 return -1;
1143
1144 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1145 DDUMP("fpr_init =", &fpr_init, len);
1146 /*
1147 * we only do something for x6_op={0,8,9}
1148 */
1149 switch( ld.x6_sz ) {
1150 case 0:
1151 mem2float_extended(&fpr_init, &fpr_final);
1152 break;
1153 case 1:
1154 mem2float_integer(&fpr_init, &fpr_final);
1155 break;
1156 case 2:
1157 mem2float_single(&fpr_init, &fpr_final);
1158 break;
1159 case 3:
1160 mem2float_double(&fpr_init, &fpr_final);
1161 break;
1162 }
1163 DDUMP("fpr_final =", &fpr_final, len);
1164 /*
1165 * XXX fixme
1166 *
1167 * A possible optimization would be to drop fpr_final and directly
1168 * use the storage from the saved context i.e., the actual final
1169 * destination (pt_regs, switch_stack or thread structure).
1170 */
1171 setfpreg(ld.r1, &fpr_final, regs);
1172 }
1173
1174 /*
1175 * check for updates on any loads
1176 */
1177 if (ld.op == 0x7 || ld.m)
1178 emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1179
1180 /*
1181 * invalidate ALAT entry in case of advanced floating point loads
1182 */
1183 if (ld.x6_op == 0x2)
1184 invala_fr(ld.r1);
1185
1186 return 0;
1187}
1188
1189
1190static int
1191emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1192{
1193 struct ia64_fpreg fpr_init;
1194 struct ia64_fpreg fpr_final;
1195 unsigned long len = float_fsz[ld.x6_sz];
1196
1197 /*
1198 * make sure we get clean buffers
1199 */
1200 memset(&fpr_init,0, sizeof(fpr_init));
1201 memset(&fpr_final,0, sizeof(fpr_final));
1202
1203 /*
1204 * if we get to this handler, Nat bits on both r3 and r2 have already
1205 * been checked. so we don't need to do it
1206 *
1207 * extract the value to be stored
1208 */
1209 getfpreg(ld.imm, &fpr_init, regs);
1210 /*
1211 * during this step, we extract the spilled registers from the saved
1212 * context i.e., we refill. Then we store (no spill) to temporary
1213 * aligned location
1214 */
1215 switch( ld.x6_sz ) {
1216 case 0:
1217 float2mem_extended(&fpr_init, &fpr_final);
1218 break;
1219 case 1:
1220 float2mem_integer(&fpr_init, &fpr_final);
1221 break;
1222 case 2:
1223 float2mem_single(&fpr_init, &fpr_final);
1224 break;
1225 case 3:
1226 float2mem_double(&fpr_init, &fpr_final);
1227 break;
1228 }
1229 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1230 DDUMP("fpr_init =", &fpr_init, len);
1231 DDUMP("fpr_final =", &fpr_final, len);
1232
1233 if (copy_to_user((void __user *) ifa, &fpr_final, len))
1234 return -1;
1235
1236 /*
1237 * stfX [r3]=r2,imm(9)
1238 *
1239 * NOTE:
1240 * ld.r3 can never be r0, because r0 would not generate an
1241 * unaligned access.
1242 */
1243 if (ld.op == 0x7) {
1244 unsigned long imm;
1245
1246 /*
1247 * form imm9: [12:6] contain first 7bits
1248 */
1249 imm = ld.x << 7 | ld.r1;
1250 /*
1251 * sign extend (8bits) if m set
1252 */
1253 if (ld.m)
1254 imm |= SIGN_EXT9;
1255 /*
1256 * ifa == r3 (NaT is necessarily cleared)
1257 */
1258 ifa += imm;
1259
1260 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1261
1262 setreg(ld.r3, ifa, 0, regs);
1263 }
1264 /*
1265 * we don't have alat_invalidate_multiple() so we need
1266 * to do the complete flush :-<<
1267 */
1268 ia64_invala();
1269
1270 return 0;
1271}
1272
1273/*
1274 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1275 * eventually fix the program. However, we don't want to do that for every access so we
1276 * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
1277 * either...
1278 */
1279static int
1280within_logging_rate_limit (void)
1281{
1282 static unsigned long count, last_time;
1283
1284 if (jiffies - last_time > 5*HZ)
1285 count = 0;
Jack Steiner79c83bd2006-01-24 16:32:11 -06001286 if (count < 5) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 last_time = jiffies;
Jack Steiner79c83bd2006-01-24 16:32:11 -06001288 count++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 return 1;
1290 }
1291 return 0;
1292
1293}
1294
1295void
1296ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1297{
1298 struct ia64_psr *ipsr = ia64_psr(regs);
1299 mm_segment_t old_fs = get_fs();
1300 unsigned long bundle[2];
1301 unsigned long opcode;
1302 struct siginfo si;
1303 const struct exception_table_entry *eh = NULL;
1304 union {
1305 unsigned long l;
1306 load_store_t insn;
1307 } u;
1308 int ret = -1;
1309
1310 if (ia64_psr(regs)->be) {
1311 /* we don't support big-endian accesses */
1312 die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1313 goto force_sigbus;
1314 }
1315
1316 /*
1317 * Treat kernel accesses for which there is an exception handler entry the same as
1318 * user-level unaligned accesses. Otherwise, a clever program could trick this
1319 * handler into reading an arbitrary kernel addresses...
1320 */
1321 if (!user_mode(regs))
1322 eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
1323 if (user_mode(regs) || eh) {
1324 if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1325 goto force_sigbus;
1326
1327 if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1328 && within_logging_rate_limit())
1329 {
1330 char buf[200]; /* comm[] is at most 16 bytes... */
1331 size_t len;
1332
1333 len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1334 "ip=0x%016lx\n\r", current->comm, current->pid,
1335 ifa, regs->cr_iip + ipsr->ri);
1336 /*
1337 * Don't call tty_write_message() if we're in the kernel; we might
1338 * be holding locks...
1339 */
1340 if (user_mode(regs))
1341 tty_write_message(current->signal->tty, buf);
1342 buf[len-1] = '\0'; /* drop '\r' */
1343 printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */
1344 }
1345 } else {
1346 if (within_logging_rate_limit())
1347 printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1348 ifa, regs->cr_iip + ipsr->ri);
1349 set_fs(KERNEL_DS);
1350 }
1351
1352 DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1353 regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1354
1355 if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
1356 goto failure;
1357
1358 /*
1359 * extract the instruction from the bundle given the slot number
1360 */
1361 switch (ipsr->ri) {
1362 case 0: u.l = (bundle[0] >> 5); break;
1363 case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1364 case 2: u.l = (bundle[1] >> 23); break;
1365 }
1366 opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1367
1368 DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1369 "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1370 u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1371
1372 /*
1373 * IMPORTANT:
1374 * Notice that the switch statement DOES not cover all possible instructions
1375 * that DO generate unaligned references. This is made on purpose because for some
1376 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1377 * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
1378 * the program will get a signal and die:
1379 *
1380 * load/store:
1381 * - ldX.spill
1382 * - stX.spill
1383 * Reason: RNATs are based on addresses
1384 * - ld16
1385 * - st16
1386 * Reason: ld16 and st16 are supposed to occur in a single
1387 * memory op
1388 *
1389 * synchronization:
1390 * - cmpxchg
1391 * - fetchadd
1392 * - xchg
1393 * Reason: ATOMIC operations cannot be emulated properly using multiple
1394 * instructions.
1395 *
1396 * speculative loads:
1397 * - ldX.sZ
1398 * Reason: side effects, code must be ready to deal with failure so simpler
1399 * to let the load fail.
1400 * ---------------------------------------------------------------------------------
1401 * XXX fixme
1402 *
1403 * I would like to get rid of this switch case and do something
1404 * more elegant.
1405 */
1406 switch (opcode) {
1407 case LDS_OP:
1408 case LDSA_OP:
1409 if (u.insn.x)
1410 /* oops, really a semaphore op (cmpxchg, etc) */
1411 goto failure;
1412 /* no break */
1413 case LDS_IMM_OP:
1414 case LDSA_IMM_OP:
1415 case LDFS_OP:
1416 case LDFSA_OP:
1417 case LDFS_IMM_OP:
1418 /*
1419 * The instruction will be retried with deferred exceptions turned on, and
1420 * we should get Nat bit installed
1421 *
1422 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1423 * are actually executed even though the operation failed. So we don't
1424 * need to take care of this.
1425 */
1426 DPRINT("forcing PSR_ED\n");
1427 regs->cr_ipsr |= IA64_PSR_ED;
1428 goto done;
1429
1430 case LD_OP:
1431 case LDA_OP:
1432 case LDBIAS_OP:
1433 case LDACQ_OP:
1434 case LDCCLR_OP:
1435 case LDCNC_OP:
1436 case LDCCLRACQ_OP:
1437 if (u.insn.x)
1438 /* oops, really a semaphore op (cmpxchg, etc) */
1439 goto failure;
1440 /* no break */
1441 case LD_IMM_OP:
1442 case LDA_IMM_OP:
1443 case LDBIAS_IMM_OP:
1444 case LDACQ_IMM_OP:
1445 case LDCCLR_IMM_OP:
1446 case LDCNC_IMM_OP:
1447 case LDCCLRACQ_IMM_OP:
1448 ret = emulate_load_int(ifa, u.insn, regs);
1449 break;
1450
1451 case ST_OP:
1452 case STREL_OP:
1453 if (u.insn.x)
1454 /* oops, really a semaphore op (cmpxchg, etc) */
1455 goto failure;
1456 /* no break */
1457 case ST_IMM_OP:
1458 case STREL_IMM_OP:
1459 ret = emulate_store_int(ifa, u.insn, regs);
1460 break;
1461
1462 case LDF_OP:
1463 case LDFA_OP:
1464 case LDFCCLR_OP:
1465 case LDFCNC_OP:
1466 case LDF_IMM_OP:
1467 case LDFA_IMM_OP:
1468 case LDFCCLR_IMM_OP:
1469 case LDFCNC_IMM_OP:
1470 if (u.insn.x)
1471 ret = emulate_load_floatpair(ifa, u.insn, regs);
1472 else
1473 ret = emulate_load_float(ifa, u.insn, regs);
1474 break;
1475
1476 case STF_OP:
1477 case STF_IMM_OP:
1478 ret = emulate_store_float(ifa, u.insn, regs);
1479 break;
1480
1481 default:
1482 goto failure;
1483 }
1484 DPRINT("ret=%d\n", ret);
1485 if (ret)
1486 goto failure;
1487
1488 if (ipsr->ri == 2)
1489 /*
1490 * given today's architecture this case is not likely to happen because a
1491 * memory access instruction (M) can never be in the last slot of a
1492 * bundle. But let's keep it for now.
1493 */
1494 regs->cr_iip += 16;
1495 ipsr->ri = (ipsr->ri + 1) & 0x3;
1496
1497 DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1498 done:
1499 set_fs(old_fs); /* restore original address limit */
1500 return;
1501
1502 failure:
1503 /* something went wrong... */
1504 if (!user_mode(regs)) {
1505 if (eh) {
1506 ia64_handle_exception(regs, eh);
1507 goto done;
1508 }
1509 die_if_kernel("error during unaligned kernel access\n", regs, ret);
1510 /* NOT_REACHED */
1511 }
1512 force_sigbus:
1513 si.si_signo = SIGBUS;
1514 si.si_errno = 0;
1515 si.si_code = BUS_ADRALN;
1516 si.si_addr = (void __user *) ifa;
1517 si.si_flags = 0;
1518 si.si_isr = 0;
1519 si.si_imm = 0;
1520 force_sig_info(SIGBUS, &si, current);
1521 goto done;
1522}