/*
 * linux/arch/x86-64/kernel/process.c
 *
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static atomic_t hlt_counter = ATOMIC_INIT(0);

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

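/*
 * disable_hlt()/enable_hlt() let callers veto use of hlt while they run:
 * default_idle() only halts when hlt_counter is zero.
 */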
void disable_hlt(void)
{
        atomic_inc(&hlt_counter);
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
        atomic_dec(&hlt_counter);
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better idle routine.  Interrupts are
 * disabled before the need_resched() check so a wakeup cannot slip in
 * between the check and the halt; safe_halt() re-enables interrupts and
 * halts atomically (sti takes effect only after the following hlt).
 */
void default_idle(void)
{
        if (!atomic_read(&hlt_counter)) {
                local_irq_disable();
                if (!need_resched())
                        safe_halt();
                else
                        local_irq_enable();
        }
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        int oldval;

        local_irq_enable();

        /*
         * Deal with another CPU just having chosen a thread to
         * run here:
         */
        oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

        if (!oldval) {
                set_thread_flag(TIF_POLLING_NRFLAG);
                /*
                 * Spin until TIF_NEED_RESCHED is set; "rep; nop" is the
                 * PAUSE instruction, which relaxes the CPU while polling.
                 */
                asm volatile(
                        "2:"
                        "testl %0,%1;"
                        "rep; nop;"
                        "je 2b;"
                        : :
                        "i" (_TIF_NEED_RESCHED),
                        "m" (current_thread_info()->flags));
                clear_thread_flag(TIF_POLLING_NRFLAG);
        } else {
                set_need_resched();
        }
}

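/*
 * Wait until every online CPU has left its current idle handler at least
 * once: set each CPU's cpu_idle_state flag, then poll once a second until
 * cpu_idle() has cleared all of them.  Callers use this after changing
 * pm_idle so that no CPU is still running the old idle routine.
 */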
void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
        } while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We don't actually take the CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        while (1)
                safe_halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule).
 */
void cpu_idle(void)
{
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        idle();
                }

                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/*
 * This uses the new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate the IPI otherwise needed to trigger a need_resched
 * check.  We execute MONITOR against need_resched and enter an optimized
 * wait state through MWAIT.  Whenever someone changes need_resched, we are
 * woken up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
        local_irq_enable();

        if (!need_resched()) {
                set_thread_flag(TIF_POLLING_NRFLAG);
                do {
                        __monitor((void *)&current_thread_info()->flags, 0, 0);
                        if (need_resched())
                                break;
                        __mwait(0, 0);
                } while (!need_resched());
                clear_thread_flag(TIF_POLLING_NRFLAG);
        }
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int printed;
        if (cpu_has(c, X86_FEATURE_MWAIT)) {
                /*
                 * Skip if setup has overridden idle.
                 * One CPU supports mwait => all CPUs support mwait.
                 */
                if (!pm_idle) {
                        if (!printed) {
                                printk("using mwait in idle threads.\n");
                                printed = 1;
                        }
                        pm_idle = mwait_idle;
                }
        }
}

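/* "idle=poll" on the boot command line selects the polling idle loop. */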
static int __init idle_setup(char *str)
{
        if (!strncmp(str, "poll", 4)) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        }

        boot_option_idle_override = 1;
        return 1;
}

__setup("idle=", idle_setup);

/* Also prints some state that isn't saved in the pt_regs. */
void __show_regs(struct pt_regs *regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                system_utsname.release,
                (int)strcspn(system_utsname.version, " "),
                system_utsname.version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip);
        printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->rdx, regs->rsi, regs->rdi);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->rbp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        asm("movq %%cr0, %0": "=r" (cr0));
        asm("movq %%cr2, %0": "=r" (cr2));
        asm("movq %%cr3, %0": "=r" (cr3));
        asm("movq %%cr4, %0": "=r" (cr4));

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        /*
         * Remove function-return probe instances associated with this task
         * and put them back on the free list. Do not insert an exit probe for
         * this function, it will be disabled by kprobe_flush_task if you do.
         */
        kprobe_flush_task(me);

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

void flush_thread(void)
{
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();

        /*
         * Remove function-return probe instances associated with this task
         * and put them back on the free list. Do not insert an exit probe for
         * this function, it will be disabled by kprobe_flush_task if you do.
         */
        kprobe_flush_task(tsk);

        /* An exec for the other ABI is pending: flip TIF_IA32 to match. */
        if (t->flags & _TIF_ABI_PENDING)
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state.
         */
        clear_fpu(tsk);
        clear_used_math();
}

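/*
 * Sanity check at task teardown: the LDT should already have been
 * released together with the mm.
 */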
void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

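/*
 * TLS helpers: store or read a 32-bit segment base in the thread's GDT
 * tls_array.  do_arch_prctl() uses these for FS/GS bases below 4GB, which
 * can be switched via a segment selector instead of an MSR write.
 */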
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct n_desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        desc->a = LDT_entry_a(&ud);
        desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        struct desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        return desc->base0 |
                (((u32)desc->base1) << 16) |
                (((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

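/*
 * Set up the kernel stack and thread state of a new child: a pt_regs
 * frame at the top of the stack (rax = 0 so fork() returns 0 in the
 * child), the saved segment selectors, and a copy of the parent's I/O
 * permission bitmap if it has one.
 */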
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;

        *childregs = *regs;

        childregs->rax = 0;
        childregs->rsp = rsp;
        if (rsp == ~0UL) {
                /* Kernel thread: run it on the child's own kernel stack. */
                childregs->rsp = (unsigned long)childregs;
        }

        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
        p->thread.userrsp = me->thread.userrsp;

        set_ti_thread_flag(p->thread_info, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = ia32_child_tls(p, childregs);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        unlazy_fpu(prev_p);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        tss->rsp0 = next->rsp0;

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /*
                 * A segment register != 0 always requires a reload.
                 * Also reload when it has changed.  When the previous
                 * process used a 64-bit base, always reload to avoid
                 * an information leak.
                 */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /*
                         * Check if the user used a selector != 0; if yes
                         * clear the 64-bit base, since the overloaded base
                         * is always mapped to the null selector.
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* When the next process has a 64-bit base, use it. */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /*
         * Switch the PDA context.
         */
        prev->userrsp = read_pda(oldrsp);
        write_pda(oldrsp, next->userrsp);
        write_pda(pcurrent, next_p);
        write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

        /*
         * Now maybe reload the debug registers
         */
        if (unlikely(next->debugreg7)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        /*
         * Handle the IO bitmap
         */
        if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
                if (next->io_bitmap_ptr)
                        /*
                         * Copy the relevant range of the IO bitmap.
                         * Normally this is 128 bytes or less:
                         */
                        memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                                max(prev->io_bitmap_max, next->io_bitmap_max));
                else {
                        /*
                         * Clear any possible leftover bits:
                         */
                        memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
                }
        }

        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        if (error == 0) {
                task_lock(current);
                current->ptrace &= ~PT_DTRACE;
                task_unlock(current);
        }
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64-bit mode */
        clear_thread_flag(TIF_IA32);

        /*
         * TBD: this overwrites the user's setup.  We should have two bits.
         * But 64-bit processes have always behaved this way, so it's not
         * too bad.  The main problem is that 32-bit children are affected
         * again.
         */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->rsp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
                    NULL, NULL);
}

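/*
 * Find where a sleeping task is waiting: walk the saved frame-pointer
 * chain on the task's kernel stack until a return address outside the
 * scheduler is found; bail out after 16 frames or if a frame pointer
 * leaves the stack.
 */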
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, rip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)p->thread_info;
        if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do {
                if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                rip = *(u64 *)(fp+8);
                if (!in_sched_functions(rip))
                        return rip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

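/*
 * Get or set the FS/GS base of a task.  Bases that fit in 32 bits go
 * through a GDT TLS entry, since reloading a selector is cheaper than a
 * wrmsr; larger bases must be written to MSR_FS_BASE or
 * MSR_KERNEL_GS_BASE directly.
 */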
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit) {
                        rdmsrl(MSR_FS_BASE, base);
                } else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        rdmsrl(MSR_KERNEL_GS_BASE, base);
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user-space registers if the task is not running (in user space).
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
        struct pt_regs *pp, ptregs;

        /* The user-mode register frame sits at the top of the kernel stack,
           just below thread.rsp0. */
        pp = (struct pt_regs *)(tsk->thread.rsp0);
        --pp;

        ptregs = *pp;
        ptregs.cs &= 0xffff;
        ptregs.ss &= 0xffff;

        elf_core_copy_regs(regs, &ptregs);

        return 1;
}

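/*
 * Randomize the initial stack pointer by up to 8KB and align it to
 * 16 bytes, as the x86-64 ABI requires.
 */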
unsigned long arch_align_stack(unsigned long sp)
{
        if (randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}