blob: d6fa41459c8095b7126d5f170352d68f657a422c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/x86-64/kernel/process.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 *
9 * X86-64 port
10 * Andi Kleen.
Ashok Raj76e4f662005-06-25 14:55:00 -070011 *
12 * CPU hotplug support - ashok.raj@intel.com
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 */
14
15/*
16 * This file handles the architecture-dependent parts of process handling.
17 */
18
19#include <stdarg.h>
20
Ashok Raj76e4f662005-06-25 14:55:00 -070021#include <linux/cpu.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
23#include <linux/sched.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/elfcore.h>
27#include <linux/smp.h>
28#include <linux/slab.h>
29#include <linux/user.h>
30#include <linux/module.h>
31#include <linux/a.out.h>
32#include <linux/interrupt.h>
33#include <linux/delay.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <linux/ptrace.h>
35#include <linux/utsname.h>
36#include <linux/random.h>
Andi Kleen95833c82006-01-11 22:44:36 +010037#include <linux/notifier.h>
bibo maoc6fd91f2006-03-26 01:38:20 -080038#include <linux/kprobes.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
40#include <asm/uaccess.h>
41#include <asm/pgtable.h>
42#include <asm/system.h>
43#include <asm/io.h>
44#include <asm/processor.h>
45#include <asm/i387.h>
46#include <asm/mmu_context.h>
47#include <asm/pda.h>
48#include <asm/prctl.h>
49#include <asm/kdebug.h>
50#include <asm/desc.h>
51#include <asm/proto.h>
52#include <asm/ia32.h>
Andi Kleen95833c82006-01-11 22:44:36 +010053#include <asm/idle.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054
/* Assembly entry point; only declared here, not referenced in the visible code. */
55asmlinkage extern void ret_from_fork(void);
56
/* Clone flag set (CLONE_VM | CLONE_UNTRACED); presumably consumed by kernel-thread creation elsewhere - confirm. */
57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58
/* Set to 1 by idle_setup() whenever an "idle=" boot parameter is parsed. */
Linus Torvalds1da177e2005-04-16 15:20:36 -070059unsigned long boot_option_idle_override = 0;
60EXPORT_SYMBOL(boot_option_idle_override);
61
62/*
63 * Power-management idle function, if any.  cpu_idle() falls back to
 * default_idle() while this pointer is NULL.
64 */
65void (*pm_idle)(void);
/* Per-CPU flag: set to 1 by cpu_idle_wait(), cleared again by the cpu_idle() loop of that CPU. */
66static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
67
/* Notifier chain called with IDLE_START / IDLE_END by enter_idle() / __exit_idle(). */
Alan Sterne041c682006-03-27 01:16:30 -080068static ATOMIC_NOTIFIER_HEAD(idle_notifier);
Andi Kleen95833c82006-01-11 22:44:36 +010069
70void idle_notifier_register(struct notifier_block *n)
71{
Alan Sterne041c682006-03-27 01:16:30 -080072 atomic_notifier_chain_register(&idle_notifier, n);
Andi Kleen95833c82006-01-11 22:44:36 +010073}
74EXPORT_SYMBOL_GPL(idle_notifier_register);
75
76void idle_notifier_unregister(struct notifier_block *n)
77{
Alan Sterne041c682006-03-27 01:16:30 -080078 atomic_notifier_chain_unregister(&idle_notifier, n);
Andi Kleen95833c82006-01-11 22:44:36 +010079}
80EXPORT_SYMBOL(idle_notifier_unregister);
81
/*
 * Per-CPU record of whether the idle code has announced IDLE_START
 * (CPU_IDLE) or IDLE_END (CPU_NOT_IDLE).  Every CPU starts out as
 * CPU_NOT_IDLE.
 */
82enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
83static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
84
85void enter_idle(void)
86{
87 __get_cpu_var(idle_state) = CPU_IDLE;
Alan Sterne041c682006-03-27 01:16:30 -080088 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
Andi Kleen95833c82006-01-11 22:44:36 +010089}
90
91static void __exit_idle(void)
92{
93 __get_cpu_var(idle_state) = CPU_NOT_IDLE;
Alan Sterne041c682006-03-27 01:16:30 -080094 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
Andi Kleen95833c82006-01-11 22:44:36 +010095}
96
97/* Called from interrupts to signify idle end */
98void exit_idle(void)
99{
100 if (current->pid | read_pda(irqcount))
101 return;
102 __exit_idle();
103}
104
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105/*
106 * We use this if we don't have any better
107 * idle routine..
108 */
Adrian Bunkcdb04522006-03-24 03:15:57 -0800109static void default_idle(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110{
Nick Piggin64c7c8f2005-11-08 21:39:04 -0800111 local_irq_enable();
112
Andi Kleen2d52ede2006-01-11 22:42:42 +0100113 clear_thread_flag(TIF_POLLING_NRFLAG);
114 smp_mb__after_clear_bit();
115 while (!need_resched()) {
116 local_irq_disable();
117 if (!need_resched())
118 safe_halt();
119 else
120 local_irq_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121 }
Andi Kleen2d52ede2006-01-11 22:42:42 +0100122 set_thread_flag(TIF_POLLING_NRFLAG);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123}
124
/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 *
 * Selected with the "idle=poll" boot parameter (see idle_setup()).
 * Interrupts are enabled, then the CPU spins until TIF_NEED_RESCHED
 * shows up in the current thread's flags.
 *
 * The spin uses the generic need_resched()/cpu_relax() helpers instead
 * of an open-coded "testl / rep; nop / je" asm loop.
 * NOTE(review): cpu_relax() is expected to expand to the same "rep; nop"
 * pause hint the asm used - confirm in <asm/processor.h>.
 */
static void poll_idle (void)
{
	local_irq_enable();

	while (!need_resched())
		cpu_relax();
}
143
144void cpu_idle_wait(void)
145{
146 unsigned int cpu, this_cpu = get_cpu();
147 cpumask_t map;
148
149 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
150 put_cpu();
151
152 cpus_clear(map);
153 for_each_online_cpu(cpu) {
154 per_cpu(cpu_idle_state, cpu) = 1;
155 cpu_set(cpu, map);
156 }
157
158 __get_cpu_var(cpu_idle_state) = 0;
159
160 wmb();
161 do {
162 ssleep(1);
163 for_each_online_cpu(cpu) {
Andi Kleena88cde12005-11-05 17:25:54 +0100164 if (cpu_isset(cpu, map) &&
165 !per_cpu(cpu_idle_state, cpu))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 cpu_clear(cpu, map);
167 }
168 cpus_and(map, map, cpu_online_map);
169 } while (!cpus_empty(map));
170}
171EXPORT_SYMBOL_GPL(cpu_idle_wait);
172
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/*
 * Park a CPU that has been taken offline (physical CPU hotplug).
 *
 * Drops the idle task's resources, writes back and invalidates the
 * caches, publishes CPU_DEAD in the per-CPU cpu_state and then halts
 * forever with interrupts disabled.  Never returns.
 */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();

	/* Acknowledge the offline request. */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	for (;;)
		halt();
}
#else
/* Without CPU hotplug no CPU can ever be offline, so getting here is a bug. */
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
196
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197/*
198 * The idle thread. There's no useful work to be
199 * done, so just try to conserve power and have a
200 * low exit latency (ie sit in a loop waiting for
201 * somebody to say that they'd like to reschedule)
202 */
203void cpu_idle (void)
204{
Nick Piggin64c7c8f2005-11-08 21:39:04 -0800205 set_thread_flag(TIF_POLLING_NRFLAG);
206
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 /* endless idle loop with no priority at all */
208 while (1) {
209 while (!need_resched()) {
210 void (*idle)(void);
211
212 if (__get_cpu_var(cpu_idle_state))
213 __get_cpu_var(cpu_idle_state) = 0;
214
215 rmb();
216 idle = pm_idle;
217 if (!idle)
218 idle = default_idle;
Ashok Raj76e4f662005-06-25 14:55:00 -0700219 if (cpu_is_offline(smp_processor_id()))
220 play_dead();
Andi Kleen95833c82006-01-11 22:44:36 +0100221 enter_idle();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 idle();
Andi Kleen95833c82006-01-11 22:44:36 +0100223 __exit_idle();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224 }
225
Nick Piggin5bfb5d62005-11-08 21:39:01 -0800226 preempt_enable_no_resched();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 schedule();
Nick Piggin5bfb5d62005-11-08 21:39:01 -0800228 preempt_disable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 }
230}
231
232/*
233 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
234 * which can obviate IPI to trigger checking of need_resched.
235 * We execute MONITOR against need_resched and enter optimized wait state
236 * through MWAIT. Whenever someone changes need_resched, we would be woken
237 * up from MWAIT (without an IPI).
238 */
239static void mwait_idle(void)
240{
241 local_irq_enable();
242
Nick Piggin64c7c8f2005-11-08 21:39:04 -0800243 while (!need_resched()) {
244 __monitor((void *)&current_thread_info()->flags, 0, 0);
245 smp_mb();
246 if (need_resched())
247 break;
248 __mwait(0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 }
250}
251
Ashok Raje6982c62005-06-25 14:54:58 -0700252void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
254 static int printed;
255 if (cpu_has(c, X86_FEATURE_MWAIT)) {
256 /*
257 * Skip, if setup has overridden idle.
258 * One CPU supports mwait => All CPUs supports mwait
259 */
260 if (!pm_idle) {
261 if (!printed) {
262 printk("using mwait in idle threads.\n");
263 printed = 1;
264 }
265 pm_idle = mwait_idle;
266 }
267 }
268}
269
270static int __init idle_setup (char *str)
271{
272 if (!strncmp(str, "poll", 4)) {
273 printk("using polling idle threads.\n");
274 pm_idle = poll_idle;
275 }
276
277 boot_option_idle_override = 1;
278 return 1;
279}
280
281__setup("idle=", idle_setup);
282
283/* Prints also some state that isn't saved in the pt_regs */
284void __show_regs(struct pt_regs * regs)
285{
286 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
287 unsigned int fsindex,gsindex;
288 unsigned int ds,cs,es;
289
290 printk("\n");
291 print_modules();
Andi Kleen9acf23c2005-09-12 18:49:24 +0200292 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
293 current->pid, current->comm, print_tainted(),
294 system_utsname.release,
295 (int)strcspn(system_utsname.version, " "),
296 system_utsname.version);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
298 printk_address(regs->rip);
Andi Kleena88cde12005-11-05 17:25:54 +0100299 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
300 regs->eflags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
302 regs->rax, regs->rbx, regs->rcx);
303 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
304 regs->rdx, regs->rsi, regs->rdi);
305 printk("RBP: %016lx R08: %016lx R09: %016lx\n",
306 regs->rbp, regs->r8, regs->r9);
307 printk("R10: %016lx R11: %016lx R12: %016lx\n",
308 regs->r10, regs->r11, regs->r12);
309 printk("R13: %016lx R14: %016lx R15: %016lx\n",
310 regs->r13, regs->r14, regs->r15);
311
312 asm("movl %%ds,%0" : "=r" (ds));
313 asm("movl %%cs,%0" : "=r" (cs));
314 asm("movl %%es,%0" : "=r" (es));
315 asm("movl %%fs,%0" : "=r" (fsindex));
316 asm("movl %%gs,%0" : "=r" (gsindex));
317
318 rdmsrl(MSR_FS_BASE, fs);
319 rdmsrl(MSR_GS_BASE, gs);
320 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
321
322 asm("movq %%cr0, %0": "=r" (cr0));
323 asm("movq %%cr2, %0": "=r" (cr2));
324 asm("movq %%cr3, %0": "=r" (cr3));
325 asm("movq %%cr4, %0": "=r" (cr4));
326
327 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
328 fs,fsindex,gs,gsindex,shadowgs);
329 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
330 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
331}
332
333void show_regs(struct pt_regs *regs)
334{
Zwane Mwaikamboc078d322005-09-06 15:16:16 -0700335 printk("CPU %d:", smp_processor_id());
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 __show_regs(regs);
Jan Beulichb538ed22006-06-26 13:57:32 +0200337 show_trace(NULL, regs, (void *)(regs + 1));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338}
339
340/*
341 * Free current thread data structures etc..
342 */
343void exit_thread(void)
344{
345 struct task_struct *me = current;
346 struct thread_struct *t = &me->thread;
Rusty Lynch73649da2005-06-23 00:09:23 -0700347
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 if (me->thread.io_bitmap_ptr) {
349 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
350
351 kfree(t->io_bitmap_ptr);
352 t->io_bitmap_ptr = NULL;
353 /*
354 * Careful, clear this in the TSS too:
355 */
356 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
357 t->io_bitmap_max = 0;
358 put_cpu();
359 }
360}
361
362void flush_thread(void)
363{
364 struct task_struct *tsk = current;
365 struct thread_info *t = current_thread_info();
366
Andi Kleen4d9bc792006-06-26 13:57:19 +0200367 if (t->flags & _TIF_ABI_PENDING) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
Andi Kleen4d9bc792006-06-26 13:57:19 +0200369 if (t->flags & _TIF_IA32)
370 current_thread_info()->status |= TS_COMPAT;
371 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372
373 tsk->thread.debugreg0 = 0;
374 tsk->thread.debugreg1 = 0;
375 tsk->thread.debugreg2 = 0;
376 tsk->thread.debugreg3 = 0;
377 tsk->thread.debugreg6 = 0;
378 tsk->thread.debugreg7 = 0;
379 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
380 /*
381 * Forget coprocessor state..
382 */
383 clear_fpu(tsk);
384 clear_used_math();
385}
386
387void release_thread(struct task_struct *dead_task)
388{
389 if (dead_task->mm) {
390 if (dead_task->mm->context.size) {
391 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
392 dead_task->comm,
393 dead_task->mm->context.ldt,
394 dead_task->mm->context.size);
395 BUG();
396 }
397 }
398}
399
400static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
401{
402 struct user_desc ud = {
403 .base_addr = addr,
404 .limit = 0xfffff,
405 .seg_32bit = 1,
406 .limit_in_pages = 1,
407 .useable = 1,
408 };
409 struct n_desc_struct *desc = (void *)t->thread.tls_array;
410 desc += tls;
411 desc->a = LDT_entry_a(&ud);
412 desc->b = LDT_entry_b(&ud);
413}
414
415static inline u32 read_32bit_tls(struct task_struct *t, int tls)
416{
417 struct desc_struct *desc = (void *)t->thread.tls_array;
418 desc += tls;
419 return desc->base0 |
420 (((u32)desc->base1) << 16) |
421 (((u32)desc->base2) << 24);
422}
423
/*
 * Called before a new thread is allocated and the current task is
 * copied into it.  The only preparation needed is unlazy_fpu() on @tsk.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	struct task_struct *parent = tsk;

	unlazy_fpu(parent);
}
432
433int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
434 unsigned long unused,
435 struct task_struct * p, struct pt_regs * regs)
436{
437 int err;
438 struct pt_regs * childregs;
439 struct task_struct *me = current;
440
Andi Kleena88cde12005-11-05 17:25:54 +0100441 childregs = ((struct pt_regs *)
Al Viro57eafdc2006-01-12 01:05:39 -0800442 (THREAD_SIZE + task_stack_page(p))) - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 *childregs = *regs;
444
445 childregs->rax = 0;
446 childregs->rsp = rsp;
Andi Kleena88cde12005-11-05 17:25:54 +0100447 if (rsp == ~0UL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 childregs->rsp = (unsigned long)childregs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449
450 p->thread.rsp = (unsigned long) childregs;
451 p->thread.rsp0 = (unsigned long) (childregs+1);
452 p->thread.userrsp = me->thread.userrsp;
453
Al Viroe4f17c42006-01-12 01:05:38 -0800454 set_tsk_thread_flag(p, TIF_FORK);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455
456 p->thread.fs = me->thread.fs;
457 p->thread.gs = me->thread.gs;
458
H. J. Lufd51f662005-05-01 08:58:48 -0700459 asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
460 asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
461 asm("mov %%es,%0" : "=m" (p->thread.es));
462 asm("mov %%ds,%0" : "=m" (p->thread.ds));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463
464 if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
465 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
466 if (!p->thread.io_bitmap_ptr) {
467 p->thread.io_bitmap_max = 0;
468 return -ENOMEM;
469 }
Andi Kleena88cde12005-11-05 17:25:54 +0100470 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
471 IO_BITMAP_BYTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 }
473
474 /*
475 * Set a new TLS for the child thread?
476 */
477 if (clone_flags & CLONE_SETTLS) {
478#ifdef CONFIG_IA32_EMULATION
479 if (test_thread_flag(TIF_IA32))
480 err = ia32_child_tls(p, childregs);
481 else
482#endif
483 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
484 if (err)
485 goto out;
486 }
487 err = 0;
488out:
489 if (err && p->thread.io_bitmap_ptr) {
490 kfree(p->thread.io_bitmap_ptr);
491 p->thread.io_bitmap_max = 0;
492 }
493 return err;
494}
495
/*
 * Load hardware debug register number r from the matching debugreg<r>
 * field of *thread.
 *
 * thread may be any pointer-valued expression; it is parenthesised so
 * that arguments such as "&t" or "p + 1" expand to the intended member
 * access.  r must be a literal register number, because it is pasted
 * into the field name.
 */
#define loaddebug(thread,r) set_debugreg((thread)->debugreg ## r, r)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
501/*
502 * switch_to(x,y) should switch tasks from x to y.
503 *
 * __switch_to() does the C part of that switch for prev_p -> next_p on
 * the executing CPU, in this order:
 *   - copy next's rsp0 into this CPU's TSS
 *   - save the live ES/DS selectors into prev and reload them from next
 *     if either side is non-zero
 *   - load next's TLS descriptors
 *   - switch FS and GS (selector and, if set, the 64-bit base MSR)
 *   - switch the PDA fields oldrsp, pcurrent and kernelstack, with
 *     unlazy_fpu(prev_p) in between
 *   - reload the debug registers if next->debugreg7 is non-zero
 *   - update the I/O bitmap in the TSS
 * The statement order is significant (see the comments in the body).
 * Returns prev_p.
 *
504 * This could still be optimized:
505 * - fold all the options into a flag word and test it with a single test.
506 * - could test fs/gs bitsliced
Andi Kleen099f3182006-02-03 21:51:38 +0100507 *
508 * Kprobes not supported here. Set the probe on schedule instead.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 */
Andi Kleen099f3182006-02-03 21:51:38 +0100510__kprobes struct task_struct *
Andi Kleena88cde12005-11-05 17:25:54 +0100511__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512{
513 struct thread_struct *prev = &prev_p->thread,
514 *next = &next_p->thread;
515 int cpu = smp_processor_id();
516 struct tss_struct *tss = &per_cpu(init_tss, cpu);
517
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518 /*
519 * Copy the next task's rsp0 into this CPU's TSS.
520 */
521 tss->rsp0 = next->rsp0;
522
523 /*
524 * Switch DS and ES.
525 * This won't pick up thread selector changes, but I guess that is ok.
526 */
H. J. Lufd51f662005-05-01 08:58:48 -0700527 asm volatile("mov %%es,%0" : "=m" (prev->es));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 if (unlikely(next->es | prev->es))
529 loadsegment(es, next->es);
530
H. J. Lufd51f662005-05-01 08:58:48 -0700531 asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 if (unlikely(next->ds | prev->ds))
533 loadsegment(ds, next->ds);
534
/* Install next's TLS descriptors before FS/GS selectors are reloaded below. */
535 load_TLS(next, cpu);
536
537 /*
538 * Switch FS and GS.
539 */
540 {
541 unsigned fsindex;
542 asm volatile("movl %%fs,%0" : "=r" (fsindex));
543 /* segment register != 0 always requires a reload.
544 also reload when it has changed.
545 when prev process used 64bit base always reload
546 to avoid an information leak. */
547 if (unlikely(fsindex | next->fsindex | prev->fs)) {
548 loadsegment(fs, next->fsindex);
549 /* check if the user used a selector != 0
550 * if yes clear 64bit base, since overloaded base
551 * is always mapped to the Null selector
552 */
553 if (fsindex)
554 prev->fs = 0;
555 }
556 /* when next process has a 64bit base use it */
557 if (next->fs)
558 wrmsrl(MSR_FS_BASE, next->fs);
/* remember the selector that was live when prev was switched out */
559 prev->fsindex = fsindex;
560 }
/* Same scheme as for FS above, using load_gs_index() and MSR_KERNEL_GS_BASE. */
561 {
562 unsigned gsindex;
563 asm volatile("movl %%gs,%0" : "=r" (gsindex));
564 if (unlikely(gsindex | next->gsindex | prev->gs)) {
565 load_gs_index(next->gsindex);
566 if (gsindex)
567 prev->gs = 0;
568 }
569 if (next->gs)
570 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
571 prev->gsindex = gsindex;
572 }
573
574 /*
Jan Beulich45948d72006-03-25 16:29:25 +0100575 * Switch the PDA and FPU contexts.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 */
577 prev->userrsp = read_pda(oldrsp);
578 write_pda(oldrsp, next->userrsp);
579 write_pda(pcurrent, next_p);
Andi Kleen18bd0572006-04-20 02:36:45 +0200580
Jan Beulich45948d72006-03-25 16:29:25 +0100581 /* This must be here to ensure both math_state_restore() and
Andi Kleen18bd0572006-04-20 02:36:45 +0200582 kernel_fpu_begin() work consistently.
583 And the AMD workaround requires it to be after DS reload. */
Jan Beulich45948d72006-03-25 16:29:25 +0100584 unlazy_fpu(prev_p);
Andi Kleena88cde12005-11-05 17:25:54 +0100585 write_pda(kernelstack,
Al Viro57eafdc2006-01-12 01:05:39 -0800586 task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587
588 /*
589 * Now maybe reload the debug registers
 * (only when next has a non-zero debugreg7).
590 */
591 if (unlikely(next->debugreg7)) {
592 loaddebug(next, 0);
593 loaddebug(next, 1);
594 loaddebug(next, 2);
595 loaddebug(next, 3);
596 /* no 4 and 5 */
597 loaddebug(next, 6);
598 loaddebug(next, 7);
599 }
600
601
602 /*
603 * Handle the IO bitmap
 * (skipped entirely when neither task has one).
604 */
605 if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
606 if (next->io_bitmap_ptr)
607 /*
608 * Copy the relevant range of the IO bitmap.
609 * Normally this is 128 bytes or less:
 * The larger of the two io_bitmap_max values is copied, so the
 * bytes prev had in use are overwritten as well.
610 */
611 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
612 max(prev->io_bitmap_max, next->io_bitmap_max));
613 else {
614 /*
615 * Clear any possible leftover bits:
616 */
617 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
618 }
619 }
620
621 return prev_p;
622}
623
624/*
625 * sys_execve() executes a new program.
626 */
627asmlinkage
628long sys_execve(char __user *name, char __user * __user *argv,
629 char __user * __user *envp, struct pt_regs regs)
630{
631 long error;
632 char * filename;
633
634 filename = getname(name);
635 error = PTR_ERR(filename);
636 if (IS_ERR(filename))
637 return error;
638 error = do_execve(filename, argv, envp, &regs);
639 if (error == 0) {
640 task_lock(current);
641 current->ptrace &= ~PT_DTRACE;
642 task_unlock(current);
643 }
644 putname(filename);
645 return error;
646}
647
648void set_personality_64bit(void)
649{
650 /* inherit personality from parent */
651
652 /* Make sure to be in 64bit mode */
653 clear_thread_flag(TIF_IA32);
654
655 /* TBD: overwrites user setup. Should have two bits.
656 But 64bit processes have always behaved this way,
657 so it's not too bad. The main problem is just that
658 32bit childs are affected again. */
659 current->personality &= ~READ_IMPLIES_EXEC;
660}
661
662asmlinkage long sys_fork(struct pt_regs *regs)
663{
664 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
665}
666
Andi Kleena88cde12005-11-05 17:25:54 +0100667asmlinkage long
668sys_clone(unsigned long clone_flags, unsigned long newsp,
669 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670{
671 if (!newsp)
672 newsp = regs->rsp;
673 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
674}
675
676/*
677 * This is trivial, and on the face of it looks like it
678 * could equally well be done in user mode.
679 *
680 * Not so, for quite unobvious reasons - register pressure.
681 * In user mode vfork() cannot have a stack frame, and if
682 * done by calling the "clone()" system call directly, you
683 * do not have enough call-clobbered registers to hold all
684 * the information you need.
685 */
686asmlinkage long sys_vfork(struct pt_regs *regs)
687{
688 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
689 NULL, NULL);
690}
691
692unsigned long get_wchan(struct task_struct *p)
693{
694 unsigned long stack;
695 u64 fp,rip;
696 int count = 0;
697
698 if (!p || p == current || p->state==TASK_RUNNING)
699 return 0;
Al Viro57eafdc2006-01-12 01:05:39 -0800700 stack = (unsigned long)task_stack_page(p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
702 return 0;
703 fp = *(u64 *)(p->thread.rsp);
704 do {
Andi Kleena88cde12005-11-05 17:25:54 +0100705 if (fp < (unsigned long)stack ||
706 fp > (unsigned long)stack+THREAD_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 return 0;
708 rip = *(u64 *)(fp+8);
709 if (!in_sched_functions(rip))
710 return rip;
711 fp = *(u64 *)fp;
712 } while (count++ < 16);
713 return 0;
714}
715
716long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
717{
718 int ret = 0;
719 int doit = task == current;
720 int cpu;
721
722 switch (code) {
723 case ARCH_SET_GS:
Suresh Siddha84929802005-06-21 17:14:32 -0700724 if (addr >= TASK_SIZE_OF(task))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 return -EPERM;
726 cpu = get_cpu();
727 /* handle small bases via the GDT because that's faster to
728 switch. */
729 if (addr <= 0xffffffff) {
730 set_32bit_tls(task, GS_TLS, addr);
731 if (doit) {
732 load_TLS(&task->thread, cpu);
733 load_gs_index(GS_TLS_SEL);
734 }
735 task->thread.gsindex = GS_TLS_SEL;
736 task->thread.gs = 0;
737 } else {
738 task->thread.gsindex = 0;
739 task->thread.gs = addr;
740 if (doit) {
Andi Kleena88cde12005-11-05 17:25:54 +0100741 load_gs_index(0);
742 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 }
744 }
745 put_cpu();
746 break;
747 case ARCH_SET_FS:
748 /* Not strictly needed for fs, but do it for symmetry
749 with gs */
Suresh Siddha84929802005-06-21 17:14:32 -0700750 if (addr >= TASK_SIZE_OF(task))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 return -EPERM;
752 cpu = get_cpu();
753 /* handle small bases via the GDT because that's faster to
754 switch. */
755 if (addr <= 0xffffffff) {
756 set_32bit_tls(task, FS_TLS, addr);
757 if (doit) {
758 load_TLS(&task->thread, cpu);
Andi Kleena88cde12005-11-05 17:25:54 +0100759 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 }
761 task->thread.fsindex = FS_TLS_SEL;
762 task->thread.fs = 0;
763 } else {
764 task->thread.fsindex = 0;
765 task->thread.fs = addr;
766 if (doit) {
767 /* set the selector to 0 to not confuse
768 __switch_to */
Andi Kleena88cde12005-11-05 17:25:54 +0100769 asm volatile("movl %0,%%fs" :: "r" (0));
770 ret = checking_wrmsrl(MSR_FS_BASE, addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 }
772 }
773 put_cpu();
774 break;
775 case ARCH_GET_FS: {
776 unsigned long base;
777 if (task->thread.fsindex == FS_TLS_SEL)
778 base = read_32bit_tls(task, FS_TLS);
Andi Kleena88cde12005-11-05 17:25:54 +0100779 else if (doit)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 rdmsrl(MSR_FS_BASE, base);
Andi Kleena88cde12005-11-05 17:25:54 +0100781 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782 base = task->thread.fs;
783 ret = put_user(base, (unsigned long __user *)addr);
784 break;
785 }
786 case ARCH_GET_GS: {
787 unsigned long base;
John Blackwood97c28032006-04-07 19:50:25 +0200788 unsigned gsindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 if (task->thread.gsindex == GS_TLS_SEL)
790 base = read_32bit_tls(task, GS_TLS);
John Blackwood97c28032006-04-07 19:50:25 +0200791 else if (doit) {
792 asm("movl %%gs,%0" : "=r" (gsindex));
793 if (gsindex)
794 rdmsrl(MSR_KERNEL_GS_BASE, base);
795 else
796 base = task->thread.gs;
797 }
Andi Kleena88cde12005-11-05 17:25:54 +0100798 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 base = task->thread.gs;
800 ret = put_user(base, (unsigned long __user *)addr);
801 break;
802 }
803
804 default:
805 ret = -EINVAL;
806 break;
807 }
808
809 return ret;
810}
811
812long sys_arch_prctl(int code, unsigned long addr)
813{
814 return do_arch_prctl(current, code, addr);
815}
816
817/*
818 * Capture the user space registers if the task is not running (in user space)
819 */
820int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
821{
822 struct pt_regs *pp, ptregs;
823
Al Virobb049232006-01-12 01:05:38 -0800824 pp = task_pt_regs(tsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825
826 ptregs = *pp;
827 ptregs.cs &= 0xffff;
828 ptregs.ss &= 0xffff;
829
830 elf_core_copy_regs(regs, &ptregs);
831
832 return 1;
833}
834
835unsigned long arch_align_stack(unsigned long sp)
836{
837 if (randomize_va_space)
838 sp -= get_random_int() % 8192;
839 return sp & ~0xf;
840}