blob: 537893a16014cbbdd6d72259e795030625e91a77 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/stat.h>
16#include <linux/time.h>
17#include <linux/mm.h>
18#include <linux/mman.h>
19#include <linux/a.out.h>
20#include <linux/errno.h>
21#include <linux/signal.h>
22#include <linux/binfmts.h>
23#include <linux/string.h>
24#include <linux/file.h>
25#include <linux/fcntl.h>
26#include <linux/ptrace.h>
27#include <linux/slab.h>
28#include <linux/shm.h>
29#include <linux/personality.h>
30#include <linux/elfcore.h>
31#include <linux/init.h>
32#include <linux/highuid.h>
33#include <linux/smp.h>
34#include <linux/smp_lock.h>
35#include <linux/compiler.h>
36#include <linux/highmem.h>
37#include <linux/pagemap.h>
38#include <linux/security.h>
39#include <linux/syscalls.h>
40#include <linux/random.h>
41
42#include <asm/uaccess.h>
43#include <asm/param.h>
44#include <asm/page.h>
45
46#include <linux/elf.h>
47
48static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
49static int load_elf_library(struct file*);
50static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
51extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
52
53#ifndef elf_addr_t
54#define elf_addr_t unsigned long
55#endif
56
/*
 * Core dumping is only available when both USE_ELF_CORE_DUMP and
 * CONFIG_ELF_CORE are defined.  Otherwise elf_core_dump is NULL so
 * that the .core_dump slot of elf_format is empty.
 */
#if !defined(USE_ELF_CORE_DUMP) || !defined(CONFIG_ELF_CORE)
#define elf_core_dump	NULL
#else
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#endif
66
/* Mapping granularity: the larger of the ELF exec page size and PAGE_SIZE. */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
# define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
# define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Round an address down to / take its offset within / round it up to an
 * ELF_MIN_ALIGN boundary.  The masks are built as unsigned long so that
 * the upper address bits survive even if ELF_MIN_ALIGN has a narrower
 * unsigned type.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
80
81static struct linux_binfmt elf_format = {
82 .module = THIS_MODULE,
83 .load_binary = load_elf_binary,
84 .load_shlib = load_elf_library,
85 .core_dump = elf_core_dump,
86 .min_coredump = ELF_EXEC_PAGESIZE
87};
88
/*
 * True when x is not a usable user-space start address.  This covers both
 * addresses at or above TASK_SIZE and negative error codes returned as
 * unsigned long by the mapping helpers.
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
90
91static int set_brk(unsigned long start, unsigned long end)
92{
93 start = ELF_PAGEALIGN(start);
94 end = ELF_PAGEALIGN(end);
95 if (end > start) {
96 unsigned long addr;
97 down_write(&current->mm->mmap_sem);
98 addr = do_brk(start, end - start);
99 up_write(&current->mm->mmap_sem);
100 if (BAD_ADDR(addr))
101 return addr;
102 }
103 current->mm->start_brk = current->mm->brk = end;
104 return 0;
105}
106
107
108/* We need to explicitly zero any fractional pages
109 after the data section (i.e. bss). This would
110 contain the junk from the file that should not
111 be in memory */
112
113
114static int padzero(unsigned long elf_bss)
115{
116 unsigned long nbyte;
117
118 nbyte = ELF_PAGEOFFSET(elf_bss);
119 if (nbyte) {
120 nbyte = ELF_MIN_ALIGN - nbyte;
121 if (clear_user((void __user *) elf_bss, nbyte))
122 return -EFAULT;
123 }
124 return 0;
125}
126
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD(sp, items)   - address 'items' slots further along the stack
 * STACK_ROUND(sp, items) - the same, rounded to a 16-byte boundary
 * STACK_ALLOC(sp, len)   - move sp by len bytes and yield the address of
 *                          the space just reserved
 *
 * Every argument is parenthesized so that expressions such as "a + b"
 * can be passed safely.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
	(sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
139
140static int
141create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
142 int interp_aout, unsigned long load_addr,
143 unsigned long interp_load_addr)
144{
145 unsigned long p = bprm->p;
146 int argc = bprm->argc;
147 int envc = bprm->envc;
148 elf_addr_t __user *argv;
149 elf_addr_t __user *envp;
150 elf_addr_t __user *sp;
151 elf_addr_t __user *u_platform;
152 const char *k_platform = ELF_PLATFORM;
153 int items;
154 elf_addr_t *elf_info;
155 int ei_index = 0;
156 struct task_struct *tsk = current;
157
158 /*
159 * If this architecture has a platform capability string, copy it
160 * to userspace. In some cases (Sparc), this info is impossible
161 * for userspace to get any other way, in others (i386) it is
162 * merely difficult.
163 */
164
165 u_platform = NULL;
166 if (k_platform) {
167 size_t len = strlen(k_platform) + 1;
168
169 /*
170 * In some cases (e.g. Hyper-Threading), we want to avoid L1
171 * evictions by the processes running on the same package. One
172 * thing we can do is to shuffle the initial stack for them.
173 */
174
175 p = arch_align_stack(p);
176
177 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
178 if (__copy_to_user(u_platform, k_platform, len))
179 return -EFAULT;
180 }
181
182 /* Create the ELF interpreter info */
183 elf_info = (elf_addr_t *) current->mm->saved_auxv;
184#define NEW_AUX_ENT(id, val) \
185 do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0)
186
187#ifdef ARCH_DLINFO
188 /*
189 * ARCH_DLINFO must come first so PPC can do its special alignment of
190 * AUXV.
191 */
192 ARCH_DLINFO;
193#endif
194 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
195 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
196 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
197 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
198 NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr));
199 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
200 NEW_AUX_ENT(AT_BASE, interp_load_addr);
201 NEW_AUX_ENT(AT_FLAGS, 0);
202 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
203 NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid);
204 NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid);
205 NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid);
206 NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid);
207 NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm));
208 if (k_platform) {
209 NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
210 }
211 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
212 NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data);
213 }
214#undef NEW_AUX_ENT
215 /* AT_NULL is zero; clear the rest too */
216 memset(&elf_info[ei_index], 0,
217 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
218
219 /* And advance past the AT_NULL entry. */
220 ei_index += 2;
221
222 sp = STACK_ADD(p, ei_index);
223
224 items = (argc + 1) + (envc + 1);
225 if (interp_aout) {
226 items += 3; /* a.out interpreters require argv & envp too */
227 } else {
228 items += 1; /* ELF interpreters only put argc on the stack */
229 }
230 bprm->p = STACK_ROUND(sp, items);
231
232 /* Point sp at the lowest address on the stack */
233#ifdef CONFIG_STACK_GROWSUP
234 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
235 bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */
236#else
237 sp = (elf_addr_t __user *)bprm->p;
238#endif
239
240 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
241 if (__put_user(argc, sp++))
242 return -EFAULT;
243 if (interp_aout) {
244 argv = sp + 2;
245 envp = argv + argc + 1;
246 __put_user((elf_addr_t)(unsigned long)argv, sp++);
247 __put_user((elf_addr_t)(unsigned long)envp, sp++);
248 } else {
249 argv = sp;
250 envp = argv + argc + 1;
251 }
252
253 /* Populate argv and envp */
Greg Kroah-Hartmana84a5052005-05-11 00:10:44 -0700254 p = current->mm->arg_end = current->mm->arg_start;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 while (argc-- > 0) {
256 size_t len;
257 __put_user((elf_addr_t)p, argv++);
258 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
259 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
260 return 0;
261 p += len;
262 }
263 if (__put_user(0, argv))
264 return -EFAULT;
265 current->mm->arg_end = current->mm->env_start = p;
266 while (envc-- > 0) {
267 size_t len;
268 __put_user((elf_addr_t)p, envp++);
269 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
270 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
271 return 0;
272 p += len;
273 }
274 if (__put_user(0, envp))
275 return -EFAULT;
276 current->mm->env_end = p;
277
278 /* Put the elf_info on the stack in the right place. */
279 sp = (elf_addr_t __user *)envp + 1;
280 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
281 return -EFAULT;
282 return 0;
283}
284
285#ifndef elf_map
286
287static unsigned long elf_map(struct file *filep, unsigned long addr,
288 struct elf_phdr *eppnt, int prot, int type)
289{
290 unsigned long map_addr;
David Gibsondda6ebd2006-01-08 01:03:35 -0800291 unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292
293 down_write(&current->mm->mmap_sem);
David Gibsondda6ebd2006-01-08 01:03:35 -0800294 /* mmap() will return -EINVAL if given a zero size, but a
295 * segment with zero filesize is perfectly valid */
296 if (eppnt->p_filesz + pageoffset)
297 map_addr = do_mmap(filep, ELF_PAGESTART(addr),
298 eppnt->p_filesz + pageoffset, prot, type,
299 eppnt->p_offset - pageoffset);
300 else
301 map_addr = ELF_PAGESTART(addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 up_write(&current->mm->mmap_sem);
303 return(map_addr);
304}
305
306#endif /* !elf_map */
307
308/* This is much more generalized than the library routine read function,
309 so we keep this separate. Technically the library read function
310 is only provided so that we can read a.out libraries that have
311 an ELF header */
312
313static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
314 struct file * interpreter,
315 unsigned long *interp_load_addr)
316{
317 struct elf_phdr *elf_phdata;
318 struct elf_phdr *eppnt;
319 unsigned long load_addr = 0;
320 int load_addr_set = 0;
321 unsigned long last_bss = 0, elf_bss = 0;
322 unsigned long error = ~0UL;
323 int retval, i, size;
324
325 /* First of all, some simple consistency checks */
326 if (interp_elf_ex->e_type != ET_EXEC &&
327 interp_elf_ex->e_type != ET_DYN)
328 goto out;
329 if (!elf_check_arch(interp_elf_ex))
330 goto out;
331 if (!interpreter->f_op || !interpreter->f_op->mmap)
332 goto out;
333
334 /*
335 * If the size of this structure has changed, then punt, since
336 * we will be doing the wrong thing.
337 */
338 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
339 goto out;
340 if (interp_elf_ex->e_phnum < 1 ||
341 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
342 goto out;
343
344 /* Now read in all of the header information */
345
346 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
347 if (size > ELF_MIN_ALIGN)
348 goto out;
349 elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
350 if (!elf_phdata)
351 goto out;
352
353 retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
354 error = -EIO;
355 if (retval != size) {
356 if (retval < 0)
357 error = retval;
358 goto out_close;
359 }
360
361 eppnt = elf_phdata;
362 for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
363 if (eppnt->p_type == PT_LOAD) {
364 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
365 int elf_prot = 0;
366 unsigned long vaddr = 0;
367 unsigned long k, map_addr;
368
369 if (eppnt->p_flags & PF_R) elf_prot = PROT_READ;
370 if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
371 if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
372 vaddr = eppnt->p_vaddr;
373 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
374 elf_type |= MAP_FIXED;
375
376 map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
377 error = map_addr;
378 if (BAD_ADDR(map_addr))
379 goto out_close;
380
381 if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
382 load_addr = map_addr - ELF_PAGESTART(vaddr);
383 load_addr_set = 1;
384 }
385
386 /*
387 * Check to see if the section's size will overflow the
388 * allowed task size. Note that p_filesz must always be
389 * <= p_memsize so it is only necessary to check p_memsz.
390 */
391 k = load_addr + eppnt->p_vaddr;
392 if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz ||
393 eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {
394 error = -ENOMEM;
395 goto out_close;
396 }
397
398 /*
399 * Find the end of the file mapping for this phdr, and keep
400 * track of the largest address we see for this.
401 */
402 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
403 if (k > elf_bss)
404 elf_bss = k;
405
406 /*
407 * Do the same thing for the memory mapping - between
408 * elf_bss and last_bss is the bss section.
409 */
410 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
411 if (k > last_bss)
412 last_bss = k;
413 }
414 }
415
416 /*
417 * Now fill out the bss section. First pad the last page up
418 * to the page boundary, and then perform a mmap to make sure
419 * that there are zero-mapped pages up to and including the
420 * last bss page.
421 */
422 if (padzero(elf_bss)) {
423 error = -EFAULT;
424 goto out_close;
425 }
426
427 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */
428
429 /* Map the last of the bss segment */
430 if (last_bss > elf_bss) {
431 down_write(&current->mm->mmap_sem);
432 error = do_brk(elf_bss, last_bss - elf_bss);
433 up_write(&current->mm->mmap_sem);
434 if (BAD_ADDR(error))
435 goto out_close;
436 }
437
438 *interp_load_addr = load_addr;
439 error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
440
441out_close:
442 kfree(elf_phdata);
443out:
444 return error;
445}
446
447static unsigned long load_aout_interp(struct exec * interp_ex,
448 struct file * interpreter)
449{
450 unsigned long text_data, elf_entry = ~0UL;
451 char __user * addr;
452 loff_t offset;
453
454 current->mm->end_code = interp_ex->a_text;
455 text_data = interp_ex->a_text + interp_ex->a_data;
456 current->mm->end_data = text_data;
457 current->mm->brk = interp_ex->a_bss + text_data;
458
459 switch (N_MAGIC(*interp_ex)) {
460 case OMAGIC:
461 offset = 32;
462 addr = (char __user *)0;
463 break;
464 case ZMAGIC:
465 case QMAGIC:
466 offset = N_TXTOFF(*interp_ex);
467 addr = (char __user *) N_TXTADDR(*interp_ex);
468 break;
469 default:
470 goto out;
471 }
472
473 down_write(&current->mm->mmap_sem);
474 do_brk(0, text_data);
475 up_write(&current->mm->mmap_sem);
476 if (!interpreter->f_op || !interpreter->f_op->read)
477 goto out;
478 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
479 goto out;
480 flush_icache_range((unsigned long)addr,
481 (unsigned long)addr + text_data);
482
483
484 down_write(&current->mm->mmap_sem);
485 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
486 interp_ex->a_bss);
487 up_write(&current->mm->mmap_sem);
488 elf_entry = interp_ex->a_entry;
489
490out:
491 return elf_entry;
492}
493
494/*
495 * These are the functions used to load ELF style executables and shared
496 * libraries. There is no binary dependent code anywhere else.
497 */
498
/* Interpreter formats; AOUT and ELF are distinct bits and may be OR-ed. */
enum {
	INTERPRETER_NONE = 0,
	INTERPRETER_AOUT = 1,
	INTERPRETER_ELF  = 2
};

/* Page-count mask for stack randomization, unless already defined. */
#ifndef STACK_RND_MASK
#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506
507static unsigned long randomize_stack_top(unsigned long stack_top)
508{
509 unsigned int random_variable = 0;
510
Andi Kleen913bd902006-03-25 16:29:09 +0100511 if (current->flags & PF_RANDOMIZE) {
512 random_variable = get_random_int() & STACK_RND_MASK;
513 random_variable <<= PAGE_SHIFT;
514 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515#ifdef CONFIG_STACK_GROWSUP
Andi Kleen913bd902006-03-25 16:29:09 +0100516 return PAGE_ALIGN(stack_top) + random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517#else
Andi Kleen913bd902006-03-25 16:29:09 +0100518 return PAGE_ALIGN(stack_top) - random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519#endif
520}
521
522static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
523{
524 struct file *interpreter = NULL; /* to shut gcc up */
525 unsigned long load_addr = 0, load_bias = 0;
526 int load_addr_set = 0;
527 char * elf_interpreter = NULL;
528 unsigned int interpreter_type = INTERPRETER_NONE;
529 unsigned char ibcs2_interpreter = 0;
530 unsigned long error;
531 struct elf_phdr * elf_ppnt, *elf_phdata;
532 unsigned long elf_bss, elf_brk;
533 int elf_exec_fileno;
534 int retval, i;
535 unsigned int size;
536 unsigned long elf_entry, interp_load_addr = 0;
537 unsigned long start_code, end_code, start_data, end_data;
538 unsigned long reloc_func_desc = 0;
539 char passed_fileno[6];
540 struct files_struct *files;
541 int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
542 unsigned long def_flags = 0;
543 struct {
544 struct elfhdr elf_ex;
545 struct elfhdr interp_elf_ex;
546 struct exec interp_ex;
547 } *loc;
548
549 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
550 if (!loc) {
551 retval = -ENOMEM;
552 goto out_ret;
553 }
554
555 /* Get the exec-header */
556 loc->elf_ex = *((struct elfhdr *) bprm->buf);
557
558 retval = -ENOEXEC;
559 /* First of all, some simple consistency checks */
560 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
561 goto out;
562
563 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
564 goto out;
565 if (!elf_check_arch(&loc->elf_ex))
566 goto out;
567 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
568 goto out;
569
570 /* Now read in all of the header information */
571
572 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
573 goto out;
574 if (loc->elf_ex.e_phnum < 1 ||
575 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
576 goto out;
577 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
578 retval = -ENOMEM;
579 elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
580 if (!elf_phdata)
581 goto out;
582
583 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size);
584 if (retval != size) {
585 if (retval >= 0)
586 retval = -EIO;
587 goto out_free_ph;
588 }
589
590 files = current->files; /* Refcounted so ok */
591 retval = unshare_files();
592 if (retval < 0)
593 goto out_free_ph;
594 if (files == current->files) {
595 put_files_struct(files);
596 files = NULL;
597 }
598
599 /* exec will make our files private anyway, but for the a.out
600 loader stuff we need to do it earlier */
601
602 retval = get_unused_fd();
603 if (retval < 0)
604 goto out_free_fh;
605 get_file(bprm->file);
606 fd_install(elf_exec_fileno = retval, bprm->file);
607
608 elf_ppnt = elf_phdata;
609 elf_bss = 0;
610 elf_brk = 0;
611
612 start_code = ~0UL;
613 end_code = 0;
614 start_data = 0;
615 end_data = 0;
616
617 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
618 if (elf_ppnt->p_type == PT_INTERP) {
619 /* This is the program interpreter used for
620 * shared libraries - for now assume that this
621 * is an a.out format binary
622 */
623
624 retval = -ENOEXEC;
625 if (elf_ppnt->p_filesz > PATH_MAX ||
626 elf_ppnt->p_filesz < 2)
627 goto out_free_file;
628
629 retval = -ENOMEM;
Jesper Juhl792db3a2006-01-09 20:54:45 -0800630 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 GFP_KERNEL);
632 if (!elf_interpreter)
633 goto out_free_file;
634
635 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
636 elf_interpreter,
637 elf_ppnt->p_filesz);
638 if (retval != elf_ppnt->p_filesz) {
639 if (retval >= 0)
640 retval = -EIO;
641 goto out_free_interp;
642 }
643 /* make sure path is NULL terminated */
644 retval = -ENOEXEC;
645 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
646 goto out_free_interp;
647
648 /* If the program interpreter is one of these two,
649 * then assume an iBCS2 image. Otherwise assume
650 * a native linux image.
651 */
652 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
653 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
654 ibcs2_interpreter = 1;
655
656 /*
657 * The early SET_PERSONALITY here is so that the lookup
658 * for the interpreter happens in the namespace of the
659 * to-be-execed image. SET_PERSONALITY can select an
660 * alternate root.
661 *
662 * However, SET_PERSONALITY is NOT allowed to switch
663 * this task into the new images's memory mapping
664 * policy - that is, TASK_SIZE must still evaluate to
665 * that which is appropriate to the execing application.
666 * This is because exit_mmap() needs to have TASK_SIZE
667 * evaluate to the size of the old image.
668 *
669 * So if (say) a 64-bit application is execing a 32-bit
670 * application it is the architecture's responsibility
671 * to defer changing the value of TASK_SIZE until the
672 * switch really is going to happen - do this in
673 * flush_thread(). - akpm
674 */
675 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
676
677 interpreter = open_exec(elf_interpreter);
678 retval = PTR_ERR(interpreter);
679 if (IS_ERR(interpreter))
680 goto out_free_interp;
681 retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
682 if (retval != BINPRM_BUF_SIZE) {
683 if (retval >= 0)
684 retval = -EIO;
685 goto out_free_dentry;
686 }
687
688 /* Get the exec headers */
689 loc->interp_ex = *((struct exec *) bprm->buf);
690 loc->interp_elf_ex = *((struct elfhdr *) bprm->buf);
691 break;
692 }
693 elf_ppnt++;
694 }
695
696 elf_ppnt = elf_phdata;
697 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
698 if (elf_ppnt->p_type == PT_GNU_STACK) {
699 if (elf_ppnt->p_flags & PF_X)
700 executable_stack = EXSTACK_ENABLE_X;
701 else
702 executable_stack = EXSTACK_DISABLE_X;
703 break;
704 }
705 have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
706
707 /* Some simple consistency checks for the interpreter */
708 if (elf_interpreter) {
709 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
710
711 /* Now figure out which format our binary is */
712 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
713 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
714 (N_MAGIC(loc->interp_ex) != QMAGIC))
715 interpreter_type = INTERPRETER_ELF;
716
717 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
718 interpreter_type &= ~INTERPRETER_ELF;
719
720 retval = -ELIBBAD;
721 if (!interpreter_type)
722 goto out_free_dentry;
723
724 /* Make sure only one type was selected */
725 if ((interpreter_type & INTERPRETER_ELF) &&
726 interpreter_type != INTERPRETER_ELF) {
727 // FIXME - ratelimit this before re-enabling
728 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
729 interpreter_type = INTERPRETER_ELF;
730 }
731 /* Verify the interpreter has a valid arch */
732 if ((interpreter_type == INTERPRETER_ELF) &&
733 !elf_check_arch(&loc->interp_elf_ex))
734 goto out_free_dentry;
735 } else {
736 /* Executables without an interpreter also need a personality */
737 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
738 }
739
740 /* OK, we are done with that, now set up the arg stuff,
741 and then start this sucker up */
742
743 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
744 char *passed_p = passed_fileno;
745 sprintf(passed_fileno, "%d", elf_exec_fileno);
746
747 if (elf_interpreter) {
748 retval = copy_strings_kernel(1, &passed_p, bprm);
749 if (retval)
750 goto out_free_dentry;
751 bprm->argc++;
752 }
753 }
754
755 /* Flush all traces of the currently running executable */
756 retval = flush_old_exec(bprm);
757 if (retval)
758 goto out_free_dentry;
759
760 /* Discard our unneeded old files struct */
761 if (files) {
762 steal_locks(files);
763 put_files_struct(files);
764 files = NULL;
765 }
766
767 /* OK, This is the point of no return */
768 current->mm->start_data = 0;
769 current->mm->end_data = 0;
770 current->mm->end_code = 0;
771 current->mm->mmap = NULL;
772 current->flags &= ~PF_FORKNOEXEC;
773 current->mm->def_flags = def_flags;
774
775 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
776 may depend on the personality. */
777 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
778 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
779 current->personality |= READ_IMPLIES_EXEC;
780
781 if ( !(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
782 current->flags |= PF_RANDOMIZE;
783 arch_pick_mmap_layout(current->mm);
784
785 /* Do this so that we can load the interpreter, if need be. We will
786 change some of these later */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 current->mm->free_area_cache = current->mm->mmap_base;
Wolfgang Wander1363c3c2005-06-21 17:14:49 -0700788 current->mm->cached_hole_size = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
790 executable_stack);
791 if (retval < 0) {
792 send_sig(SIGKILL, current, 0);
793 goto out_free_dentry;
794 }
795
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 current->mm->start_stack = bprm->p;
797
798 /* Now we do a little grungy work by mmaping the ELF image into
799 the correct location in memory. At this point, we assume that
800 the image should be loaded at fixed address, not at a variable
801 address. */
802
803 for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
804 int elf_prot = 0, elf_flags;
805 unsigned long k, vaddr;
806
807 if (elf_ppnt->p_type != PT_LOAD)
808 continue;
809
810 if (unlikely (elf_brk > elf_bss)) {
811 unsigned long nbyte;
812
813 /* There was a PT_LOAD segment with p_memsz > p_filesz
814 before this one. Map anonymous pages, if needed,
815 and clear the area. */
816 retval = set_brk (elf_bss + load_bias,
817 elf_brk + load_bias);
818 if (retval) {
819 send_sig(SIGKILL, current, 0);
820 goto out_free_dentry;
821 }
822 nbyte = ELF_PAGEOFFSET(elf_bss);
823 if (nbyte) {
824 nbyte = ELF_MIN_ALIGN - nbyte;
825 if (nbyte > elf_brk - elf_bss)
826 nbyte = elf_brk - elf_bss;
827 if (clear_user((void __user *)elf_bss +
828 load_bias, nbyte)) {
829 /*
830 * This bss-zeroing can fail if the ELF
831 * file specifies odd protections. So
832 * we don't check the return value
833 */
834 }
835 }
836 }
837
838 if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
839 if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
840 if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
841
842 elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
843
844 vaddr = elf_ppnt->p_vaddr;
845 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
846 elf_flags |= MAP_FIXED;
847 } else if (loc->elf_ex.e_type == ET_DYN) {
848 /* Try and get dynamic programs out of the way of the default mmap
849 base, as well as whatever program they might try to exec. This
850 is because the brk will follow the loader, and is not movable. */
851 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
852 }
853
854 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
855 if (BAD_ADDR(error)) {
856 send_sig(SIGKILL, current, 0);
857 goto out_free_dentry;
858 }
859
860 if (!load_addr_set) {
861 load_addr_set = 1;
862 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
863 if (loc->elf_ex.e_type == ET_DYN) {
864 load_bias += error -
865 ELF_PAGESTART(load_bias + vaddr);
866 load_addr += load_bias;
867 reloc_func_desc = load_bias;
868 }
869 }
870 k = elf_ppnt->p_vaddr;
871 if (k < start_code) start_code = k;
872 if (start_data < k) start_data = k;
873
874 /*
875 * Check to see if the section's size will overflow the
876 * allowed task size. Note that p_filesz must always be
877 * <= p_memsz so it is only necessary to check p_memsz.
878 */
879 if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
880 elf_ppnt->p_memsz > TASK_SIZE ||
881 TASK_SIZE - elf_ppnt->p_memsz < k) {
882 /* set_brk can never work. Avoid overflows. */
883 send_sig(SIGKILL, current, 0);
884 goto out_free_dentry;
885 }
886
887 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
888
889 if (k > elf_bss)
890 elf_bss = k;
891 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
892 end_code = k;
893 if (end_data < k)
894 end_data = k;
895 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
896 if (k > elf_brk)
897 elf_brk = k;
898 }
899
900 loc->elf_ex.e_entry += load_bias;
901 elf_bss += load_bias;
902 elf_brk += load_bias;
903 start_code += load_bias;
904 end_code += load_bias;
905 start_data += load_bias;
906 end_data += load_bias;
907
908 /* Calling set_brk effectively mmaps the pages that we need
909 * for the bss and break sections. We must do this before
910 * mapping in the interpreter, to make sure it doesn't wind
911 * up getting placed where the bss needs to go.
912 */
913 retval = set_brk(elf_bss, elf_brk);
914 if (retval) {
915 send_sig(SIGKILL, current, 0);
916 goto out_free_dentry;
917 }
akpm@osdl.org6de50512005-10-11 08:29:08 -0700918 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 send_sig(SIGSEGV, current, 0);
920 retval = -EFAULT; /* Nobody gets to see this, but.. */
921 goto out_free_dentry;
922 }
923
924 if (elf_interpreter) {
925 if (interpreter_type == INTERPRETER_AOUT)
926 elf_entry = load_aout_interp(&loc->interp_ex,
927 interpreter);
928 else
929 elf_entry = load_elf_interp(&loc->interp_elf_ex,
930 interpreter,
931 &interp_load_addr);
932 if (BAD_ADDR(elf_entry)) {
933 printk(KERN_ERR "Unable to load interpreter %.128s\n",
934 elf_interpreter);
935 force_sig(SIGSEGV, current);
936 retval = -ENOEXEC; /* Nobody gets to see this, but.. */
937 goto out_free_dentry;
938 }
939 reloc_func_desc = interp_load_addr;
940
941 allow_write_access(interpreter);
942 fput(interpreter);
943 kfree(elf_interpreter);
944 } else {
945 elf_entry = loc->elf_ex.e_entry;
Suresh Siddha5342fba2006-02-26 04:18:28 +0100946 if (BAD_ADDR(elf_entry)) {
947 send_sig(SIGSEGV, current, 0);
948 retval = -ENOEXEC; /* Nobody gets to see this, but.. */
949 goto out_free_dentry;
950 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 }
952
953 kfree(elf_phdata);
954
955 if (interpreter_type != INTERPRETER_AOUT)
956 sys_close(elf_exec_fileno);
957
958 set_binfmt(&elf_format);
959
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -0700960#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
961 retval = arch_setup_additional_pages(bprm, executable_stack);
962 if (retval < 0) {
963 send_sig(SIGKILL, current, 0);
Roland McGrath18c8baff2005-04-28 15:17:19 -0700964 goto out;
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -0700965 }
966#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
967
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 compute_creds(bprm);
969 current->flags &= ~PF_FORKNOEXEC;
970 create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT),
971 load_addr, interp_load_addr);
972 /* N.B. passed_fileno might not be initialized? */
973 if (interpreter_type == INTERPRETER_AOUT)
974 current->mm->arg_start += strlen(passed_fileno) + 1;
975 current->mm->end_code = end_code;
976 current->mm->start_code = start_code;
977 current->mm->start_data = start_data;
978 current->mm->end_data = end_data;
979 current->mm->start_stack = bprm->p;
980
981 if (current->personality & MMAP_PAGE_ZERO) {
982 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
983 and some applications "depend" upon this behavior.
984 Since we do not have the power to recompile these, we
985 emulate the SVr4 behavior. Sigh. */
986 down_write(&current->mm->mmap_sem);
987 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
988 MAP_FIXED | MAP_PRIVATE, 0);
989 up_write(&current->mm->mmap_sem);
990 }
991
992#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
1003 ELF_PLAT_INIT(regs, reloc_func_desc);
1004#endif
1005
1006 start_thread(regs, elf_entry, bprm->p);
1007 if (unlikely(current->ptrace & PT_PTRACED)) {
1008 if (current->ptrace & PT_TRACE_EXEC)
1009 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1010 else
1011 send_sig(SIGTRAP, current, 0);
1012 }
1013 retval = 0;
1014out:
1015 kfree(loc);
1016out_ret:
1017 return retval;
1018
1019 /* error cleanup */
1020out_free_dentry:
1021 allow_write_access(interpreter);
1022 if (interpreter)
1023 fput(interpreter);
1024out_free_interp:
Jesper Juhlf99d49a2005-11-07 01:01:34 -08001025 kfree(elf_interpreter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026out_free_file:
1027 sys_close(elf_exec_fileno);
1028out_free_fh:
1029 if (files) {
1030 put_files_struct(current->files);
1031 current->files = files;
1032 }
1033out_free_ph:
1034 kfree(elf_phdata);
1035 goto out;
1036}
1037
1038/* This is really simpleminded and specialized - we are loading an
1039 a.out library that is given an ELF header. */
1040
1041static int load_elf_library(struct file *file)
1042{
1043 struct elf_phdr *elf_phdata;
1044 struct elf_phdr *eppnt;
1045 unsigned long elf_bss, bss, len;
1046 int retval, error, i, j;
1047 struct elfhdr elf_ex;
1048
1049 error = -ENOEXEC;
1050 retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
1051 if (retval != sizeof(elf_ex))
1052 goto out;
1053
1054 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1055 goto out;
1056
1057 /* First of all, some simple consistency checks */
1058 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1059 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1060 goto out;
1061
1062 /* Now read in all of the header information */
1063
1064 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1065 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1066
1067 error = -ENOMEM;
1068 elf_phdata = kmalloc(j, GFP_KERNEL);
1069 if (!elf_phdata)
1070 goto out;
1071
1072 eppnt = elf_phdata;
1073 error = -ENOEXEC;
1074 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1075 if (retval != j)
1076 goto out_free_ph;
1077
1078 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1079 if ((eppnt + i)->p_type == PT_LOAD)
1080 j++;
1081 if (j != 1)
1082 goto out_free_ph;
1083
1084 while (eppnt->p_type != PT_LOAD)
1085 eppnt++;
1086
1087 /* Now use mmap to map the library into memory. */
1088 down_write(&current->mm->mmap_sem);
1089 error = do_mmap(file,
1090 ELF_PAGESTART(eppnt->p_vaddr),
1091 (eppnt->p_filesz +
1092 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1093 PROT_READ | PROT_WRITE | PROT_EXEC,
1094 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1095 (eppnt->p_offset -
1096 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1097 up_write(&current->mm->mmap_sem);
1098 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1099 goto out_free_ph;
1100
1101 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1102 if (padzero(elf_bss)) {
1103 error = -EFAULT;
1104 goto out_free_ph;
1105 }
1106
1107 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1);
1108 bss = eppnt->p_memsz + eppnt->p_vaddr;
1109 if (bss > len) {
1110 down_write(&current->mm->mmap_sem);
1111 do_brk(len, bss - len);
1112 up_write(&current->mm->mmap_sem);
1113 }
1114 error = 0;
1115
1116out_free_ph:
1117 kfree(elf_phdata);
1118out:
1119 return error;
1120}
1121
1122/*
1123 * Note that some platforms still use traditional core dumps and not
1124 * the ELF core dump. Each platform can select it as appropriate.
1125 */
Matt Mackall708e9a72006-01-08 01:05:25 -08001126#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127
1128/*
1129 * ELF core dumper
1130 *
1131 * Modelled on fs/exec.c:aout_core_dump()
1132 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1133 */
1134/*
1135 * These are the only things you should do on a core-file: use only these
1136 * functions to write out all the necessary info.
1137 */
1138static int dump_write(struct file *file, const void *addr, int nr)
1139{
1140 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1141}
1142
Daniel Jacobowitz5db92852005-06-15 22:26:34 -07001143static int dump_seek(struct file *file, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144{
1145 if (file->f_op->llseek) {
1146 if (file->f_op->llseek(file, off, 0) != off)
1147 return 0;
1148 } else
1149 file->f_pos = off;
1150 return 1;
1151}
1152
1153/*
1154 * Decide whether a segment is worth dumping; default is yes to be
1155 * sure (missing info is worse than too much; etc).
1156 * Personally I'd include everything, and use the coredump limit...
1157 *
1158 * I think we should skip something. But I am not sure how. H.J.
1159 */
1160static int maydump(struct vm_area_struct *vma)
1161{
1162 /* Do not dump I/O mapped devices or special mappings */
1163 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1164 return 0;
1165
1166 /* Dump shared memory only if mapped from an anonymous file. */
1167 if (vma->vm_flags & VM_SHARED)
1168 return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
1169
1170 /* If it hasn't been written to, don't write it out */
1171 if (!vma->anon_vma)
1172 return 0;
1173
1174 return 1;
1175}
1176
/* Round @val up to the next multiple of @step (integer arithmetic). */
#define roundup(val, step) ((((val) + ((step) - 1)) / (step)) * (step))
1178
/*
 * An ELF note in memory: describes one note before it is serialized
 * into the core file.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, one of the NT_* values */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note descriptor payload */
};
1187
1188static int notesize(struct memelfnote *en)
1189{
1190 int sz;
1191
1192 sz = sizeof(struct elf_note);
1193 sz += roundup(strlen(en->name) + 1, 4);
1194 sz += roundup(en->datasz, 4);
1195
1196 return sz;
1197}
1198
1199#define DUMP_WRITE(addr, nr) \
1200 do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
1201#define DUMP_SEEK(off) \
1202 do { if (!dump_seek(file, (off))) return 0; } while(0)
1203
1204static int writenote(struct memelfnote *men, struct file *file)
1205{
1206 struct elf_note en;
1207
1208 en.n_namesz = strlen(men->name) + 1;
1209 en.n_descsz = men->datasz;
1210 en.n_type = men->type;
1211
1212 DUMP_WRITE(&en, sizeof(en));
1213 DUMP_WRITE(men->name, en.n_namesz);
1214 /* XXX - cast from long long to long to avoid need for libgcc.a */
1215 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1216 DUMP_WRITE(men->data, men->datasz);
1217 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1218
1219 return 1;
1220}
1221#undef DUMP_WRITE
1222#undef DUMP_SEEK
1223
/*
 * Helpers for elf_core_dump().  Both expect the caller to provide the
 * locals `file', `size' and `limit' and an `end_coredump' label.
 *
 * DUMP_WRITE charges @nr bytes against the core size limit and writes
 * them; DUMP_SEEK repositions the file.  Either one jumps to
 * end_coredump on failure.
 *
 * They are wrapped in do { } while (0) so that each use is a single
 * statement and cannot capture a following `else'.
 */
#define DUMP_WRITE(addr, nr)	\
	do { \
		if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
			goto end_coredump; \
	} while (0)
#define DUMP_SEEK(off)	\
	do { \
		if (!dump_seek(file, (off))) \
			goto end_coredump; \
	} while (0)
1230
Arjan van de Ven858119e2006-01-14 13:20:43 -08001231static void fill_elf_header(struct elfhdr *elf, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232{
1233 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1234 elf->e_ident[EI_CLASS] = ELF_CLASS;
1235 elf->e_ident[EI_DATA] = ELF_DATA;
1236 elf->e_ident[EI_VERSION] = EV_CURRENT;
1237 elf->e_ident[EI_OSABI] = ELF_OSABI;
1238 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1239
1240 elf->e_type = ET_CORE;
1241 elf->e_machine = ELF_ARCH;
1242 elf->e_version = EV_CURRENT;
1243 elf->e_entry = 0;
1244 elf->e_phoff = sizeof(struct elfhdr);
1245 elf->e_shoff = 0;
1246 elf->e_flags = ELF_CORE_EFLAGS;
1247 elf->e_ehsize = sizeof(struct elfhdr);
1248 elf->e_phentsize = sizeof(struct elf_phdr);
1249 elf->e_phnum = segs;
1250 elf->e_shentsize = 0;
1251 elf->e_shnum = 0;
1252 elf->e_shstrndx = 0;
1253 return;
1254}
1255
Arjan van de Ven858119e2006-01-14 13:20:43 -08001256static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257{
1258 phdr->p_type = PT_NOTE;
1259 phdr->p_offset = offset;
1260 phdr->p_vaddr = 0;
1261 phdr->p_paddr = 0;
1262 phdr->p_filesz = sz;
1263 phdr->p_memsz = 0;
1264 phdr->p_flags = 0;
1265 phdr->p_align = 0;
1266 return;
1267}
1268
1269static void fill_note(struct memelfnote *note, const char *name, int type,
1270 unsigned int sz, void *data)
1271{
1272 note->name = name;
1273 note->type = type;
1274 note->datasz = sz;
1275 note->data = data;
1276 return;
1277}
1278
1279/*
1280 * fill up all the fields in prstatus from the given task struct, except registers
1281 * which need to be filled up separately.
1282 */
1283static void fill_prstatus(struct elf_prstatus *prstatus,
1284 struct task_struct *p, long signr)
1285{
1286 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1287 prstatus->pr_sigpend = p->pending.signal.sig[0];
1288 prstatus->pr_sighold = p->blocked.sig[0];
1289 prstatus->pr_pid = p->pid;
1290 prstatus->pr_ppid = p->parent->pid;
1291 prstatus->pr_pgrp = process_group(p);
1292 prstatus->pr_sid = p->signal->session;
1293 if (thread_group_leader(p)) {
1294 /*
1295 * This is the record for the group leader. Add in the
1296 * cumulative times of previous dead threads. This total
1297 * won't include the time of each live thread whose state
1298 * is included in the core dump. The final total reported
1299 * to our parent process when it calls wait4 will include
1300 * those sums as well as the little bit more time it takes
1301 * this and each other thread to finish dying after the
1302 * core dump synchronization phase.
1303 */
1304 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1305 &prstatus->pr_utime);
1306 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1307 &prstatus->pr_stime);
1308 } else {
1309 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1310 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1311 }
1312 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1313 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1314}
1315
1316static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1317 struct mm_struct *mm)
1318{
Greg Kroah-Hartmana84a5052005-05-11 00:10:44 -07001319 unsigned int i, len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320
1321 /* first copy the parameters from user space */
1322 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1323
1324 len = mm->arg_end - mm->arg_start;
1325 if (len >= ELF_PRARGSZ)
1326 len = ELF_PRARGSZ-1;
1327 if (copy_from_user(&psinfo->pr_psargs,
1328 (const char __user *)mm->arg_start, len))
1329 return -EFAULT;
1330 for(i = 0; i < len; i++)
1331 if (psinfo->pr_psargs[i] == 0)
1332 psinfo->pr_psargs[i] = ' ';
1333 psinfo->pr_psargs[len] = 0;
1334
1335 psinfo->pr_pid = p->pid;
1336 psinfo->pr_ppid = p->parent->pid;
1337 psinfo->pr_pgrp = process_group(p);
1338 psinfo->pr_sid = p->signal->session;
1339
1340 i = p->state ? ffz(~p->state) + 1 : 0;
1341 psinfo->pr_state = i;
Carsten Otte55148542006-03-25 03:08:22 -08001342 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1344 psinfo->pr_nice = task_nice(p);
1345 psinfo->pr_flag = p->flags;
1346 SET_UID(psinfo->pr_uid, p->uid);
1347 SET_GID(psinfo->pr_gid, p->gid);
1348 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1349
1350 return 0;
1351}
1352
1353/* Here is the structure in which status of each thread is captured. */
1354struct elf_thread_status
1355{
1356 struct list_head list;
1357 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1358 elf_fpregset_t fpu; /* NT_PRFPREG */
1359 struct task_struct *thread;
1360#ifdef ELF_CORE_COPY_XFPREGS
1361 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1362#endif
1363 struct memelfnote notes[3];
1364 int num_notes;
1365};
1366
1367/*
1368 * In order to add the specific thread information for the elf file format,
1369 * we need to keep a linked list of every threads pr_status and then
1370 * create a single section for them in the final core file.
1371 */
1372static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1373{
1374 int sz = 0;
1375 struct task_struct *p = t->thread;
1376 t->num_notes = 0;
1377
1378 fill_prstatus(&t->prstatus, p, signr);
1379 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1380
1381 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
1382 t->num_notes++;
1383 sz += notesize(&t->notes[0]);
1384
1385 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) {
1386 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
1387 t->num_notes++;
1388 sz += notesize(&t->notes[1]);
1389 }
1390
1391#ifdef ELF_CORE_COPY_XFPREGS
1392 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1393 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu);
1394 t->num_notes++;
1395 sz += notesize(&t->notes[2]);
1396 }
1397#endif
1398 return sz;
1399}
1400
1401/*
1402 * Actual dumper
1403 *
1404 * This is a two-pass process; first we find the offsets of the bits,
1405 * and then they are actually written out. If we run out of core limit
1406 * we just truncate.
1407 */
1408static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1409{
1410#define NUM_NOTES 6
1411 int has_dumped = 0;
1412 mm_segment_t fs;
1413 int segs;
1414 size_t size = 0;
1415 int i;
1416 struct vm_area_struct *vma;
1417 struct elfhdr *elf = NULL;
1418 off_t offset = 0, dataoff;
1419 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1420 int numnote;
1421 struct memelfnote *notes = NULL;
1422 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1423 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1424 struct task_struct *g, *p;
1425 LIST_HEAD(thread_list);
1426 struct list_head *t;
1427 elf_fpregset_t *fpu = NULL;
1428#ifdef ELF_CORE_COPY_XFPREGS
1429 elf_fpxregset_t *xfpu = NULL;
1430#endif
1431 int thread_status_size = 0;
1432 elf_addr_t *auxv;
1433
1434 /*
1435 * We no longer stop all VM operations.
1436 *
1437 * This is because those proceses that could possibly change map_count or
1438 * the mmap / vma pages are now blocked in do_exit on current finishing
1439 * this core dump.
1440 *
1441 * Only ptrace can touch these memory addresses, but it doesn't change
1442 * the map_count or the pages allocated. So no possibility of crashing
1443 * exists while dumping the mm->vm_next areas to the core file.
1444 */
1445
1446 /* alloc memory for large data structures: too large to be on stack */
1447 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1448 if (!elf)
1449 goto cleanup;
1450 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1451 if (!prstatus)
1452 goto cleanup;
1453 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1454 if (!psinfo)
1455 goto cleanup;
1456 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1457 if (!notes)
1458 goto cleanup;
1459 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1460 if (!fpu)
1461 goto cleanup;
1462#ifdef ELF_CORE_COPY_XFPREGS
1463 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1464 if (!xfpu)
1465 goto cleanup;
1466#endif
1467
1468 if (signr) {
1469 struct elf_thread_status *tmp;
1470 read_lock(&tasklist_lock);
1471 do_each_thread(g,p)
1472 if (current->mm == p->mm && current != p) {
Oliver Neukum11b0b5a2006-03-25 03:08:13 -08001473 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474 if (!tmp) {
1475 read_unlock(&tasklist_lock);
1476 goto cleanup;
1477 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 INIT_LIST_HEAD(&tmp->list);
1479 tmp->thread = p;
1480 list_add(&tmp->list, &thread_list);
1481 }
1482 while_each_thread(g,p);
1483 read_unlock(&tasklist_lock);
1484 list_for_each(t, &thread_list) {
1485 struct elf_thread_status *tmp;
1486 int sz;
1487
1488 tmp = list_entry(t, struct elf_thread_status, list);
1489 sz = elf_dump_thread_status(signr, tmp);
1490 thread_status_size += sz;
1491 }
1492 }
1493 /* now collect the dump for the current */
1494 memset(prstatus, 0, sizeof(*prstatus));
1495 fill_prstatus(prstatus, current, signr);
1496 elf_core_copy_regs(&prstatus->pr_reg, regs);
1497
1498 segs = current->mm->map_count;
1499#ifdef ELF_CORE_EXTRA_PHDRS
1500 segs += ELF_CORE_EXTRA_PHDRS;
1501#endif
1502
1503 /* Set up header */
1504 fill_elf_header(elf, segs+1); /* including notes section */
1505
1506 has_dumped = 1;
1507 current->flags |= PF_DUMPCORE;
1508
1509 /*
1510 * Set up the notes in similar form to SVR4 core dumps made
1511 * with info from their /proc.
1512 */
1513
1514 fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1515
1516 fill_psinfo(psinfo, current->group_leader, current->mm);
1517 fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1518
Eric W. Biedermana9289722005-10-30 15:02:08 -08001519 numnote = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520
1521 auxv = (elf_addr_t *) current->mm->saved_auxv;
1522
1523 i = 0;
1524 do
1525 i += 2;
1526 while (auxv[i - 2] != AT_NULL);
1527 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1528 i * sizeof (elf_addr_t), auxv);
1529
1530 /* Try to dump the FPU. */
1531 if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu)))
1532 fill_note(notes + numnote++,
1533 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1534#ifdef ELF_CORE_COPY_XFPREGS
1535 if (elf_core_copy_task_xfpregs(current, xfpu))
1536 fill_note(notes + numnote++,
1537 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1538#endif
1539
1540 fs = get_fs();
1541 set_fs(KERNEL_DS);
1542
1543 DUMP_WRITE(elf, sizeof(*elf));
1544 offset += sizeof(*elf); /* Elf header */
1545 offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */
1546
1547 /* Write notes phdr entry */
1548 {
1549 struct elf_phdr phdr;
1550 int sz = 0;
1551
1552 for (i = 0; i < numnote; i++)
1553 sz += notesize(notes + i);
1554
1555 sz += thread_status_size;
1556
1557 fill_elf_note_phdr(&phdr, sz, offset);
1558 offset += sz;
1559 DUMP_WRITE(&phdr, sizeof(phdr));
1560 }
1561
1562 /* Page-align dumped data */
1563 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1564
1565 /* Write program headers for segments dump */
1566 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1567 struct elf_phdr phdr;
1568 size_t sz;
1569
1570 sz = vma->vm_end - vma->vm_start;
1571
1572 phdr.p_type = PT_LOAD;
1573 phdr.p_offset = offset;
1574 phdr.p_vaddr = vma->vm_start;
1575 phdr.p_paddr = 0;
1576 phdr.p_filesz = maydump(vma) ? sz : 0;
1577 phdr.p_memsz = sz;
1578 offset += phdr.p_filesz;
1579 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1580 if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W;
1581 if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X;
1582 phdr.p_align = ELF_EXEC_PAGESIZE;
1583
1584 DUMP_WRITE(&phdr, sizeof(phdr));
1585 }
1586
1587#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1588 ELF_CORE_WRITE_EXTRA_PHDRS;
1589#endif
1590
1591 /* write out the notes section */
1592 for (i = 0; i < numnote; i++)
1593 if (!writenote(notes + i, file))
1594 goto end_coredump;
1595
1596 /* write out the thread status notes section */
1597 list_for_each(t, &thread_list) {
1598 struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
1599 for (i = 0; i < tmp->num_notes; i++)
1600 if (!writenote(&tmp->notes[i], file))
1601 goto end_coredump;
1602 }
1603
1604 DUMP_SEEK(dataoff);
1605
1606 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1607 unsigned long addr;
1608
1609 if (!maydump(vma))
1610 continue;
1611
1612 for (addr = vma->vm_start;
1613 addr < vma->vm_end;
1614 addr += PAGE_SIZE) {
1615 struct page* page;
1616 struct vm_area_struct *vma;
1617
1618 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1619 &page, &vma) <= 0) {
1620 DUMP_SEEK (file->f_pos + PAGE_SIZE);
1621 } else {
1622 if (page == ZERO_PAGE(addr)) {
1623 DUMP_SEEK (file->f_pos + PAGE_SIZE);
1624 } else {
1625 void *kaddr;
1626 flush_cache_page(vma, addr, page_to_pfn(page));
1627 kaddr = kmap(page);
1628 if ((size += PAGE_SIZE) > limit ||
1629 !dump_write(file, kaddr,
1630 PAGE_SIZE)) {
1631 kunmap(page);
1632 page_cache_release(page);
1633 goto end_coredump;
1634 }
1635 kunmap(page);
1636 }
1637 page_cache_release(page);
1638 }
1639 }
1640 }
1641
1642#ifdef ELF_CORE_WRITE_EXTRA_DATA
1643 ELF_CORE_WRITE_EXTRA_DATA;
1644#endif
1645
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001646 if ((off_t)file->f_pos != offset) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 /* Sanity check */
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001648 printk(KERN_WARNING "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
1649 (off_t)file->f_pos, offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 }
1651
1652end_coredump:
1653 set_fs(fs);
1654
1655cleanup:
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001656 while (!list_empty(&thread_list)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657 struct list_head *tmp = thread_list.next;
1658 list_del(tmp);
1659 kfree(list_entry(tmp, struct elf_thread_status, list));
1660 }
1661
1662 kfree(elf);
1663 kfree(prstatus);
1664 kfree(psinfo);
1665 kfree(notes);
1666 kfree(fpu);
1667#ifdef ELF_CORE_COPY_XFPREGS
1668 kfree(xfpu);
1669#endif
1670 return has_dumped;
1671#undef NUM_NOTES
1672}
1673
1674#endif /* USE_ELF_CORE_DUMP */
1675
1676static int __init init_elf_binfmt(void)
1677{
1678 return register_binfmt(&elf_format);
1679}
1680
1681static void __exit exit_elf_binfmt(void)
1682{
1683 /* Remove the COFF and ELF loaders. */
1684 unregister_binfmt(&elf_format);
1685}
1686
1687core_initcall(init_elf_binfmt);
1688module_exit(exit_elf_binfmt);
1689MODULE_LICENSE("GPL");