blob: 295cbaa0e58ac6592c07aea6934170ac91bc3b3a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/stat.h>
16#include <linux/time.h>
17#include <linux/mm.h>
18#include <linux/mman.h>
19#include <linux/a.out.h>
20#include <linux/errno.h>
21#include <linux/signal.h>
22#include <linux/binfmts.h>
23#include <linux/string.h>
24#include <linux/file.h>
25#include <linux/fcntl.h>
26#include <linux/ptrace.h>
27#include <linux/slab.h>
28#include <linux/shm.h>
29#include <linux/personality.h>
30#include <linux/elfcore.h>
31#include <linux/init.h>
32#include <linux/highuid.h>
33#include <linux/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <linux/compiler.h>
35#include <linux/highmem.h>
36#include <linux/pagemap.h>
37#include <linux/security.h>
38#include <linux/syscalls.h>
39#include <linux/random.h>
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070040#include <linux/elf.h>
Alexey Dobriyan7e80d0d2007-05-08 00:28:59 -070041#include <linux/utsname.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <asm/uaccess.h>
43#include <asm/param.h>
44#include <asm/page.h>
45
/*
 * Forward declarations.  load_elf_binary() and load_elf_library() are the
 * handlers stored in the elf_format table below; elf_map() has a default
 * definition later in this file that is guarded by #ifndef elf_map.
 */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070046static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47static int load_elf_library(struct file *);
Jan Kratochvil60bfba72007-07-15 23:40:06 -070048static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
Linus Torvalds1da177e2005-04-16 15:20:36 -070050/*
51 * If we don't support core dumping, then supply a NULL so we
52 * don't even try.
53 */
/* Both USE_ELF_CORE_DUMP and CONFIG_ELF_CORE must be defined to get a real handler. */
Matt Mackall708e9a72006-01-08 01:05:25 -080054#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070055static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#else
/* Otherwise the .core_dump member of elf_format is initialised to NULL. */
57#define elf_core_dump NULL
58#endif
59
/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
60#if ELF_EXEC_PAGESIZE > PAGE_SIZE
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070061#define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#else
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070063#define ELF_MIN_ALIGN PAGE_SIZE
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#endif
65
/* Fall back to 0 when nothing included above has defined ELF_CORE_EFLAGS. */
66#ifndef ELF_CORE_EFLAGS
67#define ELF_CORE_EFLAGS 0
68#endif
69
/*
 * Address helpers in units of ELF_MIN_ALIGN (the mask arithmetic assumes
 * ELF_MIN_ALIGN is a power of two):
 *   ELF_PAGESTART(_v)  - _v rounded down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET(_v) - offset of _v above that boundary
 *   ELF_PAGEALIGN(_v)  - _v rounded up to an ELF_MIN_ALIGN boundary
 */
70#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
73
/*
 * Binary-format descriptor for ELF.  It collects the handlers declared
 * above into one struct linux_binfmt.
 */
74static struct linux_binfmt elf_format = {
75 .module = THIS_MODULE,
76 .load_binary = load_elf_binary,
77 .load_shlib = load_elf_library,
/* elf_core_dump is #defined to NULL when core dumping is not compiled in. */
78 .core_dump = elf_core_dump,
Andi Kleen9fbbd4d2007-02-13 13:26:26 +010079 .min_coredump = ELF_EXEC_PAGESIZE,
80 .hasvdso = 1
Linus Torvalds1da177e2005-04-16 15:20:36 -070081};
82
/*
 * BAD_ADDR(x) is simply IS_ERR_VALUE(x).  The loaders below apply it to the
 * values returned by do_brk(), do_mmap() and elf_map() to detect failure.
 */
Jan Kratochvil60bfba72007-07-15 23:40:06 -070083#define BAD_ADDR(x) IS_ERR_VALUE(x)
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int set_brk(unsigned long start, unsigned long end)
86{
87 start = ELF_PAGEALIGN(start);
88 end = ELF_PAGEALIGN(end);
89 if (end > start) {
90 unsigned long addr;
91 down_write(&current->mm->mmap_sem);
92 addr = do_brk(start, end - start);
93 up_write(&current->mm->mmap_sem);
94 if (BAD_ADDR(addr))
95 return addr;
96 }
97 current->mm->start_brk = current->mm->brk = end;
98 return 0;
99}
100
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101/* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700104 be in memory
105 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106static int padzero(unsigned long elf_bss)
107{
108 unsigned long nbyte;
109
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
115 }
116 return 0;
117}
118
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction the stack grows
 *   STACK_ROUND(sp, items) - sp moved by 'items' and rounded to a 16-byte
 *                            boundary in the direction the stack grows
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on the stack, updating sp
 *                            in place, and yield the start of the
 *                            reserved area
 *
 * Every argument is parenthesized so that compound expressions such as
 * STACK_ROUND(sp, a + b) expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
134static int
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700135create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136 int interp_aout, unsigned long load_addr,
137 unsigned long interp_load_addr)
138{
139 unsigned long p = bprm->p;
140 int argc = bprm->argc;
141 int envc = bprm->envc;
142 elf_addr_t __user *argv;
143 elf_addr_t __user *envp;
144 elf_addr_t __user *sp;
145 elf_addr_t __user *u_platform;
146 const char *k_platform = ELF_PLATFORM;
147 int items;
148 elf_addr_t *elf_info;
149 int ei_index = 0;
150 struct task_struct *tsk = current;
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700151 struct vm_area_struct *vma;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
153 /*
154 * If this architecture has a platform capability string, copy it
155 * to userspace. In some cases (Sparc), this info is impossible
156 * for userspace to get any other way, in others (i386) it is
157 * merely difficult.
158 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 u_platform = NULL;
160 if (k_platform) {
161 size_t len = strlen(k_platform) + 1;
162
163 /*
164 * In some cases (e.g. Hyper-Threading), we want to avoid L1
165 * evictions by the processes running on the same package. One
166 * thing we can do is to shuffle the initial stack for them.
167 */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700168
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 p = arch_align_stack(p);
170
171 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
172 if (__copy_to_user(u_platform, k_platform, len))
173 return -EFAULT;
174 }
175
176 /* Create the ELF interpreter info */
Jesper Juhl785d5572006-06-23 02:05:35 -0700177 elf_info = (elf_addr_t *)current->mm->saved_auxv;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178#define NEW_AUX_ENT(id, val) \
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700179 do { \
Jesper Juhl785d5572006-06-23 02:05:35 -0700180 elf_info[ei_index++] = id; \
181 elf_info[ei_index++] = val; \
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700182 } while (0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
184#ifdef ARCH_DLINFO
185 /*
186 * ARCH_DLINFO must come first so PPC can do its special alignment of
187 * AUXV.
188 */
189 ARCH_DLINFO;
190#endif
191 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
192 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
193 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
194 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700195 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
197 NEW_AUX_ENT(AT_BASE, interp_load_addr);
198 NEW_AUX_ENT(AT_FLAGS, 0);
199 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
Jesper Juhl785d5572006-06-23 02:05:35 -0700200 NEW_AUX_ENT(AT_UID, tsk->uid);
201 NEW_AUX_ENT(AT_EUID, tsk->euid);
202 NEW_AUX_ENT(AT_GID, tsk->gid);
203 NEW_AUX_ENT(AT_EGID, tsk->egid);
204 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205 if (k_platform) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700206 NEW_AUX_ENT(AT_PLATFORM,
Jesper Juhl785d5572006-06-23 02:05:35 -0700207 (elf_addr_t)(unsigned long)u_platform);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 }
209 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
Jesper Juhl785d5572006-06-23 02:05:35 -0700210 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 }
212#undef NEW_AUX_ENT
213 /* AT_NULL is zero; clear the rest too */
214 memset(&elf_info[ei_index], 0,
215 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
216
217 /* And advance past the AT_NULL entry. */
218 ei_index += 2;
219
220 sp = STACK_ADD(p, ei_index);
221
222 items = (argc + 1) + (envc + 1);
223 if (interp_aout) {
224 items += 3; /* a.out interpreters require argv & envp too */
225 } else {
226 items += 1; /* ELF interpreters only put argc on the stack */
227 }
228 bprm->p = STACK_ROUND(sp, items);
229
230 /* Point sp at the lowest address on the stack */
231#ifdef CONFIG_STACK_GROWSUP
232 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700233 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234#else
235 sp = (elf_addr_t __user *)bprm->p;
236#endif
237
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700238
239 /*
240 * Grow the stack manually; some architectures have a limit on how
241 * far ahead a user-space access may be in order to grow the stack.
242 */
243 vma = find_extend_vma(current->mm, bprm->p);
244 if (!vma)
245 return -EFAULT;
246
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
248 if (__put_user(argc, sp++))
249 return -EFAULT;
250 if (interp_aout) {
251 argv = sp + 2;
252 envp = argv + argc + 1;
Heiko Carstens841d5fb2006-12-06 20:36:35 -0800253 if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
254 __put_user((elf_addr_t)(unsigned long)envp, sp++))
255 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 } else {
257 argv = sp;
258 envp = argv + argc + 1;
259 }
260
261 /* Populate argv and envp */
Greg Kroah-Hartmana84a5052005-05-11 00:10:44 -0700262 p = current->mm->arg_end = current->mm->arg_start;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 while (argc-- > 0) {
264 size_t len;
Heiko Carstens841d5fb2006-12-06 20:36:35 -0800265 if (__put_user((elf_addr_t)p, argv++))
266 return -EFAULT;
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700267 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
268 if (!len || len > MAX_ARG_STRLEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 return 0;
270 p += len;
271 }
272 if (__put_user(0, argv))
273 return -EFAULT;
274 current->mm->arg_end = current->mm->env_start = p;
275 while (envc-- > 0) {
276 size_t len;
Heiko Carstens841d5fb2006-12-06 20:36:35 -0800277 if (__put_user((elf_addr_t)p, envp++))
278 return -EFAULT;
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700279 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
280 if (!len || len > MAX_ARG_STRLEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 return 0;
282 p += len;
283 }
284 if (__put_user(0, envp))
285 return -EFAULT;
286 current->mm->env_end = p;
287
288 /* Put the elf_info on the stack in the right place. */
289 sp = (elf_addr_t __user *)envp + 1;
290 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
291 return -EFAULT;
292 return 0;
293}
294
295#ifndef elf_map
296
297static unsigned long elf_map(struct file *filep, unsigned long addr,
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700298 struct elf_phdr *eppnt, int prot, int type,
299 unsigned long total_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300{
301 unsigned long map_addr;
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700302 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
303 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
304 addr = ELF_PAGESTART(addr);
305 size = ELF_PAGEALIGN(size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306
David Gibsondda6ebd2006-01-08 01:03:35 -0800307 /* mmap() will return -EINVAL if given a zero size, but a
308 * segment with zero filesize is perfectly valid */
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700309 if (!size)
310 return addr;
311
312 down_write(&current->mm->mmap_sem);
313 /*
314 * total_size is the size of the ELF (interpreter) image.
315 * The _first_ mmap needs to know the full size, otherwise
316 * randomization might put this image into an overlapping
317 * position with the ELF binary image. (since size < total_size)
318 * So we first map the 'big' image - and unmap the remainder at
319 * the end. (which unmap is needed for ELF images with holes.)
320 */
321 if (total_size) {
322 total_size = ELF_PAGEALIGN(total_size);
323 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
324 if (!BAD_ADDR(map_addr))
325 do_munmap(current->mm, map_addr+size, total_size-size);
326 } else
327 map_addr = do_mmap(filep, addr, size, prot, type, off);
328
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 up_write(&current->mm->mmap_sem);
330 return(map_addr);
331}
332
333#endif /* !elf_map */
334
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700335static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
336{
337 int i, first_idx = -1, last_idx = -1;
338
339 for (i = 0; i < nr; i++) {
340 if (cmds[i].p_type == PT_LOAD) {
341 last_idx = i;
342 if (first_idx == -1)
343 first_idx = i;
344 }
345 }
346 if (first_idx == -1)
347 return 0;
348
349 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
350 ELF_PAGESTART(cmds[first_idx].p_vaddr);
351}
352
353
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354/* This is much more generalized than the library routine read function,
355 so we keep this separate. Technically the library read function
356 is only provided so that we can read a.out libraries that have
357 an ELF header */
358
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700359static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700360 struct file *interpreter, unsigned long *interp_map_addr,
361 unsigned long no_base)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362{
363 struct elf_phdr *elf_phdata;
364 struct elf_phdr *eppnt;
365 unsigned long load_addr = 0;
366 int load_addr_set = 0;
367 unsigned long last_bss = 0, elf_bss = 0;
368 unsigned long error = ~0UL;
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700369 unsigned long total_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 int retval, i, size;
371
372 /* First of all, some simple consistency checks */
373 if (interp_elf_ex->e_type != ET_EXEC &&
374 interp_elf_ex->e_type != ET_DYN)
375 goto out;
376 if (!elf_check_arch(interp_elf_ex))
377 goto out;
378 if (!interpreter->f_op || !interpreter->f_op->mmap)
379 goto out;
380
381 /*
382 * If the size of this structure has changed, then punt, since
383 * we will be doing the wrong thing.
384 */
385 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
386 goto out;
387 if (interp_elf_ex->e_phnum < 1 ||
388 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
389 goto out;
390
391 /* Now read in all of the header information */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
393 if (size > ELF_MIN_ALIGN)
394 goto out;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700395 elf_phdata = kmalloc(size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 if (!elf_phdata)
397 goto out;
398
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700399 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
400 (char *)elf_phdata,size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 error = -EIO;
402 if (retval != size) {
403 if (retval < 0)
404 error = retval;
405 goto out_close;
406 }
407
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700408 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
409 if (!total_size) {
410 error = -EINVAL;
411 goto out_close;
412 }
413
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 eppnt = elf_phdata;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700415 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
416 if (eppnt->p_type == PT_LOAD) {
417 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
418 int elf_prot = 0;
419 unsigned long vaddr = 0;
420 unsigned long k, map_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700422 if (eppnt->p_flags & PF_R)
423 elf_prot = PROT_READ;
424 if (eppnt->p_flags & PF_W)
425 elf_prot |= PROT_WRITE;
426 if (eppnt->p_flags & PF_X)
427 elf_prot |= PROT_EXEC;
428 vaddr = eppnt->p_vaddr;
429 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
430 elf_type |= MAP_FIXED;
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700431 else if (no_base && interp_elf_ex->e_type == ET_DYN)
432 load_addr = -vaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700434 map_addr = elf_map(interpreter, load_addr + vaddr,
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700435 eppnt, elf_prot, elf_type, total_size);
436 total_size = 0;
437 if (!*interp_map_addr)
438 *interp_map_addr = map_addr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700439 error = map_addr;
440 if (BAD_ADDR(map_addr))
441 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700443 if (!load_addr_set &&
444 interp_elf_ex->e_type == ET_DYN) {
445 load_addr = map_addr - ELF_PAGESTART(vaddr);
446 load_addr_set = 1;
447 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700449 /*
450 * Check to see if the section's size will overflow the
451 * allowed task size. Note that p_filesz must always be
452 * <= p_memsize so it's only necessary to check p_memsz.
453 */
454 k = load_addr + eppnt->p_vaddr;
Chuck Ebbertce510592006-07-03 00:24:14 -0700455 if (BAD_ADDR(k) ||
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700456 eppnt->p_filesz > eppnt->p_memsz ||
457 eppnt->p_memsz > TASK_SIZE ||
458 TASK_SIZE - eppnt->p_memsz < k) {
459 error = -ENOMEM;
460 goto out_close;
461 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700463 /*
464 * Find the end of the file mapping for this phdr, and
465 * keep track of the largest address we see for this.
466 */
467 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
468 if (k > elf_bss)
469 elf_bss = k;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700471 /*
472 * Do the same thing for the memory mapping - between
473 * elf_bss and last_bss is the bss section.
474 */
475 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
476 if (k > last_bss)
477 last_bss = k;
478 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 }
480
481 /*
482 * Now fill out the bss section. First pad the last page up
483 * to the page boundary, and then perform a mmap to make sure
484 * that there are zero-mapped pages up to and including the
485 * last bss page.
486 */
487 if (padzero(elf_bss)) {
488 error = -EFAULT;
489 goto out_close;
490 }
491
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700492 /* What we have mapped so far */
493 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494
495 /* Map the last of the bss segment */
496 if (last_bss > elf_bss) {
497 down_write(&current->mm->mmap_sem);
498 error = do_brk(elf_bss, last_bss - elf_bss);
499 up_write(&current->mm->mmap_sem);
500 if (BAD_ADDR(error))
501 goto out_close;
502 }
503
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700504 error = load_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505
506out_close:
507 kfree(elf_phdata);
508out:
509 return error;
510}
511
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700512static unsigned long load_aout_interp(struct exec *interp_ex,
513 struct file *interpreter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514{
515 unsigned long text_data, elf_entry = ~0UL;
516 char __user * addr;
517 loff_t offset;
518
519 current->mm->end_code = interp_ex->a_text;
520 text_data = interp_ex->a_text + interp_ex->a_data;
521 current->mm->end_data = text_data;
522 current->mm->brk = interp_ex->a_bss + text_data;
523
524 switch (N_MAGIC(*interp_ex)) {
525 case OMAGIC:
526 offset = 32;
527 addr = (char __user *)0;
528 break;
529 case ZMAGIC:
530 case QMAGIC:
531 offset = N_TXTOFF(*interp_ex);
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700532 addr = (char __user *)N_TXTADDR(*interp_ex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 break;
534 default:
535 goto out;
536 }
537
538 down_write(&current->mm->mmap_sem);
539 do_brk(0, text_data);
540 up_write(&current->mm->mmap_sem);
541 if (!interpreter->f_op || !interpreter->f_op->read)
542 goto out;
543 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
544 goto out;
545 flush_icache_range((unsigned long)addr,
546 (unsigned long)addr + text_data);
547
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 down_write(&current->mm->mmap_sem);
549 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
550 interp_ex->a_bss);
551 up_write(&current->mm->mmap_sem);
552 elf_entry = interp_ex->a_entry;
553
554out:
555 return elf_entry;
556}
557
558/*
559 * These are the functions used to load ELF style executables and shared
560 * libraries. There is no binary dependent code anywhere else.
561 */
562
/*
 * Interpreter kinds.  load_elf_binary() ORs INTERPRETER_AOUT and
 * INTERPRETER_ELF together and clears bits while it works out which format
 * the interpreter file actually has.
 */
563#define INTERPRETER_NONE 0
564#define INTERPRETER_AOUT 1
565#define INTERPRETER_ELF 2
566
/*
 * Default mask for the random page count used by randomize_stack_top();
 * a definition made before this point takes precedence.
 */
Andi Kleen913bd902006-03-25 16:29:09 +0100567#ifndef STACK_RND_MASK
James Bottomleyd1cabd632007-03-16 13:38:35 -0800568#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
Andi Kleen913bd902006-03-25 16:29:09 +0100569#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
571static unsigned long randomize_stack_top(unsigned long stack_top)
572{
573 unsigned int random_variable = 0;
574
Andi Kleenc16b63e02006-09-26 10:52:28 +0200575 if ((current->flags & PF_RANDOMIZE) &&
576 !(current->personality & ADDR_NO_RANDOMIZE)) {
Andi Kleen913bd902006-03-25 16:29:09 +0100577 random_variable = get_random_int() & STACK_RND_MASK;
578 random_variable <<= PAGE_SHIFT;
579 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580#ifdef CONFIG_STACK_GROWSUP
Andi Kleen913bd902006-03-25 16:29:09 +0100581 return PAGE_ALIGN(stack_top) + random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582#else
Andi Kleen913bd902006-03-25 16:29:09 +0100583 return PAGE_ALIGN(stack_top) - random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584#endif
585}
586
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700587static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588{
589 struct file *interpreter = NULL; /* to shut gcc up */
590 unsigned long load_addr = 0, load_bias = 0;
591 int load_addr_set = 0;
592 char * elf_interpreter = NULL;
593 unsigned int interpreter_type = INTERPRETER_NONE;
594 unsigned char ibcs2_interpreter = 0;
595 unsigned long error;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700596 struct elf_phdr *elf_ppnt, *elf_phdata;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 unsigned long elf_bss, elf_brk;
598 int elf_exec_fileno;
599 int retval, i;
600 unsigned int size;
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700601 unsigned long elf_entry;
602 unsigned long interp_load_addr = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 unsigned long start_code, end_code, start_data, end_data;
604 unsigned long reloc_func_desc = 0;
605 char passed_fileno[6];
606 struct files_struct *files;
David Rientjes8de61e62006-12-06 20:40:16 -0800607 int executable_stack = EXSTACK_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 unsigned long def_flags = 0;
609 struct {
610 struct elfhdr elf_ex;
611 struct elfhdr interp_elf_ex;
612 struct exec interp_ex;
613 } *loc;
614
615 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
616 if (!loc) {
617 retval = -ENOMEM;
618 goto out_ret;
619 }
620
621 /* Get the exec-header */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700622 loc->elf_ex = *((struct elfhdr *)bprm->buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623
624 retval = -ENOEXEC;
625 /* First of all, some simple consistency checks */
626 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
627 goto out;
628
629 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
630 goto out;
631 if (!elf_check_arch(&loc->elf_ex))
632 goto out;
633 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
634 goto out;
635
636 /* Now read in all of the header information */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
638 goto out;
639 if (loc->elf_ex.e_phnum < 1 ||
640 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
641 goto out;
642 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
643 retval = -ENOMEM;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700644 elf_phdata = kmalloc(size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 if (!elf_phdata)
646 goto out;
647
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700648 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
649 (char *)elf_phdata, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650 if (retval != size) {
651 if (retval >= 0)
652 retval = -EIO;
653 goto out_free_ph;
654 }
655
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700656 files = current->files; /* Refcounted so ok */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 retval = unshare_files();
658 if (retval < 0)
659 goto out_free_ph;
660 if (files == current->files) {
661 put_files_struct(files);
662 files = NULL;
663 }
664
665 /* exec will make our files private anyway, but for the a.out
666 loader stuff we need to do it earlier */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 retval = get_unused_fd();
668 if (retval < 0)
669 goto out_free_fh;
670 get_file(bprm->file);
671 fd_install(elf_exec_fileno = retval, bprm->file);
672
673 elf_ppnt = elf_phdata;
674 elf_bss = 0;
675 elf_brk = 0;
676
677 start_code = ~0UL;
678 end_code = 0;
679 start_data = 0;
680 end_data = 0;
681
682 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
683 if (elf_ppnt->p_type == PT_INTERP) {
684 /* This is the program interpreter used for
685 * shared libraries - for now assume that this
686 * is an a.out format binary
687 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 retval = -ENOEXEC;
689 if (elf_ppnt->p_filesz > PATH_MAX ||
690 elf_ppnt->p_filesz < 2)
691 goto out_free_file;
692
693 retval = -ENOMEM;
Jesper Juhl792db3a2006-01-09 20:54:45 -0800694 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700695 GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 if (!elf_interpreter)
697 goto out_free_file;
698
699 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700700 elf_interpreter,
701 elf_ppnt->p_filesz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 if (retval != elf_ppnt->p_filesz) {
703 if (retval >= 0)
704 retval = -EIO;
705 goto out_free_interp;
706 }
707 /* make sure path is NULL terminated */
708 retval = -ENOEXEC;
709 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
710 goto out_free_interp;
711
712 /* If the program interpreter is one of these two,
713 * then assume an iBCS2 image. Otherwise assume
714 * a native linux image.
715 */
716 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
717 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
718 ibcs2_interpreter = 1;
719
720 /*
721 * The early SET_PERSONALITY here is so that the lookup
722 * for the interpreter happens in the namespace of the
723 * to-be-execed image. SET_PERSONALITY can select an
724 * alternate root.
725 *
726 * However, SET_PERSONALITY is NOT allowed to switch
727 * this task into the new images's memory mapping
728 * policy - that is, TASK_SIZE must still evaluate to
729 * that which is appropriate to the execing application.
730 * This is because exit_mmap() needs to have TASK_SIZE
731 * evaluate to the size of the old image.
732 *
733 * So if (say) a 64-bit application is execing a 32-bit
734 * application it is the architecture's responsibility
735 * to defer changing the value of TASK_SIZE until the
736 * switch really is going to happen - do this in
737 * flush_thread(). - akpm
738 */
739 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
740
741 interpreter = open_exec(elf_interpreter);
742 retval = PTR_ERR(interpreter);
743 if (IS_ERR(interpreter))
744 goto out_free_interp;
Alexey Dobriyan1fb84492007-01-26 00:57:16 -0800745
746 /*
747 * If the binary is not readable then enforce
748 * mm->dumpable = 0 regardless of the interpreter's
749 * permissions.
750 */
751 if (file_permission(interpreter, MAY_READ) < 0)
752 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
753
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700754 retval = kernel_read(interpreter, 0, bprm->buf,
755 BINPRM_BUF_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 if (retval != BINPRM_BUF_SIZE) {
757 if (retval >= 0)
758 retval = -EIO;
759 goto out_free_dentry;
760 }
761
762 /* Get the exec headers */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700763 loc->interp_ex = *((struct exec *)bprm->buf);
764 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 break;
766 }
767 elf_ppnt++;
768 }
769
770 elf_ppnt = elf_phdata;
771 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
772 if (elf_ppnt->p_type == PT_GNU_STACK) {
773 if (elf_ppnt->p_flags & PF_X)
774 executable_stack = EXSTACK_ENABLE_X;
775 else
776 executable_stack = EXSTACK_DISABLE_X;
777 break;
778 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
780 /* Some simple consistency checks for the interpreter */
781 if (elf_interpreter) {
782 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
783
784 /* Now figure out which format our binary is */
785 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
786 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
787 (N_MAGIC(loc->interp_ex) != QMAGIC))
788 interpreter_type = INTERPRETER_ELF;
789
790 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
791 interpreter_type &= ~INTERPRETER_ELF;
792
793 retval = -ELIBBAD;
794 if (!interpreter_type)
795 goto out_free_dentry;
796
797 /* Make sure only one type was selected */
798 if ((interpreter_type & INTERPRETER_ELF) &&
799 interpreter_type != INTERPRETER_ELF) {
800 // FIXME - ratelimit this before re-enabling
801 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
802 interpreter_type = INTERPRETER_ELF;
803 }
804 /* Verify the interpreter has a valid arch */
805 if ((interpreter_type == INTERPRETER_ELF) &&
806 !elf_check_arch(&loc->interp_elf_ex))
807 goto out_free_dentry;
808 } else {
809 /* Executables without an interpreter also need a personality */
810 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
811 }
812
813 /* OK, we are done with that, now set up the arg stuff,
814 and then start this sucker up */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
816 char *passed_p = passed_fileno;
817 sprintf(passed_fileno, "%d", elf_exec_fileno);
818
819 if (elf_interpreter) {
820 retval = copy_strings_kernel(1, &passed_p, bprm);
821 if (retval)
822 goto out_free_dentry;
823 bprm->argc++;
824 }
825 }
826
827 /* Flush all traces of the currently running executable */
828 retval = flush_old_exec(bprm);
829 if (retval)
830 goto out_free_dentry;
831
832 /* Discard our unneeded old files struct */
833 if (files) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 put_files_struct(files);
835 files = NULL;
836 }
837
838 /* OK, This is the point of no return */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 current->flags &= ~PF_FORKNOEXEC;
840 current->mm->def_flags = def_flags;
841
842 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
843 may depend on the personality. */
844 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
845 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
846 current->personality |= READ_IMPLIES_EXEC;
847
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700848 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 current->flags |= PF_RANDOMIZE;
850 arch_pick_mmap_layout(current->mm);
851
852 /* Do this so that we can load the interpreter, if need be. We will
853 change some of these later */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 current->mm->free_area_cache = current->mm->mmap_base;
Wolfgang Wander1363c3c2005-06-21 17:14:49 -0700855 current->mm->cached_hole_size = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
857 executable_stack);
858 if (retval < 0) {
859 send_sig(SIGKILL, current, 0);
860 goto out_free_dentry;
861 }
862
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863 current->mm->start_stack = bprm->p;
864
865 /* Now we do a little grungy work by mmaping the ELF image into
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700866 the correct location in memory. */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700867 for(i = 0, elf_ppnt = elf_phdata;
868 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 int elf_prot = 0, elf_flags;
870 unsigned long k, vaddr;
871
872 if (elf_ppnt->p_type != PT_LOAD)
873 continue;
874
875 if (unlikely (elf_brk > elf_bss)) {
876 unsigned long nbyte;
877
878 /* There was a PT_LOAD segment with p_memsz > p_filesz
879 before this one. Map anonymous pages, if needed,
880 and clear the area. */
881 retval = set_brk (elf_bss + load_bias,
882 elf_brk + load_bias);
883 if (retval) {
884 send_sig(SIGKILL, current, 0);
885 goto out_free_dentry;
886 }
887 nbyte = ELF_PAGEOFFSET(elf_bss);
888 if (nbyte) {
889 nbyte = ELF_MIN_ALIGN - nbyte;
890 if (nbyte > elf_brk - elf_bss)
891 nbyte = elf_brk - elf_bss;
892 if (clear_user((void __user *)elf_bss +
893 load_bias, nbyte)) {
894 /*
895 * This bss-zeroing can fail if the ELF
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700896 * file specifies odd protections. So
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 * we don't check the return value
898 */
899 }
900 }
901 }
902
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700903 if (elf_ppnt->p_flags & PF_R)
904 elf_prot |= PROT_READ;
905 if (elf_ppnt->p_flags & PF_W)
906 elf_prot |= PROT_WRITE;
907 if (elf_ppnt->p_flags & PF_X)
908 elf_prot |= PROT_EXEC;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700910 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911
912 vaddr = elf_ppnt->p_vaddr;
913 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
914 elf_flags |= MAP_FIXED;
915 } else if (loc->elf_ex.e_type == ET_DYN) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700916 /* Try and get dynamic programs out of the way of the
917 * default mmap base, as well as whatever program they
918 * might try to exec. This is because the brk will
919 * follow the loader, and is not movable. */
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700920#ifdef CONFIG_X86
921 load_bias = 0;
922#else
Linus Torvalds90cb28e2007-01-06 13:28:21 -0800923 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700924#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 }
926
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700927 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700928 elf_prot, elf_flags,0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 if (BAD_ADDR(error)) {
930 send_sig(SIGKILL, current, 0);
Alexey Kuznetsovb140f2512007-05-08 00:31:57 -0700931 retval = IS_ERR((void *)error) ?
932 PTR_ERR((void*)error) : -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 goto out_free_dentry;
934 }
935
936 if (!load_addr_set) {
937 load_addr_set = 1;
938 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
939 if (loc->elf_ex.e_type == ET_DYN) {
940 load_bias += error -
941 ELF_PAGESTART(load_bias + vaddr);
942 load_addr += load_bias;
943 reloc_func_desc = load_bias;
944 }
945 }
946 k = elf_ppnt->p_vaddr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700947 if (k < start_code)
948 start_code = k;
949 if (start_data < k)
950 start_data = k;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951
952 /*
953 * Check to see if the section's size will overflow the
954 * allowed task size. Note that p_filesz must always be
955 * <= p_memsz so it is only necessary to check p_memsz.
956 */
Chuck Ebbertce510592006-07-03 00:24:14 -0700957 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 elf_ppnt->p_memsz > TASK_SIZE ||
959 TASK_SIZE - elf_ppnt->p_memsz < k) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700960 /* set_brk can never work. Avoid overflows. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 send_sig(SIGKILL, current, 0);
Alexey Kuznetsovb140f2512007-05-08 00:31:57 -0700962 retval = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 goto out_free_dentry;
964 }
965
966 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
967
968 if (k > elf_bss)
969 elf_bss = k;
970 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
971 end_code = k;
972 if (end_data < k)
973 end_data = k;
974 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
975 if (k > elf_brk)
976 elf_brk = k;
977 }
978
979 loc->elf_ex.e_entry += load_bias;
980 elf_bss += load_bias;
981 elf_brk += load_bias;
982 start_code += load_bias;
983 end_code += load_bias;
984 start_data += load_bias;
985 end_data += load_bias;
986
987 /* Calling set_brk effectively mmaps the pages that we need
988 * for the bss and break sections. We must do this before
989 * mapping in the interpreter, to make sure it doesn't wind
990 * up getting placed where the bss needs to go.
991 */
992 retval = set_brk(elf_bss, elf_brk);
993 if (retval) {
994 send_sig(SIGKILL, current, 0);
995 goto out_free_dentry;
996 }
akpm@osdl.org6de50512005-10-11 08:29:08 -0700997 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 send_sig(SIGSEGV, current, 0);
999 retval = -EFAULT; /* Nobody gets to see this, but.. */
1000 goto out_free_dentry;
1001 }
1002
1003 if (elf_interpreter) {
Jan Kratochvil60bfba72007-07-15 23:40:06 -07001004 if (interpreter_type == INTERPRETER_AOUT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 elf_entry = load_aout_interp(&loc->interp_ex,
1006 interpreter);
Jan Kratochvil60bfba72007-07-15 23:40:06 -07001007 } else {
Andrew Morton4d3b5732007-07-15 23:41:03 -07001008 unsigned long uninitialized_var(interp_map_addr);
Jan Kratochvil60bfba72007-07-15 23:40:06 -07001009
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1011 interpreter,
Jan Kratochvil60bfba72007-07-15 23:40:06 -07001012 &interp_map_addr,
1013 load_bias);
1014 if (!BAD_ADDR(elf_entry)) {
1015 /*
1016 * load_elf_interp() returns relocation
1017 * adjustment
1018 */
1019 interp_load_addr = elf_entry;
1020 elf_entry += loc->interp_elf_ex.e_entry;
1021 }
1022 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 if (BAD_ADDR(elf_entry)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 force_sig(SIGSEGV, current);
Chuck Ebbertce510592006-07-03 00:24:14 -07001025 retval = IS_ERR((void *)elf_entry) ?
1026 (int)elf_entry : -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 goto out_free_dentry;
1028 }
1029 reloc_func_desc = interp_load_addr;
1030
1031 allow_write_access(interpreter);
1032 fput(interpreter);
1033 kfree(elf_interpreter);
1034 } else {
1035 elf_entry = loc->elf_ex.e_entry;
Suresh Siddha5342fba2006-02-26 04:18:28 +01001036 if (BAD_ADDR(elf_entry)) {
Chuck Ebbertce510592006-07-03 00:24:14 -07001037 force_sig(SIGSEGV, current);
1038 retval = -EINVAL;
Suresh Siddha5342fba2006-02-26 04:18:28 +01001039 goto out_free_dentry;
1040 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 }
1042
1043 kfree(elf_phdata);
1044
1045 if (interpreter_type != INTERPRETER_AOUT)
1046 sys_close(elf_exec_fileno);
1047
1048 set_binfmt(&elf_format);
1049
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -07001050#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1051 retval = arch_setup_additional_pages(bprm, executable_stack);
1052 if (retval < 0) {
1053 send_sig(SIGKILL, current, 0);
Roland McGrath18c8baff2005-04-28 15:17:19 -07001054 goto out;
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -07001055 }
1056#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1057
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 compute_creds(bprm);
1059 current->flags &= ~PF_FORKNOEXEC;
Ollie Wildb6a2fea2007-07-19 01:48:16 -07001060 retval = create_elf_tables(bprm, &loc->elf_ex,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001061 (interpreter_type == INTERPRETER_AOUT),
1062 load_addr, interp_load_addr);
Ollie Wildb6a2fea2007-07-19 01:48:16 -07001063 if (retval < 0) {
1064 send_sig(SIGKILL, current, 0);
1065 goto out;
1066 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 /* N.B. passed_fileno might not be initialized? */
1068 if (interpreter_type == INTERPRETER_AOUT)
1069 current->mm->arg_start += strlen(passed_fileno) + 1;
1070 current->mm->end_code = end_code;
1071 current->mm->start_code = start_code;
1072 current->mm->start_data = start_data;
1073 current->mm->end_data = end_data;
1074 current->mm->start_stack = bprm->p;
1075
1076 if (current->personality & MMAP_PAGE_ZERO) {
1077 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1078 and some applications "depend" upon this behavior.
1079 Since we do not have the power to recompile these, we
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001080 emulate the SVr4 behavior. Sigh. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 down_write(&current->mm->mmap_sem);
1082 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1083 MAP_FIXED | MAP_PRIVATE, 0);
1084 up_write(&current->mm->mmap_sem);
1085 }
1086
1087#ifdef ELF_PLAT_INIT
1088 /*
1089 * The ABI may specify that certain registers be set up in special
1090 * ways (on i386 %edx is the address of a DT_FINI function, for
1091 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1092 * that the e_entry field is the address of the function descriptor
1093 * for the startup routine, rather than the address of the startup
1094 * routine itself. This macro performs whatever initialization to
1095 * the regs structure is required as well as any relocations to the
1096 * function descriptor entries when executing dynamically links apps.
1097 */
1098 ELF_PLAT_INIT(regs, reloc_func_desc);
1099#endif
1100
1101 start_thread(regs, elf_entry, bprm->p);
1102 if (unlikely(current->ptrace & PT_PTRACED)) {
1103 if (current->ptrace & PT_TRACE_EXEC)
1104 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1105 else
1106 send_sig(SIGTRAP, current, 0);
1107 }
1108 retval = 0;
1109out:
1110 kfree(loc);
1111out_ret:
1112 return retval;
1113
1114 /* error cleanup */
1115out_free_dentry:
1116 allow_write_access(interpreter);
1117 if (interpreter)
1118 fput(interpreter);
1119out_free_interp:
Jesper Juhlf99d49a2005-11-07 01:01:34 -08001120 kfree(elf_interpreter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121out_free_file:
1122 sys_close(elf_exec_fileno);
1123out_free_fh:
Kirill Korotaev3b9b8ab2006-09-29 02:00:05 -07001124 if (files)
1125 reset_files_struct(current, files);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126out_free_ph:
1127 kfree(elf_phdata);
1128 goto out;
1129}
1130
1131/* This is really simpleminded and specialized - we are loading an
1132 a.out library that is given an ELF header. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133static int load_elf_library(struct file *file)
1134{
1135 struct elf_phdr *elf_phdata;
1136 struct elf_phdr *eppnt;
1137 unsigned long elf_bss, bss, len;
1138 int retval, error, i, j;
1139 struct elfhdr elf_ex;
1140
1141 error = -ENOEXEC;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001142 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 if (retval != sizeof(elf_ex))
1144 goto out;
1145
1146 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1147 goto out;
1148
1149 /* First of all, some simple consistency checks */
1150 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001151 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 goto out;
1153
1154 /* Now read in all of the header information */
1155
1156 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1157 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1158
1159 error = -ENOMEM;
1160 elf_phdata = kmalloc(j, GFP_KERNEL);
1161 if (!elf_phdata)
1162 goto out;
1163
1164 eppnt = elf_phdata;
1165 error = -ENOEXEC;
1166 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1167 if (retval != j)
1168 goto out_free_ph;
1169
1170 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1171 if ((eppnt + i)->p_type == PT_LOAD)
1172 j++;
1173 if (j != 1)
1174 goto out_free_ph;
1175
1176 while (eppnt->p_type != PT_LOAD)
1177 eppnt++;
1178
1179 /* Now use mmap to map the library into memory. */
1180 down_write(&current->mm->mmap_sem);
1181 error = do_mmap(file,
1182 ELF_PAGESTART(eppnt->p_vaddr),
1183 (eppnt->p_filesz +
1184 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1185 PROT_READ | PROT_WRITE | PROT_EXEC,
1186 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1187 (eppnt->p_offset -
1188 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1189 up_write(&current->mm->mmap_sem);
1190 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1191 goto out_free_ph;
1192
1193 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1194 if (padzero(elf_bss)) {
1195 error = -EFAULT;
1196 goto out_free_ph;
1197 }
1198
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001199 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1200 ELF_MIN_ALIGN - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 bss = eppnt->p_memsz + eppnt->p_vaddr;
1202 if (bss > len) {
1203 down_write(&current->mm->mmap_sem);
1204 do_brk(len, bss - len);
1205 up_write(&current->mm->mmap_sem);
1206 }
1207 error = 0;
1208
1209out_free_ph:
1210 kfree(elf_phdata);
1211out:
1212 return error;
1213}
1214
1215/*
1216 * Note that some platforms still use traditional core dumps and not
1217 * the ELF core dump. Each platform can select it as appropriate.
1218 */
Matt Mackall708e9a72006-01-08 01:05:25 -08001219#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220
1221/*
1222 * ELF core dumper
1223 *
1224 * Modelled on fs/exec.c:aout_core_dump()
1225 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1226 */
1227/*
1228 * These are the only things you should do on a core-file: use only these
1229 * functions to write out all the necessary info.
1230 */
1231static int dump_write(struct file *file, const void *addr, int nr)
1232{
1233 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1234}
1235
Daniel Jacobowitz5db92852005-06-15 22:26:34 -07001236static int dump_seek(struct file *file, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237{
Andi Kleend025c9d2006-09-30 23:29:28 -07001238 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
Petr Vandrovec7f14daa2006-10-13 04:13:16 +02001239 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 return 0;
Andi Kleend025c9d2006-09-30 23:29:28 -07001241 } else {
1242 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1243 if (!buf)
1244 return 0;
1245 while (off > 0) {
1246 unsigned long n = off;
1247 if (n > PAGE_SIZE)
1248 n = PAGE_SIZE;
1249 if (!dump_write(file, buf, n))
1250 return 0;
1251 off -= n;
1252 }
1253 free_page((unsigned long)buf);
1254 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 return 1;
1256}
1257
1258/*
1259 * Decide whether a segment is worth dumping; default is yes to be
1260 * sure (missing info is worse than too much; etc).
1261 * Personally I'd include everything, and use the coredump limit...
1262 *
1263 * I think we should skip something. But I am not sure how. H.J.
1264 */
1265static int maydump(struct vm_area_struct *vma)
1266{
Roland McGrathe5b97dd2007-01-26 00:56:48 -08001267 /* The vma can be set up to tell us the answer directly. */
1268 if (vma->vm_flags & VM_ALWAYSDUMP)
1269 return 1;
1270
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 /* Do not dump I/O mapped devices or special mappings */
1272 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1273 return 0;
1274
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001275 /* Dump shared memory only if mapped from an anonymous file. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276 if (vma->vm_flags & VM_SHARED)
Josef "Jeff" Sipek0f7fc9e2006-12-08 02:36:35 -08001277 return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278
1279 /* If it hasn't been written to, don't write it out */
1280 if (!vma->anon_vma)
1281 return 0;
1282
1283 return 1;
1284}
1285
/* An ELF note in memory */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, e.g. NT_PRSTATUS */
	unsigned int datasz;	/* size of the payload at @data, in bytes */
	void *data;		/* note payload */
};
1294
1295static int notesize(struct memelfnote *en)
1296{
1297 int sz;
1298
1299 sz = sizeof(struct elf_note);
1300 sz += roundup(strlen(en->name) + 1, 4);
1301 sz += roundup(en->datasz, 4);
1302
1303 return sz;
1304}
1305
/*
 * Write @nr bytes from @addr to the core file and advance *@foffset by the
 * same amount.  Relies on a `struct file *file' being visible in the
 * enclosing function and makes that function return 0 if the write fails.
 * Note that @nr is evaluated twice.
 */
#define DUMP_WRITE(addr, nr, foffset)			\
	do {						\
		if (!dump_write(file, (addr), (nr)))	\
			return 0;			\
		*foffset += (nr);			\
	} while(0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308
Andi Kleend025c9d2006-09-30 23:29:28 -07001309static int alignfile(struct file *file, loff_t *foffset)
1310{
Petr Vandroveca7a0d862006-10-13 18:42:07 +02001311 static const char buf[4] = { 0, };
Andi Kleend025c9d2006-09-30 23:29:28 -07001312 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1313 return 1;
1314}
1315
1316static int writenote(struct memelfnote *men, struct file *file,
1317 loff_t *foffset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318{
1319 struct elf_note en;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 en.n_namesz = strlen(men->name) + 1;
1321 en.n_descsz = men->datasz;
1322 en.n_type = men->type;
1323
Andi Kleend025c9d2006-09-30 23:29:28 -07001324 DUMP_WRITE(&en, sizeof(en), foffset);
1325 DUMP_WRITE(men->name, en.n_namesz, foffset);
1326 if (!alignfile(file, foffset))
1327 return 0;
1328 DUMP_WRITE(men->data, men->datasz, foffset);
1329 if (!alignfile(file, foffset))
1330 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331
1332 return 1;
1333}
#undef DUMP_WRITE

/*
 * Core-dump helpers that bail out to the `end_coredump' label on failure.
 * Both expect `file' in the enclosing scope; DUMP_WRITE additionally
 * accounts @nr against the running `size' and refuses to go past `limit'.
 * They are wrapped in do { } while (0) so that each expands to a single
 * statement and is safe inside unbraced if/else bodies.
 */
#define DUMP_WRITE(addr, nr)					\
	do {							\
		if ((size += (nr)) > limit ||			\
		    !dump_write(file, (addr), (nr)))		\
			goto end_coredump;			\
	} while (0)
#define DUMP_SEEK(off)						\
	do {							\
		if (!dump_seek(file, (off)))			\
			goto end_coredump;			\
	} while (0)
1342
Arjan van de Ven858119e2006-01-14 13:20:43 -08001343static void fill_elf_header(struct elfhdr *elf, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344{
1345 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1346 elf->e_ident[EI_CLASS] = ELF_CLASS;
1347 elf->e_ident[EI_DATA] = ELF_DATA;
1348 elf->e_ident[EI_VERSION] = EV_CURRENT;
1349 elf->e_ident[EI_OSABI] = ELF_OSABI;
1350 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1351
1352 elf->e_type = ET_CORE;
1353 elf->e_machine = ELF_ARCH;
1354 elf->e_version = EV_CURRENT;
1355 elf->e_entry = 0;
1356 elf->e_phoff = sizeof(struct elfhdr);
1357 elf->e_shoff = 0;
1358 elf->e_flags = ELF_CORE_EFLAGS;
1359 elf->e_ehsize = sizeof(struct elfhdr);
1360 elf->e_phentsize = sizeof(struct elf_phdr);
1361 elf->e_phnum = segs;
1362 elf->e_shentsize = 0;
1363 elf->e_shnum = 0;
1364 elf->e_shstrndx = 0;
1365 return;
1366}
1367
Andrew Morton8d6b5eee2006-09-25 23:32:04 -07001368static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369{
1370 phdr->p_type = PT_NOTE;
1371 phdr->p_offset = offset;
1372 phdr->p_vaddr = 0;
1373 phdr->p_paddr = 0;
1374 phdr->p_filesz = sz;
1375 phdr->p_memsz = 0;
1376 phdr->p_flags = 0;
1377 phdr->p_align = 0;
1378 return;
1379}
1380
1381static void fill_note(struct memelfnote *note, const char *name, int type,
1382 unsigned int sz, void *data)
1383{
1384 note->name = name;
1385 note->type = type;
1386 note->datasz = sz;
1387 note->data = data;
1388 return;
1389}
1390
1391/*
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001392 * fill up all the fields in prstatus from the given task struct, except
1393 * registers which need to be filled up separately.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 */
1395static void fill_prstatus(struct elf_prstatus *prstatus,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001396 struct task_struct *p, long signr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397{
1398 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1399 prstatus->pr_sigpend = p->pending.signal.sig[0];
1400 prstatus->pr_sighold = p->blocked.sig[0];
1401 prstatus->pr_pid = p->pid;
1402 prstatus->pr_ppid = p->parent->pid;
1403 prstatus->pr_pgrp = process_group(p);
Cedric Le Goater937949d2006-12-08 02:37:54 -08001404 prstatus->pr_sid = process_session(p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 if (thread_group_leader(p)) {
1406 /*
1407 * This is the record for the group leader. Add in the
1408 * cumulative times of previous dead threads. This total
1409 * won't include the time of each live thread whose state
1410 * is included in the core dump. The final total reported
1411 * to our parent process when it calls wait4 will include
1412 * those sums as well as the little bit more time it takes
1413 * this and each other thread to finish dying after the
1414 * core dump synchronization phase.
1415 */
1416 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1417 &prstatus->pr_utime);
1418 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1419 &prstatus->pr_stime);
1420 } else {
1421 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1422 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1423 }
1424 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1425 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1426}
1427
1428static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1429 struct mm_struct *mm)
1430{
Greg Kroah-Hartmana84a5052005-05-11 00:10:44 -07001431 unsigned int i, len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432
1433 /* first copy the parameters from user space */
1434 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1435
1436 len = mm->arg_end - mm->arg_start;
1437 if (len >= ELF_PRARGSZ)
1438 len = ELF_PRARGSZ-1;
1439 if (copy_from_user(&psinfo->pr_psargs,
1440 (const char __user *)mm->arg_start, len))
1441 return -EFAULT;
1442 for(i = 0; i < len; i++)
1443 if (psinfo->pr_psargs[i] == 0)
1444 psinfo->pr_psargs[i] = ' ';
1445 psinfo->pr_psargs[len] = 0;
1446
1447 psinfo->pr_pid = p->pid;
1448 psinfo->pr_ppid = p->parent->pid;
1449 psinfo->pr_pgrp = process_group(p);
Cedric Le Goater937949d2006-12-08 02:37:54 -08001450 psinfo->pr_sid = process_session(p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451
1452 i = p->state ? ffz(~p->state) + 1 : 0;
1453 psinfo->pr_state = i;
Carsten Otte55148542006-03-25 03:08:22 -08001454 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1456 psinfo->pr_nice = task_nice(p);
1457 psinfo->pr_flag = p->flags;
1458 SET_UID(psinfo->pr_uid, p->uid);
1459 SET_GID(psinfo->pr_gid, p->gid);
1460 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1461
1462 return 0;
1463}
1464
1465/* Here is the structure in which status of each thread is captured. */
1466struct elf_thread_status
1467{
1468 struct list_head list;
1469 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1470 elf_fpregset_t fpu; /* NT_PRFPREG */
1471 struct task_struct *thread;
1472#ifdef ELF_CORE_COPY_XFPREGS
1473 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1474#endif
1475 struct memelfnote notes[3];
1476 int num_notes;
1477};
1478
1479/*
1480 * In order to add the specific thread information for the elf file format,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001481 * we need to keep a linked list of every threads pr_status and then create
1482 * a single section for them in the final core file.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 */
1484static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1485{
1486 int sz = 0;
1487 struct task_struct *p = t->thread;
1488 t->num_notes = 0;
1489
1490 fill_prstatus(&t->prstatus, p, signr);
1491 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1492
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001493 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1494 &(t->prstatus));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 t->num_notes++;
1496 sz += notesize(&t->notes[0]);
1497
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001498 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1499 &t->fpu))) {
1500 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1501 &(t->fpu));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 t->num_notes++;
1503 sz += notesize(&t->notes[1]);
1504 }
1505
1506#ifdef ELF_CORE_COPY_XFPREGS
1507 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001508 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1509 &t->xfpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 t->num_notes++;
1511 sz += notesize(&t->notes[2]);
1512 }
1513#endif
1514 return sz;
1515}
1516
Roland McGrathf47aef52007-01-26 00:56:49 -08001517static struct vm_area_struct *first_vma(struct task_struct *tsk,
1518 struct vm_area_struct *gate_vma)
1519{
1520 struct vm_area_struct *ret = tsk->mm->mmap;
1521
1522 if (ret)
1523 return ret;
1524 return gate_vma;
1525}
1526/*
1527 * Helper function for iterating across a vma list. It ensures that the caller
1528 * will visit `gate_vma' prior to terminating the search.
1529 */
1530static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1531 struct vm_area_struct *gate_vma)
1532{
1533 struct vm_area_struct *ret;
1534
1535 ret = this_vma->vm_next;
1536 if (ret)
1537 return ret;
1538 if (this_vma == gate_vma)
1539 return NULL;
1540 return gate_vma;
1541}
1542
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543/*
1544 * Actual dumper
1545 *
1546 * This is a two-pass process; first we find the offsets of the bits,
1547 * and then they are actually written out. If we run out of core limit
1548 * we just truncate.
1549 */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001550static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551{
1552#define NUM_NOTES 6
1553 int has_dumped = 0;
1554 mm_segment_t fs;
1555 int segs;
1556 size_t size = 0;
1557 int i;
Roland McGrathf47aef52007-01-26 00:56:49 -08001558 struct vm_area_struct *vma, *gate_vma;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559 struct elfhdr *elf = NULL;
Andi Kleend025c9d2006-09-30 23:29:28 -07001560 loff_t offset = 0, dataoff, foffset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1562 int numnote;
1563 struct memelfnote *notes = NULL;
1564 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1565 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1566 struct task_struct *g, *p;
1567 LIST_HEAD(thread_list);
1568 struct list_head *t;
1569 elf_fpregset_t *fpu = NULL;
1570#ifdef ELF_CORE_COPY_XFPREGS
1571 elf_fpxregset_t *xfpu = NULL;
1572#endif
1573 int thread_status_size = 0;
1574 elf_addr_t *auxv;
Michael Ellermanef7320e2007-07-06 02:39:49 -07001575#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1576 int extra_notes_size;
1577#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578
1579 /*
1580 * We no longer stop all VM operations.
1581 *
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001582 * This is because those proceses that could possibly change map_count
1583 * or the mmap / vma pages are now blocked in do_exit on current
1584 * finishing this core dump.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 *
1586 * Only ptrace can touch these memory addresses, but it doesn't change
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001587 * the map_count or the pages allocated. So no possibility of crashing
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 * exists while dumping the mm->vm_next areas to the core file.
1589 */
1590
1591 /* alloc memory for large data structures: too large to be on stack */
1592 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1593 if (!elf)
1594 goto cleanup;
1595 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1596 if (!prstatus)
1597 goto cleanup;
1598 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1599 if (!psinfo)
1600 goto cleanup;
1601 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1602 if (!notes)
1603 goto cleanup;
1604 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1605 if (!fpu)
1606 goto cleanup;
1607#ifdef ELF_CORE_COPY_XFPREGS
1608 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1609 if (!xfpu)
1610 goto cleanup;
1611#endif
1612
1613 if (signr) {
1614 struct elf_thread_status *tmp;
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001615 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 do_each_thread(g,p)
1617 if (current->mm == p->mm && current != p) {
Oliver Neukum11b0b5a2006-03-25 03:08:13 -08001618 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619 if (!tmp) {
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001620 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621 goto cleanup;
1622 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 tmp->thread = p;
1624 list_add(&tmp->list, &thread_list);
1625 }
1626 while_each_thread(g,p);
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001627 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 list_for_each(t, &thread_list) {
1629 struct elf_thread_status *tmp;
1630 int sz;
1631
1632 tmp = list_entry(t, struct elf_thread_status, list);
1633 sz = elf_dump_thread_status(signr, tmp);
1634 thread_status_size += sz;
1635 }
1636 }
1637 /* now collect the dump for the current */
1638 memset(prstatus, 0, sizeof(*prstatus));
1639 fill_prstatus(prstatus, current, signr);
1640 elf_core_copy_regs(&prstatus->pr_reg, regs);
1641
1642 segs = current->mm->map_count;
1643#ifdef ELF_CORE_EXTRA_PHDRS
1644 segs += ELF_CORE_EXTRA_PHDRS;
1645#endif
1646
Roland McGrathf47aef52007-01-26 00:56:49 -08001647 gate_vma = get_gate_vma(current);
1648 if (gate_vma != NULL)
1649 segs++;
1650
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651 /* Set up header */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001652 fill_elf_header(elf, segs + 1); /* including notes section */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653
1654 has_dumped = 1;
1655 current->flags |= PF_DUMPCORE;
1656
1657 /*
1658 * Set up the notes in similar form to SVR4 core dumps made
1659 * with info from their /proc.
1660 */
1661
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001662 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 fill_psinfo(psinfo, current->group_leader, current->mm);
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001664 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665
Eric W. Biedermana9289722005-10-30 15:02:08 -08001666 numnote = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001668 auxv = (elf_addr_t *)current->mm->saved_auxv;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669
1670 i = 0;
1671 do
1672 i += 2;
1673 while (auxv[i - 2] != AT_NULL);
1674 fill_note(&notes[numnote++], "CORE", NT_AUXV,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001675 i * sizeof(elf_addr_t), auxv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676
1677 /* Try to dump the FPU. */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001678 if ((prstatus->pr_fpvalid =
1679 elf_core_copy_task_fpregs(current, regs, fpu)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 fill_note(notes + numnote++,
1681 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1682#ifdef ELF_CORE_COPY_XFPREGS
1683 if (elf_core_copy_task_xfpregs(current, xfpu))
1684 fill_note(notes + numnote++,
1685 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1686#endif
1687
1688 fs = get_fs();
1689 set_fs(KERNEL_DS);
1690
1691 DUMP_WRITE(elf, sizeof(*elf));
1692 offset += sizeof(*elf); /* Elf header */
Petr Vandroveca7a0d862006-10-13 18:42:07 +02001693 offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1694 foffset = offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695
1696 /* Write notes phdr entry */
1697 {
1698 struct elf_phdr phdr;
1699 int sz = 0;
1700
1701 for (i = 0; i < numnote; i++)
1702 sz += notesize(notes + i);
1703
1704 sz += thread_status_size;
1705
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001706#ifdef ELF_CORE_WRITE_EXTRA_NOTES
Michael Ellermanef7320e2007-07-06 02:39:49 -07001707 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1708 sz += extra_notes_size;
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001709#endif
1710
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 fill_elf_note_phdr(&phdr, sz, offset);
1712 offset += sz;
1713 DUMP_WRITE(&phdr, sizeof(phdr));
1714 }
1715
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1717
1718 /* Write program headers for segments dump */
Roland McGrathf47aef52007-01-26 00:56:49 -08001719 for (vma = first_vma(current, gate_vma); vma != NULL;
1720 vma = next_vma(vma, gate_vma)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 struct elf_phdr phdr;
1722 size_t sz;
1723
1724 sz = vma->vm_end - vma->vm_start;
1725
1726 phdr.p_type = PT_LOAD;
1727 phdr.p_offset = offset;
1728 phdr.p_vaddr = vma->vm_start;
1729 phdr.p_paddr = 0;
1730 phdr.p_filesz = maydump(vma) ? sz : 0;
1731 phdr.p_memsz = sz;
1732 offset += phdr.p_filesz;
1733 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001734 if (vma->vm_flags & VM_WRITE)
1735 phdr.p_flags |= PF_W;
1736 if (vma->vm_flags & VM_EXEC)
1737 phdr.p_flags |= PF_X;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 phdr.p_align = ELF_EXEC_PAGESIZE;
1739
1740 DUMP_WRITE(&phdr, sizeof(phdr));
1741 }
1742
1743#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1744 ELF_CORE_WRITE_EXTRA_PHDRS;
1745#endif
1746
1747 /* write out the notes section */
1748 for (i = 0; i < numnote; i++)
Andi Kleend025c9d2006-09-30 23:29:28 -07001749 if (!writenote(notes + i, file, &foffset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750 goto end_coredump;
1751
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001752#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1753 ELF_CORE_WRITE_EXTRA_NOTES;
Michael Ellermanef7320e2007-07-06 02:39:49 -07001754 foffset += extra_notes_size;
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001755#endif
1756
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 /* write out the thread status notes section */
1758 list_for_each(t, &thread_list) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001759 struct elf_thread_status *tmp =
1760 list_entry(t, struct elf_thread_status, list);
1761
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 for (i = 0; i < tmp->num_notes; i++)
Andi Kleend025c9d2006-09-30 23:29:28 -07001763 if (!writenote(&tmp->notes[i], file, &foffset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 goto end_coredump;
1765 }
Andi Kleend025c9d2006-09-30 23:29:28 -07001766
1767 /* Align to page */
1768 DUMP_SEEK(dataoff - foffset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769
Roland McGrathf47aef52007-01-26 00:56:49 -08001770 for (vma = first_vma(current, gate_vma); vma != NULL;
1771 vma = next_vma(vma, gate_vma)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 unsigned long addr;
1773
1774 if (!maydump(vma))
1775 continue;
1776
1777 for (addr = vma->vm_start;
1778 addr < vma->vm_end;
1779 addr += PAGE_SIZE) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001780 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 struct vm_area_struct *vma;
1782
1783 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1784 &page, &vma) <= 0) {
Andi Kleend025c9d2006-09-30 23:29:28 -07001785 DUMP_SEEK(PAGE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786 } else {
1787 if (page == ZERO_PAGE(addr)) {
Brian Pomerantz03221702007-04-01 23:49:41 -07001788 if (!dump_seek(file, PAGE_SIZE)) {
1789 page_cache_release(page);
1790 goto end_coredump;
1791 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 } else {
1793 void *kaddr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001794 flush_cache_page(vma, addr,
1795 page_to_pfn(page));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796 kaddr = kmap(page);
1797 if ((size += PAGE_SIZE) > limit ||
1798 !dump_write(file, kaddr,
1799 PAGE_SIZE)) {
1800 kunmap(page);
1801 page_cache_release(page);
1802 goto end_coredump;
1803 }
1804 kunmap(page);
1805 }
1806 page_cache_release(page);
1807 }
1808 }
1809 }
1810
1811#ifdef ELF_CORE_WRITE_EXTRA_DATA
1812 ELF_CORE_WRITE_EXTRA_DATA;
1813#endif
1814
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815end_coredump:
1816 set_fs(fs);
1817
1818cleanup:
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001819 while (!list_empty(&thread_list)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820 struct list_head *tmp = thread_list.next;
1821 list_del(tmp);
1822 kfree(list_entry(tmp, struct elf_thread_status, list));
1823 }
1824
1825 kfree(elf);
1826 kfree(prstatus);
1827 kfree(psinfo);
1828 kfree(notes);
1829 kfree(fpu);
1830#ifdef ELF_CORE_COPY_XFPREGS
1831 kfree(xfpu);
1832#endif
1833 return has_dumped;
1834#undef NUM_NOTES
1835}
1836
1837#endif /* USE_ELF_CORE_DUMP */
1838
1839static int __init init_elf_binfmt(void)
1840{
1841 return register_binfmt(&elf_format);
1842}
1843
1844static void __exit exit_elf_binfmt(void)
1845{
1846 /* Remove the COFF and ELF loaders. */
1847 unregister_binfmt(&elf_format);
1848}
1849
1850core_initcall(init_elf_binfmt);
1851module_exit(exit_elf_binfmt);
1852MODULE_LICENSE("GPL");