/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
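/*
 * Worked example, assuming ELF_MIN_ALIGN == 0x1000: for _v == 0x12345,
 * ELF_PAGESTART() gives 0x12000, ELF_PAGEOFFSET() gives 0x345, and
 * ELF_PAGEALIGN() gives 0x13000.
 */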

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
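
/*
 * Example, assuming ELF_MIN_ALIGN == 0x1000: for elf_bss == 0x2345,
 * padzero() clears the 0xcbb bytes from 0x2345 up to the 0x3000 page
 * boundary, so no junk from the file leaks into the tail of the page.
 */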

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
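
/*
 * Illustration for the common downward-growing case, with an 8-byte
 * elf_addr_t assumed: STACK_ALLOC(sp, 0x10) moves sp from 0x7fff1008
 * down to 0x7fff0ff8 and returns it, while STACK_ROUND(sp, 3) with
 * sp == 0x7fff1008 reserves three 8-byte slots and rounds the result
 * down to the 16-byte boundary 0x7fff0ff0.
 */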

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
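
/*
 * On a downward-growing stack the result, reading from bprm->p toward
 * higher addresses, is: argc, the argv[] pointer array plus its NULL
 * terminator, the envp[] pointer array plus its NULL terminator, and
 * the auxv id/value pairs terminated by AT_NULL.  The strings
 * themselves, the AT_RANDOM bytes, and any platform strings copied
 * above sit at higher addresses still.
 */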

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image and then unmap the remainder
         * at the end (the unmap is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}
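
/*
 * Example: if the first PT_LOAD is 0x1000 bytes but the image's
 * PT_LOAD segments span 0x202000 bytes in total, the first call maps
 * the full 0x202000 so the base address is chosen with room for every
 * segment, and the trailing 0x201000 is then unmapped again for the
 * later per-segment mappings.
 */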

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
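
/*
 * Example: with a first PT_LOAD at p_vaddr 0x400000 and a last one at
 * p_vaddr 0x600000 with p_memsz 0x2000, the mapping must cover
 * 0x600000 + 0x2000 - 0x400000 == 0x202000 bytes.
 */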


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
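
/*
 * With the default STACK_RND_MASK and 4K pages, the offset above is a
 * page-aligned random value of up to 0x7ff000 bytes (just under 8MB),
 * applied downward on normal stacks and upward on CONFIG_STACK_GROWSUP
 * architectures.
 */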

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl or an explicit setting of
                         * personality flags.  If that is the case, retain the
                         * original non-zero load_bias value in order to
                         * establish proper non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in
 * every core dump.  That way we ensure that the core dump is fully
 * interpretable later without matching up the same kernel and hardware
 * config to see what PC values meant.  These special mappings include
 * the vDSO, vsyscall, and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide how much of a segment to dump: all of it, part of it, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
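
/*
 * Example: a note named "CORE" with a 21-byte payload occupies
 * 12 (note header) + 8 (the 5-byte "CORE" name rounded up to a
 * multiple of 4) + 24 (payload rounded up) == 44 bytes.
 */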
1227
1228 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1229 {
1230         struct elf_note en;
1231         en.n_namesz = strlen(men->name) + 1;
1232         en.n_descsz = men->datasz;
1233         en.n_type = men->type;
1234
1235         return dump_emit(cprm, &en, sizeof(en)) &&
1236             dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1237             dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1238 }
1239
1240 static void fill_elf_header(struct elfhdr *elf, int segs,
1241                             u16 machine, u32 flags)
1242 {
1243         memset(elf, 0, sizeof(*elf));
1244
1245         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1246         elf->e_ident[EI_CLASS] = ELF_CLASS;
1247         elf->e_ident[EI_DATA] = ELF_DATA;
1248         elf->e_ident[EI_VERSION] = EV_CURRENT;
1249         elf->e_ident[EI_OSABI] = ELF_OSABI;
1250
1251         elf->e_type = ET_CORE;
1252         elf->e_machine = machine;
1253         elf->e_version = EV_CURRENT;
1254         elf->e_phoff = sizeof(struct elfhdr);
1255         elf->e_flags = flags;
1256         elf->e_ehsize = sizeof(struct elfhdr);
1257         elf->e_phentsize = sizeof(struct elf_phdr);
1258         elf->e_phnum = segs;
1259
1260         return;
1261 }
1262
1263 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1264 {
1265         phdr->p_type = PT_NOTE;
1266         phdr->p_offset = offset;
1267         phdr->p_vaddr = 0;
1268         phdr->p_paddr = 0;
1269         phdr->p_filesz = sz;
1270         phdr->p_memsz = 0;
1271         phdr->p_flags = 0;
1272         phdr->p_align = 0;
1273         return;
1274 }
1275
1276 static void fill_note(struct memelfnote *note, const char *name, int type, 
1277                 unsigned int sz, void *data)
1278 {
1279         note->name = name;
1280         note->type = type;
1281         note->datasz = sz;
1282         note->data = data;
1283         return;
1284 }
1285
1286 /*
1287  * fill up all the fields in prstatus from the given task struct, except
1288  * registers which need to be filled up separately.
1289  */
1290 static void fill_prstatus(struct elf_prstatus *prstatus,
1291                 struct task_struct *p, long signr)
1292 {
1293         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1294         prstatus->pr_sigpend = p->pending.signal.sig[0];
1295         prstatus->pr_sighold = p->blocked.sig[0];
1296         rcu_read_lock();
1297         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1298         rcu_read_unlock();
1299         prstatus->pr_pid = task_pid_vnr(p);
1300         prstatus->pr_pgrp = task_pgrp_vnr(p);
1301         prstatus->pr_sid = task_session_vnr(p);
1302         if (thread_group_leader(p)) {
1303                 struct task_cputime cputime;
1304
1305                 /*
1306                  * This is the record for the group leader.  It shows the
1307                  * group-wide total, not its individual thread total.
1308                  */
1309                 thread_group_cputime(p, &cputime);
1310                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1311                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1312         } else {
1313                 cputime_t utime, stime;
1314
1315                 task_cputime(p, &utime, &stime);
1316                 cputime_to_timeval(utime, &prstatus->pr_utime);
1317                 cputime_to_timeval(stime, &prstatus->pr_stime);
1318         }
1319         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1320         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1321 }
1322
1323 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1324                        struct mm_struct *mm)
1325 {
1326         const struct cred *cred;
1327         unsigned int i, len;
1328         
1329         /* first copy the parameters from user space */
1330         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1331
1332         len = mm->arg_end - mm->arg_start;
1333         if (len >= ELF_PRARGSZ)
1334                 len = ELF_PRARGSZ-1;
1335         if (copy_from_user(&psinfo->pr_psargs,
1336                            (const char __user *)mm->arg_start, len))
1337                 return -EFAULT;
1338         for(i = 0; i < len; i++)
1339                 if (psinfo->pr_psargs[i] == 0)
1340                         psinfo->pr_psargs[i] = ' ';
1341         psinfo->pr_psargs[len] = 0;
1342
1343         rcu_read_lock();
1344         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1345         rcu_read_unlock();
1346         psinfo->pr_pid = task_pid_vnr(p);
1347         psinfo->pr_pgrp = task_pgrp_vnr(p);
1348         psinfo->pr_sid = task_session_vnr(p);
1349
1350         i = p->state ? ffz(~p->state) + 1 : 0;
1351         psinfo->pr_state = i;
1352         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1353         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1354         psinfo->pr_nice = task_nice(p);
1355         psinfo->pr_flag = p->flags;
1356         rcu_read_lock();
1357         cred = __task_cred(p);
1358         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1359         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1360         rcu_read_unlock();
1361         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1362         
1363         return 0;
1364 }
1365
1366 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1367 {
1368         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1369         int i = 0;
1370         do
1371                 i += 2;
1372         while (auxv[i - 2] != AT_NULL);
1373         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1374 }
1375
1376 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1377                 const siginfo_t *siginfo)
1378 {
1379         mm_segment_t old_fs = get_fs();
1380         set_fs(KERNEL_DS);
1381         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1382         set_fs(old_fs);
1383         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1384 }
1385
1386 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1387 /*
1388  * Format of NT_FILE note:
1389  *
1390  * long count     -- how many files are mapped
1391  * long page_size -- units for file_ofs
1392  * array of [COUNT] elements of
1393  *   long start
1394  *   long end
1395  *   long file_ofs
1396  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1397  */
1398 static int fill_files_note(struct memelfnote *note)
1399 {
1400         struct vm_area_struct *vma;
1401         unsigned count, size, names_ofs, remaining, n;
1402         user_long_t *data;
1403         user_long_t *start_end_ofs;
1404         char *name_base, *name_curpos;
1405
1406         /* *Estimated* file count and total data size needed */
1407         count = current->mm->map_count;
1408         size = count * 64;
1409
1410         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1411  alloc:
1412         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1413                 return -EINVAL;
1414         size = round_up(size, PAGE_SIZE);
1415         data = vmalloc(size);
1416         if (!data)
1417                 return -ENOMEM;
1418
1419         start_end_ofs = data + 2;
1420         name_base = name_curpos = ((char *)data) + names_ofs;
1421         remaining = size - names_ofs;
1422         count = 0;
1423         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1424                 struct file *file;
1425                 const char *filename;
1426
1427                 file = vma->vm_file;
1428                 if (!file)
1429                         continue;
1430                 filename = d_path(&file->f_path, name_curpos, remaining);
1431                 if (IS_ERR(filename)) {
1432                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1433                                 vfree(data);
1434                                 size = size * 5 / 4;
1435                                 goto alloc;
1436                         }
1437                         continue;
1438                 }
1439
1440                 /* d_path() fills in the name at the end of the buffer, move it down */
1441                 /* n = strlen(filename) + 1: */
1442                 n = (name_curpos + remaining) - filename;
1443                 remaining = filename - name_curpos;
1444                 memmove(name_curpos, filename, n);
1445                 name_curpos += n;
1446
1447                 *start_end_ofs++ = vma->vm_start;
1448                 *start_end_ofs++ = vma->vm_end;
1449                 *start_end_ofs++ = vma->vm_pgoff;
1450                 count++;
1451         }
1452
1453         /* Now we know the exact count of files, so we can store it */
1454         data[0] = count;
1455         data[1] = PAGE_SIZE;
1456         /*
1457          * The final count is usually smaller than current->mm->map_count,
1458          * so we need to move the filenames down to close the gap.
1459          */
1460         n = current->mm->map_count - count;
1461         if (n != 0) {
1462                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1463                 memmove(name_base - shift_bytes, name_base,
1464                         name_curpos - name_base);
1465                 name_curpos -= shift_bytes;
1466         }
1467
1468         size = name_curpos - (char *)data;
1469         fill_note(note, "CORE", NT_FILE, size, data);
1470         return 0;
1471 }
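
/*
 * Illustrative sketch (not kernel code): parsing the NT_FILE payload
 * produced above, assuming "desc" points at the note body and that the
 * reader's long matches the dumped process's user_long_t.  All names
 * here are hypothetical.
 *
 *	long *w = desc;
 *	long count = w[0], page_size = w[1];
 *	char *names = (char *)&w[2 + 3 * count];
 *	long i;
 *
 *	for (i = 0; i < count; i++) {
 *		long start = w[2 + 3 * i];
 *		long end   = w[2 + 3 * i + 1];
 *		long ofs   = w[2 + 3 * i + 2];	-- in page_size units
 *		printf("%lx-%lx @ %lx %s\n", start, end,
 *		       ofs * page_size, names);
 *		names += strlen(names) + 1;
 *	}
 */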
1472
1473 #ifdef CORE_DUMP_USE_REGSET
1474 #include <linux/regset.h>
1475
1476 struct elf_thread_core_info {
1477         struct elf_thread_core_info *next;
1478         struct task_struct *task;
1479         struct elf_prstatus prstatus;
1480         struct memelfnote notes[];
1481 };
1482
1483 struct elf_note_info {
1484         struct elf_thread_core_info *thread;
1485         struct memelfnote psinfo;
1486         struct memelfnote signote;
1487         struct memelfnote auxv;
1488         struct memelfnote files;
1489         user_siginfo_t csigdata;
1490         size_t size;
1491         int thread_notes;
1492 };
1493
1494 /*
1495  * When a regset has a writeback hook, we call it on each thread before
1496  * dumping user memory.  On register window machines, this makes sure the
1497  * user memory backing the register data is up to date before we read it.
1498  */
1499 static void do_thread_regset_writeback(struct task_struct *task,
1500                                        const struct user_regset *regset)
1501 {
1502         if (regset->writeback)
1503                 regset->writeback(task, regset, 1);
1504 }
1505
1506 #ifndef PR_REG_SIZE
1507 #define PR_REG_SIZE(S) sizeof(S)
1508 #endif
1509
1510 #ifndef PRSTATUS_SIZE
1511 #define PRSTATUS_SIZE(S) sizeof(S)
1512 #endif
1513
1514 #ifndef PR_REG_PTR
1515 #define PR_REG_PTR(S) (&((S)->pr_reg))
1516 #endif
1517
1518 #ifndef SET_PR_FPVALID
1519 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1520 #endif
1521
1522 static int fill_thread_core_info(struct elf_thread_core_info *t,
1523                                  const struct user_regset_view *view,
1524                                  long signr, size_t *total)
1525 {
1526         unsigned int i;
1527
1528         /*
1529          * NT_PRSTATUS is the one special case, because the regset data
1530          * goes into the pr_reg field inside the note contents, rather
1531          * than being the whole note contents.  We fill the rest in here.
1532          * We assume that regset 0 is NT_PRSTATUS.
1533          */
1534         fill_prstatus(&t->prstatus, t->task, signr);
1535         (void) view->regsets[0].get(t->task, &view->regsets[0],
1536                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1537                                     PR_REG_PTR(&t->prstatus), NULL);
1538
1539         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1540                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1541         *total += notesize(&t->notes[0]);
1542
1543         do_thread_regset_writeback(t->task, &view->regsets[0]);
1544
1545         /*
1546          * Every other regset might generate a note too.  For each regset
1547          * that has no core_note_type or is inactive, we leave t->notes[i]
1548          * all zero and we'll know to skip writing it later.
1549          */
1550         for (i = 1; i < view->n; ++i) {
1551                 const struct user_regset *regset = &view->regsets[i];
1552                 do_thread_regset_writeback(t->task, regset);
1553                 if (regset->core_note_type && regset->get &&
1554                     (!regset->active || regset->active(t->task, regset))) {
1555                         int ret;
1556                         size_t size = regset->n * regset->size;
1557                         void *data = kmalloc(size, GFP_KERNEL);
1558                         if (unlikely(!data))
1559                                 return 0;
1560                         ret = regset->get(t->task, regset,
1561                                           0, size, data, NULL);
1562                         if (unlikely(ret))
1563                                 kfree(data);
1564                         else {
1565                                 if (regset->core_note_type != NT_PRFPREG)
1566                                         fill_note(&t->notes[i], "LINUX",
1567                                                   regset->core_note_type,
1568                                                   size, data);
1569                                 else {
1570                                         SET_PR_FPVALID(&t->prstatus, 1);
1571                                         fill_note(&t->notes[i], "CORE",
1572                                                   NT_PRFPREG, size, data);
1573                                 }
1574                                 *total += notesize(&t->notes[i]);
1575                         }
1576                 }
1577         }
1578
1579         return 1;
1580 }
1581
1582 static int fill_note_info(struct elfhdr *elf, int phdrs,
1583                           struct elf_note_info *info,
1584                           const siginfo_t *siginfo, struct pt_regs *regs)
1585 {
1586         struct task_struct *dump_task = current;
1587         const struct user_regset_view *view = task_user_regset_view(dump_task);
1588         struct elf_thread_core_info *t;
1589         struct elf_prpsinfo *psinfo;
1590         struct core_thread *ct;
1591         unsigned int i;
1592
1593         info->size = 0;
1594         info->thread = NULL;
1595
1596         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1597         if (psinfo == NULL) {
1598                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1599                 return 0;
1600         }
1601
1602         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1603
1604         /*
1605          * Figure out how many notes we're going to need for each thread.
1606          */
1607         info->thread_notes = 0;
1608         for (i = 0; i < view->n; ++i)
1609                 if (view->regsets[i].core_note_type != 0)
1610                         ++info->thread_notes;
1611
1612         /*
1613          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1614          * since it is our one special case.
1615          */
1616         if (unlikely(info->thread_notes == 0) ||
1617             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1618                 WARN_ON(1);
1619                 return 0;
1620         }
1621
1622         /*
1623          * Initialize the ELF file header.
1624          */
1625         fill_elf_header(elf, phdrs,
1626                         view->e_machine, view->e_flags);
1627
1628         /*
1629          * Allocate a structure for each thread.
1630          */
1631         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1632                 t = kzalloc(offsetof(struct elf_thread_core_info,
1633                                      notes[info->thread_notes]),
1634                             GFP_KERNEL);
1635                 if (unlikely(!t))
1636                         return 0;
1637
1638                 t->task = ct->task;
1639                 if (ct->task == dump_task || !info->thread) {
1640                         t->next = info->thread;
1641                         info->thread = t;
1642                 } else {
1643                         /*
1644                          * Make sure to keep the original task at
1645                          * the head of the list.
1646                          */
1647                         t->next = info->thread->next;
1648                         info->thread->next = t;
1649                 }
1650         }
1651
1652         /*
1653          * Now fill in each thread's information.
1654          */
1655         for (t = info->thread; t != NULL; t = t->next)
1656                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1657                         return 0;
1658
1659         /*
1660          * Fill in the two process-wide notes.
1661          */
1662         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1663         info->size += notesize(&info->psinfo);
1664
1665         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1666         info->size += notesize(&info->signote);
1667
1668         fill_auxv_note(&info->auxv, current->mm);
1669         info->size += notesize(&info->auxv);
1670
1671         if (fill_files_note(&info->files) == 0)
1672                 info->size += notesize(&info->files);
1673
1674         return 1;
1675 }
1676
1677 static size_t get_note_info_size(struct elf_note_info *info)
1678 {
1679         return info->size;
1680 }
1681
1682 /*
1683  * Write all the notes for each thread.  When writing the first thread, the
1684  * process-wide notes are interleaved after the first thread-specific note.
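 *
 * For a dump with two threads this yields: T1 NT_PRSTATUS, NT_PRPSINFO,
 * NT_SIGINFO, NT_AUXV, NT_FILE (if present), T1's remaining regset
 * notes, then T2 NT_PRSTATUS followed by T2's remaining regset notes.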
1685  */
1686 static int write_note_info(struct elf_note_info *info,
1687                            struct coredump_params *cprm)
1688 {
1689         bool first = true;
1690         struct elf_thread_core_info *t = info->thread;
1691
1692         do {
1693                 int i;
1694
1695                 if (!writenote(&t->notes[0], cprm))
1696                         return 0;
1697
1698                 if (first && !writenote(&info->psinfo, cprm))
1699                         return 0;
1700                 if (first && !writenote(&info->signote, cprm))
1701                         return 0;
1702                 if (first && !writenote(&info->auxv, cprm))
1703                         return 0;
1704                 if (first && info->files.data &&
1705                                 !writenote(&info->files, cprm))
1706                         return 0;
1707
1708                 for (i = 1; i < info->thread_notes; ++i)
1709                         if (t->notes[i].data &&
1710                             !writenote(&t->notes[i], cprm))
1711                                 return 0;
1712
1713                 first = false;
1714                 t = t->next;
1715         } while (t);
1716
1717         return 1;
1718 }
1719
1720 static void free_note_info(struct elf_note_info *info)
1721 {
1722         struct elf_thread_core_info *threads = info->thread;
1723         while (threads) {
1724                 unsigned int i;
1725                 struct elf_thread_core_info *t = threads;
1726                 threads = t->next;
1727                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1728                 for (i = 1; i < info->thread_notes; ++i)
1729                         kfree(t->notes[i].data);
1730                 kfree(t);
1731         }
1732         kfree(info->psinfo.data);
1733         vfree(info->files.data);
1734 }
1735
1736 #else
1737
1738 /* Here is the structure in which status of each thread is captured. */
1739 struct elf_thread_status
1740 {
1741         struct list_head list;
1742         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1743         elf_fpregset_t fpu;             /* NT_PRFPREG */
1744         struct task_struct *thread;
1745 #ifdef ELF_CORE_COPY_XFPREGS
1746         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1747 #endif
1748         struct memelfnote notes[3];
1749         int num_notes;
1750 };
1751
1752 /*
1753  * In order to add the thread-specific information to the ELF core file,
1754  * we need to keep a linked list of every thread's pr_status and then create
1755  * a single section for them in the final core file.
1756  */
1757 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1758 {
1759         int sz = 0;
1760         struct task_struct *p = t->thread;
1761         t->num_notes = 0;
1762
1763         fill_prstatus(&t->prstatus, p, signr);
1764         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1765         
1766         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1767                   &(t->prstatus));
1768         t->num_notes++;
1769         sz += notesize(&t->notes[0]);
1770
1771         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1772                                                                 &t->fpu))) {
1773                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1774                           &(t->fpu));
1775                 t->num_notes++;
1776                 sz += notesize(&t->notes[1]);
1777         }
1778
1779 #ifdef ELF_CORE_COPY_XFPREGS
1780         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1781                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1782                           sizeof(t->xfpu), &t->xfpu);
1783                 t->num_notes++;
1784                 sz += notesize(&t->notes[2]);
1785         }
1786 #endif  
1787         return sz;
1788 }
1789
1790 struct elf_note_info {
1791         struct memelfnote *notes;
1792         struct memelfnote *notes_files;
1793         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1794         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1795         struct list_head thread_list;
1796         elf_fpregset_t *fpu;
1797 #ifdef ELF_CORE_COPY_XFPREGS
1798         elf_fpxregset_t *xfpu;
1799 #endif
1800         user_siginfo_t csigdata;
1801         int thread_status_size;
1802         int numnote;
1803 };
1804
1805 static int elf_note_info_init(struct elf_note_info *info)
1806 {
1807         memset(info, 0, sizeof(*info));
1808         INIT_LIST_HEAD(&info->thread_list);
1809
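        /*
         * A partially initialized info is safe to hand straight to
         * free_note_info(): the memset() above zeroed every pointer,
         * kfree(NULL)/vfree(NULL) are no-ops, and the thread list is
         * already valid.
         */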
1810         /* Allocate space for ELF notes */
1811         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1812         if (!info->notes)
1813                 return 0;
1814         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1815         if (!info->psinfo)
1816                 return 0;
1817         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1818         if (!info->prstatus)
1819                 return 0;
1820         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1821         if (!info->fpu)
1822                 return 0;
1823 #ifdef ELF_CORE_COPY_XFPREGS
1824         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1825         if (!info->xfpu)
1826                 return 0;
1827 #endif
1828         return 1;
1829 }
1830
1831 static int fill_note_info(struct elfhdr *elf, int phdrs,
1832                           struct elf_note_info *info,
1833                           const siginfo_t *siginfo, struct pt_regs *regs)
1834 {
1835         struct list_head *t;
1836         struct core_thread *ct;
1837         struct elf_thread_status *ets;
1838
1839         if (!elf_note_info_init(info))
1840                 return 0;
1841
1842         for (ct = current->mm->core_state->dumper.next;
1843                                         ct; ct = ct->next) {
1844                 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1845                 if (!ets)
1846                         return 0;
1847
1848                 ets->thread = ct->task;
1849                 list_add(&ets->list, &info->thread_list);
1850         }
1851
1852         list_for_each(t, &info->thread_list) {
1853                 int sz;
1854
1855                 ets = list_entry(t, struct elf_thread_status, list);
1856                 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1857                 info->thread_status_size += sz;
1858         }
1859         /* now collect the dump for the current task */
1860         memset(info->prstatus, 0, sizeof(*info->prstatus));
1861         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1862         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1863
1864         /* Set up header */
1865         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1866
1867         /*
1868          * Set up the notes in similar form to SVR4 core dumps made
1869          * with info from their /proc.
1870          */
1871
1872         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1873                   sizeof(*info->prstatus), info->prstatus);
1874         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1875         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1876                   sizeof(*info->psinfo), info->psinfo);
1877
1878         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1879         fill_auxv_note(info->notes + 3, current->mm);
1880         info->numnote = 4;
1881
1882         if (fill_files_note(info->notes + info->numnote) == 0) {
1883                 info->notes_files = info->notes + info->numnote;
1884                 info->numnote++;
1885         }
1886
1887         /* Try to dump the FPU. */
1888         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1889                                                                info->fpu);
1890         if (info->prstatus->pr_fpvalid)
1891                 fill_note(info->notes + info->numnote++,
1892                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1893 #ifdef ELF_CORE_COPY_XFPREGS
1894         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1895                 fill_note(info->notes + info->numnote++,
1896                           "LINUX", ELF_CORE_XFPREG_TYPE,
1897                           sizeof(*info->xfpu), info->xfpu);
1898 #endif
1899
1900         return 1;
1901 }
1902
1903 static size_t get_note_info_size(struct elf_note_info *info)
1904 {
1905         int sz = 0;
1906         int i;
1907
1908         for (i = 0; i < info->numnote; i++)
1909                 sz += notesize(info->notes + i);
1910
1911         sz += info->thread_status_size;
1912
1913         return sz;
1914 }
1915
1916 static int write_note_info(struct elf_note_info *info,
1917                            struct coredump_params *cprm)
1918 {
1919         int i;
1920         struct list_head *t;
1921
1922         for (i = 0; i < info->numnote; i++)
1923                 if (!writenote(info->notes + i, cprm))
1924                         return 0;
1925
1926         /* write out the thread status notes section */
1927         list_for_each(t, &info->thread_list) {
1928                 struct elf_thread_status *tmp =
1929                                 list_entry(t, struct elf_thread_status, list);
1930
1931                 for (i = 0; i < tmp->num_notes; i++)
1932                         if (!writenote(&tmp->notes[i], cprm))
1933                                 return 0;
1934         }
1935
1936         return 1;
1937 }
1938
1939 static void free_note_info(struct elf_note_info *info)
1940 {
1941         while (!list_empty(&info->thread_list)) {
1942                 struct list_head *tmp = info->thread_list.next;
1943                 list_del(tmp);
1944                 kfree(list_entry(tmp, struct elf_thread_status, list));
1945         }
1946
1947         /* Free data possibly allocated by fill_files_note(): */
1948         if (info->notes_files)
1949                 vfree(info->notes_files->data);
1950
1951         kfree(info->prstatus);
1952         kfree(info->psinfo);
1953         kfree(info->notes);
1954         kfree(info->fpu);
1955 #ifdef ELF_CORE_COPY_XFPREGS
1956         kfree(info->xfpu);
1957 #endif
1958 }
1959
1960 #endif
1961
1962 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1963                                         struct vm_area_struct *gate_vma)
1964 {
1965         struct vm_area_struct *ret = tsk->mm->mmap;
1966
1967         if (ret)
1968                 return ret;
1969         return gate_vma;
1970 }
1971 /*
1972  * Helper function for iterating across a vma list.  It ensures that the caller
1973  * will visit `gate_vma' prior to terminating the search.
1974  */
1975 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1976                                         struct vm_area_struct *gate_vma)
1977 {
1978         struct vm_area_struct *ret;
1979
1980         ret = this_vma->vm_next;
1981         if (ret)
1982                 return ret;
1983         if (this_vma == gate_vma)
1984                 return NULL;
1985         return gate_vma;
1986 }
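
/*
 * The two helpers above are used together as an iteration idiom, e.g.:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *	     vma = next_vma(vma, gate_vma))
 *		... visit vma ...
 *
 * which walks every mapping and then the gate vma exactly once.
 */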
1987
1988 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1989                              elf_addr_t e_shoff, int segs)
1990 {
1991         elf->e_shoff = e_shoff;
1992         elf->e_shentsize = sizeof(*shdr4extnum);
1993         elf->e_shnum = 1;
1994         elf->e_shstrndx = SHN_UNDEF;
1995
1996         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1997
1998         shdr4extnum->sh_type = SHT_NULL;
1999         shdr4extnum->sh_size = elf->e_shnum;
2000         shdr4extnum->sh_link = elf->e_shstrndx;
2001         shdr4extnum->sh_info = segs;
2002 }
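
/*
 * Sketch of the reader side (not kernel code): when e_phnum is PN_XNUM,
 * a consumer recovers the real segment count from section header 0,
 * which the function above fills in:
 *
 *	segs = (ehdr->e_phnum == PN_XNUM) ? shdr0->sh_info : ehdr->e_phnum;
 */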
2003
2004 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2005                                      unsigned long mm_flags)
2006 {
2007         struct vm_area_struct *vma;
2008         size_t size = 0;
2009
2010         for (vma = first_vma(current, gate_vma); vma != NULL;
2011              vma = next_vma(vma, gate_vma))
2012                 size += vma_dump_size(vma, mm_flags);
2013         return size;
2014 }
2015
2016 /*
2017  * Actual dumper
2018  *
2019  * This is a two-pass process; first we find the offsets of the bits,
2020  * and then they are actually written out.  If we run out of core limit
2021  * we just truncate.
2022  */
2023 static int elf_core_dump(struct coredump_params *cprm)
2024 {
2025         int has_dumped = 0;
2026         mm_segment_t fs;
2027         int segs;
2028         struct vm_area_struct *vma, *gate_vma;
2029         struct elfhdr *elf = NULL;
2030         loff_t offset = 0, dataoff;
2031         struct elf_note_info info = { };
2032         struct elf_phdr *phdr4note = NULL;
2033         struct elf_shdr *shdr4extnum = NULL;
2034         Elf_Half e_phnum;
2035         elf_addr_t e_shoff;
2036
2037         /*
2038          * We no longer stop all VM operations.
2039          * 
2040          * This is because any process that could possibly change map_count
2041          * or the mmap / vma pages is now blocked in do_exit until current
2042          * finishes this core dump.
2043          *
2044          * Only ptrace can touch these memory addresses, but it doesn't change
2045          * the map_count or the pages allocated. So no possibility of crashing
2046          * exists while dumping the mm->vm_next areas to the core file.
2047          */
2048   
2049         /* alloc memory for large data structures: too large to be on stack */
2050         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2051         if (!elf)
2052                 goto out;
2053         /*
2054          * The number of segs is recorded in the ELF header as a 16-bit value.
2055          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2056          */
2057         segs = current->mm->map_count;
2058         segs += elf_core_extra_phdrs();
2059
2060         gate_vma = get_gate_vma(current->mm);
2061         if (gate_vma != NULL)
2062                 segs++;
2063
2064         /* for notes section */
2065         segs++;
2066
2067         /* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2068          * this, the kernel supports extended numbering. Have a look at
2069          * include/linux/elf.h for further information. */
2070         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2071
2072         /*
2073          * Collect all the non-memory information about the process for the
2074          * notes.  This also sets up the file header.
2075          */
2076         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2077                 goto cleanup;
2078
2079         has_dumped = 1;
2080
2081         fs = get_fs();
2082         set_fs(KERNEL_DS);
2083
2084         offset += sizeof(*elf);                         /* Elf header */
2085         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2086
2087         /* Write notes phdr entry */
2088         {
2089                 size_t sz = get_note_info_size(&info);
2090
2091                 sz += elf_coredump_extra_notes_size();
2092
2093                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2094                 if (!phdr4note)
2095                         goto end_coredump;
2096
2097                 fill_elf_note_phdr(phdr4note, sz, offset);
2098                 offset += sz;
2099         }
2100
2101         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2102
2103         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2104         offset += elf_core_extra_data_size();
2105         e_shoff = offset;
2106
2107         if (e_phnum == PN_XNUM) {
2108                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2109                 if (!shdr4extnum)
2110                         goto end_coredump;
2111                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2112         }
2113
2114         offset = dataoff;
2115
2116         if (!dump_emit(cprm, elf, sizeof(*elf)))
2117                 goto end_coredump;
2118
2119         if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2120                 goto end_coredump;
2121
2122         /* Write program headers for segments dump */
2123         for (vma = first_vma(current, gate_vma); vma != NULL;
2124                         vma = next_vma(vma, gate_vma)) {
2125                 struct elf_phdr phdr;
2126
2127                 phdr.p_type = PT_LOAD;
2128                 phdr.p_offset = offset;
2129                 phdr.p_vaddr = vma->vm_start;
2130                 phdr.p_paddr = 0;
2131                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2132                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2133                 offset += phdr.p_filesz;
2134                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2135                 if (vma->vm_flags & VM_WRITE)
2136                         phdr.p_flags |= PF_W;
2137                 if (vma->vm_flags & VM_EXEC)
2138                         phdr.p_flags |= PF_X;
2139                 phdr.p_align = ELF_EXEC_PAGESIZE;
2140
2141                 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2142                         goto end_coredump;
2143         }
2144
2145         if (!elf_core_write_extra_phdrs(cprm, offset))
2146                 goto end_coredump;
2147
2148         /* write out the notes section */
2149         if (!write_note_info(&info, cprm))
2150                 goto end_coredump;
2151
2152         if (elf_coredump_extra_notes_write(cprm))
2153                 goto end_coredump;
2154
2155         /* Align to page */
2156         if (!dump_skip(cprm, dataoff - cprm->written))
2157                 goto end_coredump;
2158
2159         for (vma = first_vma(current, gate_vma); vma != NULL;
2160                         vma = next_vma(vma, gate_vma)) {
2161                 unsigned long addr;
2162                 unsigned long end;
2163
2164                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2165
2166                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2167                         struct page *page;
2168                         int stop;
2169
2170                         page = get_dump_page(addr);
2171                         if (page) {
2172                                 void *kaddr = kmap(page);
2173                                 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2174                                 kunmap(page);
2175                                 page_cache_release(page);
2176                         } else
2177                                 stop = !dump_skip(cprm, PAGE_SIZE);
2178                         if (stop)
2179                                 goto end_coredump;
2180                 }
2181         }
2182
2183         if (!elf_core_write_extra_data(cprm))
2184                 goto end_coredump;
2185
2186         if (e_phnum == PN_XNUM) {
2187                 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2188                         goto end_coredump;
2189         }
2190
2191 end_coredump:
2192         set_fs(fs);
2193
2194 cleanup:
2195         free_note_info(&info);
2196         kfree(shdr4extnum);
2197         kfree(phdr4note);
2198         kfree(elf);
2199 out:
2200         return has_dumped;
2201 }
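
/*
 * Resulting core file layout, as computed by the two passes above:
 *
 *	ELF header
 *	PT_NOTE program header
 *	one PT_LOAD program header per vma (plus any arch extra phdrs)
 *	note data
 *	padding up to the ELF_EXEC_PAGESIZE-aligned dataoff
 *	per-vma memory contents, emitted or skipped a page at a time
 *	arch extra data
 *	extended-numbering section header (only when e_phnum == PN_XNUM)
 */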
2202
2203 #endif          /* CONFIG_ELF_CORE */
2204
2205 static int __init init_elf_binfmt(void)
2206 {
2207         register_binfmt(&elf_format);
2208         return 0;
2209 }
2210
2211 static void __exit exit_elf_binfmt(void)
2212 {
2213         /* Remove the ELF loader. */
2214         unregister_binfmt(&elf_format);
2215 }
2216
2217 core_initcall(init_elf_binfmt);
2218 module_exit(exit_elf_binfmt);
2219 MODULE_LICENSE("GPL");