/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
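/*
 * Worked example (assuming ELF_MIN_ALIGN == 0x1000): for _v == 0x12345,
 * ELF_PAGESTART(_v) is 0x12000, ELF_PAGEOFFSET(_v) is 0x345, and
 * ELF_PAGEALIGN(_v) is 0x13000.
 */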

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

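/*
 * Map anonymous, zero-filled pages covering [start, end) (both rounded to
 * ELF page boundaries) and record 'end' as the new program break.  Used
 * to reserve the bss/brk area of the new image.
 */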
static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
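/*
 * Note that STACK_ROUND() masks the result down to a 16-byte boundary,
 * which most ABIs require of the stack pointer at process entry.
 */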

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

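/*
 * Build the final stack image seen by the new program.  On a
 * downward-growing stack the result, from low to high addresses, is
 * roughly:
 *
 *      argc
 *      argv[0] ... argv[argc-1], NULL
 *      envp[0] ... envp[envc-1], NULL
 *      auxv pairs, terminated by AT_NULL
 *      (argument/environment strings, platform strings and the
 *       AT_RANDOM bytes live above this)
 */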
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)
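        /*
         * Each NEW_AUX_ENT() emits one (id, value) pair into saved_auxv;
         * the vector is copied onto the new stack below and is terminated
         * by an AT_NULL entry, which is how ld.so and getauxval(3) find
         * its end.
         */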

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

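/*
 * Return the span from the (page-aligned) start of the first PT_LOAD
 * segment to the end of the last one, i.e. the address range the whole
 * image will occupy, or 0 if there are no PT_LOAD headers.
 */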
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

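/*
 * Apply up to STACK_RND_MASK pages of entropy to the stack top when the
 * task allows address-space randomization.
 */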
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries and dynamically linked
                         * executables; read in its path.
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value.
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * in runtime via sysctl or explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns the relocation
                         * adjustment.
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading a
   shared library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide how much of a segment to dump: all of it, part of it, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
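        /*
         * FILTER() tests one MMF_DUMP_* bit of mm_flags; these bits are
         * controlled from userspace via /proc/<pid>/coredump_filter.
         */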

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

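/*
 * On-disk layout of one ELF note, as produced by notesize()/writenote():
 * an elf_note header (n_namesz, n_descsz, n_type) followed by the name
 * and the descriptor data, each padded to a 4-byte boundary.
 */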
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        return dump_emit(cprm, &en, sizeof(en)) &&
            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_t utime, stime;

                task_cputime(p, &utime, &stime);
                cputime_to_timeval(utime, &prstatus->pr_utime);
                cputime_to_timeval(stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

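/*
 * Dump the auxv that was saved at exec time, including the terminating
 * AT_NULL pair.
 */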
static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                const siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static int fill_files_note(struct memelfnote *note)
{
        struct vm_area_struct *vma;
        unsigned count, size, names_ofs, remaining, n;
        user_long_t *data;
        user_long_t *start_end_ofs;
        char *name_base, *name_curpos;

        /* *Estimated* file count and total data size needed */
        count = current->mm->map_count;
        size = count * 64;

        names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
                return -EINVAL;
        size = round_up(size, PAGE_SIZE);
        data = vmalloc(size);
        if (!data)
                return -ENOMEM;

        start_end_ofs = data + 2;
        name_base = name_curpos = ((char *)data) + names_ofs;
        remaining = size - names_ofs;
        count = 0;
        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
                struct file *file;
                const char *filename;

                file = vma->vm_file;
                if (!file)
                        continue;
                filename = d_path(&file->f_path, name_curpos, remaining);
                if (IS_ERR(filename)) {
                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
                                vfree(data);
                                size = size * 5 / 4;
                                goto alloc;
                        }
                        continue;
                }

                /* d_path() fills at the end, move name down */
                /* n = strlen(filename) + 1: */
                n = (name_curpos + remaining) - filename;
                remaining = filename - name_curpos;
                memmove(name_curpos, filename, n);
                name_curpos += n;

                *start_end_ofs++ = vma->vm_start;
                *start_end_ofs++ = vma->vm_end;
                *start_end_ofs++ = vma->vm_pgoff;
                count++;
        }

1457         /* Now we know the exact file count and can store it */
1458         data[0] = count;
1459         data[1] = PAGE_SIZE;
1460         /*
1461          * The final count is usually less than current->mm->map_count,
1462          * so we need to move the filenames down.
1463          */
1464         n = current->mm->map_count - count;
1465         if (n != 0) {
1466                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1467                 memmove(name_base - shift_bytes, name_base,
1468                         name_curpos - name_base);
1469                 name_curpos -= shift_bytes;
1470         }
1471
1472         size = name_curpos - (char *)data;
1473         fill_note(note, "CORE", NT_FILE, size, data);
1474         return 0;
1475 }
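
/*
 * Illustrative sketch (not part of this file): how a userspace tool
 * might walk the NT_FILE note laid out above.  parse_nt_file() is a
 * hypothetical helper; it assumes userspace <stdio.h>/<string.h> and
 * uses plain long for the dumped user_long_t words.
 */
#if 0
static void parse_nt_file(const long *data)
{
        long count = data[0];
        long page_size = data[1];               /* units for file_ofs */
        const long *ent = data + 2;             /* start/end/file_ofs triples */
        const char *name = (const char *)(data + 2 + 3 * count);
        long i;

        for (i = 0; i < count; i++, ent += 3) {
                printf("%lx-%lx @ %lx: %s\n",
                       (unsigned long)ent[0], (unsigned long)ent[1],
                       (unsigned long)(ent[2] * page_size), name);
                name += strlen(name) + 1;       /* names are NUL-separated */
        }
}
#endif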
1476
1477 #ifdef CORE_DUMP_USE_REGSET
1478 #include <linux/regset.h>
1479
1480 struct elf_thread_core_info {
1481         struct elf_thread_core_info *next;
1482         struct task_struct *task;
1483         struct elf_prstatus prstatus;
1484         struct memelfnote notes[0];
1485 };
1486
1487 struct elf_note_info {
1488         struct elf_thread_core_info *thread;
1489         struct memelfnote psinfo;
1490         struct memelfnote signote;
1491         struct memelfnote auxv;
1492         struct memelfnote files;
1493         user_siginfo_t csigdata;
1494         size_t size;
1495         int thread_notes;
1496 };
1497
1498 /*
1499  * When a regset has a writeback hook, we call it on each thread before
1500  * dumping user memory.  On register window machines, this makes sure the
1501  * user memory backing the register data is up to date before we read it.
1502  */
1503 static void do_thread_regset_writeback(struct task_struct *task,
1504                                        const struct user_regset *regset)
1505 {
1506         if (regset->writeback)
1507                 regset->writeback(task, regset, 1);
1508 }
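
/*
 * Sketch of the hook this helper invokes (hypothetical arch code, not
 * part of this file), using the ->writeback signature from
 * <linux/regset.h>.  On a register-window machine it would flush any
 * cached windows out to the user stack before the dump reads them.
 */
#if 0
static int example_regset_writeback(struct task_struct *target,
                                    const struct user_regset *regset,
                                    int immediate)
{
        /* flush target's cached register state back to user memory */
        return 0;
}
#endif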
1509
1510 #ifndef PR_REG_SIZE
1511 #define PR_REG_SIZE(S) sizeof(S)
1512 #endif
1513
1514 #ifndef PRSTATUS_SIZE
1515 #define PRSTATUS_SIZE(S) sizeof(S)
1516 #endif
1517
1518 #ifndef PR_REG_PTR
1519 #define PR_REG_PTR(S) (&((S)->pr_reg))
1520 #endif
1521
1522 #ifndef SET_PR_FPVALID
1523 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1524 #endif
1525
1526 static int fill_thread_core_info(struct elf_thread_core_info *t,
1527                                  const struct user_regset_view *view,
1528                                  long signr, size_t *total)
1529 {
1530         unsigned int i;
1531
1532         /*
1533          * NT_PRSTATUS is the one special case, because the regset data
1534          * goes into the pr_reg field inside the note contents, rather
1535          * than being the whole note contents.  We fill the rest in here.
1536          * We assume that regset 0 is NT_PRSTATUS.
1537          */
1538         fill_prstatus(&t->prstatus, t->task, signr);
1539         (void) view->regsets[0].get(t->task, &view->regsets[0],
1540                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1541                                     PR_REG_PTR(&t->prstatus), NULL);
1542
1543         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1544                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1545         *total += notesize(&t->notes[0]);
1546
1547         do_thread_regset_writeback(t->task, &view->regsets[0]);
1548
1549         /*
1550          * Every other regset might generate a note too.  For each regset
1551          * that has no core_note_type or is inactive, we leave t->notes[i]
1552          * all zero and we'll know to skip writing it later.
1553          */
1554         for (i = 1; i < view->n; ++i) {
1555                 const struct user_regset *regset = &view->regsets[i];
1556                 do_thread_regset_writeback(t->task, regset);
1557                 if (regset->core_note_type && regset->get &&
1558                     (!regset->active || regset->active(t->task, regset))) {
1559                         int ret;
1560                         size_t size = regset->n * regset->size;
1561                         void *data = kmalloc(size, GFP_KERNEL);
1562                         if (unlikely(!data))
1563                                 return 0;
1564                         ret = regset->get(t->task, regset,
1565                                           0, size, data, NULL);
1566                         if (unlikely(ret))
1567                                 kfree(data);
1568                         else {
1569                                 if (regset->core_note_type != NT_PRFPREG)
1570                                         fill_note(&t->notes[i], "LINUX",
1571                                                   regset->core_note_type,
1572                                                   size, data);
1573                                 else {
1574                                         SET_PR_FPVALID(&t->prstatus, 1);
1575                                         fill_note(&t->notes[i], "CORE",
1576                                                   NT_PRFPREG, size, data);
1577                                 }
1578                                 *total += notesize(&t->notes[i]);
1579                         }
1580                 }
1581         }
1582
1583         return 1;
1584 }
1585
1586 static int fill_note_info(struct elfhdr *elf, int phdrs,
1587                           struct elf_note_info *info,
1588                           const siginfo_t *siginfo, struct pt_regs *regs)
1589 {
1590         struct task_struct *dump_task = current;
1591         const struct user_regset_view *view = task_user_regset_view(dump_task);
1592         struct elf_thread_core_info *t;
1593         struct elf_prpsinfo *psinfo;
1594         struct core_thread *ct;
1595         unsigned int i;
1596
1597         info->size = 0;
1598         info->thread = NULL;
1599
1600         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1601         if (psinfo == NULL) {
1602                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1603                 return 0;
1604         }
1605
1606         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1607
1608         /*
1609          * Figure out how many notes we're going to need for each thread.
1610          */
1611         info->thread_notes = 0;
1612         for (i = 0; i < view->n; ++i)
1613                 if (view->regsets[i].core_note_type != 0)
1614                         ++info->thread_notes;
1615
1616         /*
1617          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1618          * since it is our one special case.
1619          */
1620         if (unlikely(info->thread_notes == 0) ||
1621             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1622                 WARN_ON(1);
1623                 return 0;
1624         }
1625
1626         /*
1627          * Initialize the ELF file header.
1628          */
1629         fill_elf_header(elf, phdrs,
1630                         view->e_machine, view->e_flags);
1631
1632         /*
1633          * Allocate a structure for each thread.
1634          */
1635         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1636                 t = kzalloc(offsetof(struct elf_thread_core_info,
1637                                      notes[info->thread_notes]),
1638                             GFP_KERNEL);
1639                 if (unlikely(!t))
1640                         return 0;
1641
1642                 t->task = ct->task;
1643                 if (ct->task == dump_task || !info->thread) {
1644                         t->next = info->thread;
1645                         info->thread = t;
1646                 } else {
1647                         /*
1648                          * Make sure to keep the original task at
1649                          * the head of the list.
1650                          */
1651                         t->next = info->thread->next;
1652                         info->thread->next = t;
1653                 }
1654         }
1655
1656         /*
1657          * Now fill in each thread's information.
1658          */
1659         for (t = info->thread; t != NULL; t = t->next)
1660                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1661                         return 0;
1662
1663         /*
1664          * Fill in the two process-wide notes.
1665          */
1666         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1667         info->size += notesize(&info->psinfo);
1668
1669         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1670         info->size += notesize(&info->signote);
1671
1672         fill_auxv_note(&info->auxv, current->mm);
1673         info->size += notesize(&info->auxv);
1674
1675         if (fill_files_note(&info->files) == 0)
1676                 info->size += notesize(&info->files);
1677
1678         return 1;
1679 }
1680
1681 static size_t get_note_info_size(struct elf_note_info *info)
1682 {
1683         return info->size;
1684 }
1685
1686 /*
1687  * Write all the notes for each thread.  When writing the first thread, the
1688  * process-wide notes are interleaved after the first thread-specific note.
1689  */
1690 static int write_note_info(struct elf_note_info *info,
1691                            struct coredump_params *cprm)
1692 {
1693         bool first = true;
1694         struct elf_thread_core_info *t = info->thread;
1695
1696         do {
1697                 int i;
1698
1699                 if (!writenote(&t->notes[0], cprm))
1700                         return 0;
1701
1702                 if (first && !writenote(&info->psinfo, cprm))
1703                         return 0;
1704                 if (first && !writenote(&info->signote, cprm))
1705                         return 0;
1706                 if (first && !writenote(&info->auxv, cprm))
1707                         return 0;
1708                 if (first && info->files.data &&
1709                                 !writenote(&info->files, cprm))
1710                         return 0;
1711
1712                 for (i = 1; i < info->thread_notes; ++i)
1713                         if (t->notes[i].data &&
1714                             !writenote(&t->notes[i], cprm))
1715                                 return 0;
1716
1717                 first = false;
1718                 t = t->next;
1719         } while (t);
1720
1721         return 1;
1722 }
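
/*
 * For a dump of a process with two threads, the loop above therefore
 * emits notes in this order (illustrative):
 *
 *   thread 1: NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV,
 *             NT_FILE (if collected), then its remaining regset notes
 *   thread 2: NT_PRSTATUS, then its remaining regset notes
 */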
1723
1724 static void free_note_info(struct elf_note_info *info)
1725 {
1726         struct elf_thread_core_info *threads = info->thread;
1727         while (threads) {
1728                 unsigned int i;
1729                 struct elf_thread_core_info *t = threads;
1730                 threads = t->next;
1731                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1732                 for (i = 1; i < info->thread_notes; ++i)
1733                         kfree(t->notes[i].data);
1734                 kfree(t);
1735         }
1736         kfree(info->psinfo.data);
1737         vfree(info->files.data);
1738 }
1739
1740 #else
1741
1742 /* Here is the structure in which the status of each thread is captured. */
1743 struct elf_thread_status
1744 {
1745         struct list_head list;
1746         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1747         elf_fpregset_t fpu;             /* NT_PRFPREG */
1748         struct task_struct *thread;
1749 #ifdef ELF_CORE_COPY_XFPREGS
1750         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1751 #endif
1752         struct memelfnote notes[3];
1753         int num_notes;
1754 };
1755
1756 /*
1757  * In order to add the specific thread information for the elf file format,
1758  * we need to keep a linked list of every thread's pr_status and then create
1759  * a single section for them in the final core file.
1760  */
1761 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1762 {
1763         int sz = 0;
1764         struct task_struct *p = t->thread;
1765         t->num_notes = 0;
1766
1767         fill_prstatus(&t->prstatus, p, signr);
1768         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1769         
1770         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1771                   &(t->prstatus));
1772         t->num_notes++;
1773         sz += notesize(&t->notes[0]);
1774
1775         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1776                                                                 &t->fpu))) {
1777                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1778                           &(t->fpu));
1779                 t->num_notes++;
1780                 sz += notesize(&t->notes[1]);
1781         }
1782
1783 #ifdef ELF_CORE_COPY_XFPREGS
1784         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1785                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1786                           sizeof(t->xfpu), &t->xfpu);
1787                 t->num_notes++;
1788                 sz += notesize(&t->notes[2]);
1789         }
1790 #endif  
1791         return sz;
1792 }
1793
1794 struct elf_note_info {
1795         struct memelfnote *notes;
1796         struct memelfnote *notes_files;
1797         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1798         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1799         struct list_head thread_list;
1800         elf_fpregset_t *fpu;
1801 #ifdef ELF_CORE_COPY_XFPREGS
1802         elf_fpxregset_t *xfpu;
1803 #endif
1804         user_siginfo_t csigdata;
1805         int thread_status_size;
1806         int numnote;
1807 };
1808
1809 static int elf_note_info_init(struct elf_note_info *info)
1810 {
1811         memset(info, 0, sizeof(*info));
1812         INIT_LIST_HEAD(&info->thread_list);
1813
1814         /* Allocate space for ELF notes */
1815         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1816         if (!info->notes)
1817                 return 0;
1818         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1819         if (!info->psinfo)
1820                 return 0;
1821         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1822         if (!info->prstatus)
1823                 return 0;
1824         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1825         if (!info->fpu)
1826                 return 0;
1827 #ifdef ELF_CORE_COPY_XFPREGS
1828         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1829         if (!info->xfpu)
1830                 return 0;
1831 #endif
1832         return 1;
1833 }
1834
1835 static int fill_note_info(struct elfhdr *elf, int phdrs,
1836                           struct elf_note_info *info,
1837                           const siginfo_t *siginfo, struct pt_regs *regs)
1838 {
1839         struct list_head *t;
1840         struct core_thread *ct;
1841         struct elf_thread_status *ets;
1842
1843         if (!elf_note_info_init(info))
1844                 return 0;
1845
1846         for (ct = current->mm->core_state->dumper.next;
1847                                         ct; ct = ct->next) {
1848                 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1849                 if (!ets)
1850                         return 0;
1851
1852                 ets->thread = ct->task;
1853                 list_add(&ets->list, &info->thread_list);
1854         }
1855
1856         list_for_each(t, &info->thread_list) {
1857                 int sz;
1858
1859                 ets = list_entry(t, struct elf_thread_status, list);
1860                 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1861                 info->thread_status_size += sz;
1862         }
1863         /* now collect the dump for the current task */
1864         memset(info->prstatus, 0, sizeof(*info->prstatus));
1865         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1866         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1867
1868         /* Set up header */
1869         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1870
1871         /*
1872          * Set up the notes in similar form to SVR4 core dumps made
1873          * with info from their /proc.
1874          */
1875
1876         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1877                   sizeof(*info->prstatus), info->prstatus);
1878         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1879         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1880                   sizeof(*info->psinfo), info->psinfo);
1881
1882         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1883         fill_auxv_note(info->notes + 3, current->mm);
1884         info->numnote = 4;
1885
1886         if (fill_files_note(info->notes + info->numnote) == 0) {
1887                 info->notes_files = info->notes + info->numnote;
1888                 info->numnote++;
1889         }
1890
1891         /* Try to dump the FPU. */
1892         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1893                                                                info->fpu);
1894         if (info->prstatus->pr_fpvalid)
1895                 fill_note(info->notes + info->numnote++,
1896                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1897 #ifdef ELF_CORE_COPY_XFPREGS
1898         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1899                 fill_note(info->notes + info->numnote++,
1900                           "LINUX", ELF_CORE_XFPREG_TYPE,
1901                           sizeof(*info->xfpu), info->xfpu);
1902 #endif
1903
1904         return 1;
1905 }
1906
1907 static size_t get_note_info_size(struct elf_note_info *info)
1908 {
1909         int sz = 0;
1910         int i;
1911
1912         for (i = 0; i < info->numnote; i++)
1913                 sz += notesize(info->notes + i);
1914
1915         sz += info->thread_status_size;
1916
1917         return sz;
1918 }
1919
1920 static int write_note_info(struct elf_note_info *info,
1921                            struct coredump_params *cprm)
1922 {
1923         int i;
1924         struct list_head *t;
1925
1926         for (i = 0; i < info->numnote; i++)
1927                 if (!writenote(info->notes + i, cprm))
1928                         return 0;
1929
1930         /* write out the thread status notes section */
1931         list_for_each(t, &info->thread_list) {
1932                 struct elf_thread_status *tmp =
1933                                 list_entry(t, struct elf_thread_status, list);
1934
1935                 for (i = 0; i < tmp->num_notes; i++)
1936                         if (!writenote(&tmp->notes[i], cprm))
1937                                 return 0;
1938         }
1939
1940         return 1;
1941 }
1942
1943 static void free_note_info(struct elf_note_info *info)
1944 {
1945         while (!list_empty(&info->thread_list)) {
1946                 struct list_head *tmp = info->thread_list.next;
1947                 list_del(tmp);
1948                 kfree(list_entry(tmp, struct elf_thread_status, list));
1949         }
1950
1951         /* Free data possibly allocated by fill_files_note(): */
1952         if (info->notes_files)
1953                 vfree(info->notes_files->data);
1954
1955         kfree(info->prstatus);
1956         kfree(info->psinfo);
1957         kfree(info->notes);
1958         kfree(info->fpu);
1959 #ifdef ELF_CORE_COPY_XFPREGS
1960         kfree(info->xfpu);
1961 #endif
1962 }
1963
1964 #endif
1965
1966 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1967                                         struct vm_area_struct *gate_vma)
1968 {
1969         struct vm_area_struct *ret = tsk->mm->mmap;
1970
1971         if (ret)
1972                 return ret;
1973         return gate_vma;
1974 }

1975 /*
1976  * Helper function for iterating across a vma list.  It ensures that the caller
1977  * will visit `gate_vma' prior to terminating the search.
1978  */
1979 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1980                                         struct vm_area_struct *gate_vma)
1981 {
1982         struct vm_area_struct *ret;
1983
1984         ret = this_vma->vm_next;
1985         if (ret)
1986                 return ret;
1987         if (this_vma == gate_vma)
1988                 return NULL;
1989         return gate_vma;
1990 }
1991
1992 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1993                              elf_addr_t e_shoff, int segs)
1994 {
1995         elf->e_shoff = e_shoff;
1996         elf->e_shentsize = sizeof(*shdr4extnum);
1997         elf->e_shnum = 1;
1998         elf->e_shstrndx = SHN_UNDEF;
1999
2000         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2001
2002         shdr4extnum->sh_type = SHT_NULL;
2003         shdr4extnum->sh_size = elf->e_shnum;
2004         shdr4extnum->sh_link = elf->e_shstrndx;
2005         shdr4extnum->sh_info = segs;
2006 }
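
/*
 * Illustrative sketch (not part of this file): a consumer recovers the
 * real segment count from sh_info of the extension section header
 * written above.  real_phnum() is a hypothetical helper.
 */
#if 0
static unsigned int real_phnum(const struct elfhdr *elf,
                               const struct elf_shdr *shdr0)
{
        return elf->e_phnum == PN_XNUM ? shdr0->sh_info : elf->e_phnum;
}
#endif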
2007
2008 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2009                                      unsigned long mm_flags)
2010 {
2011         struct vm_area_struct *vma;
2012         size_t size = 0;
2013
2014         for (vma = first_vma(current, gate_vma); vma != NULL;
2015              vma = next_vma(vma, gate_vma))
2016                 size += vma_dump_size(vma, mm_flags);
2017         return size;
2018 }
2019
2020 /*
2021  * Actual dumper
2022  *
2023  * This is a two-pass process; first we find the offsets of the bits,
2024  * and then they are actually written out.  If we run out of core limit
2025  * we just truncate.
2026  */
2027 static int elf_core_dump(struct coredump_params *cprm)
2028 {
2029         int has_dumped = 0;
2030         mm_segment_t fs;
2031         int segs;
2032         struct vm_area_struct *vma, *gate_vma;
2033         struct elfhdr *elf = NULL;
2034         loff_t offset = 0, dataoff;
2035         struct elf_note_info info = { };
2036         struct elf_phdr *phdr4note = NULL;
2037         struct elf_shdr *shdr4extnum = NULL;
2038         Elf_Half e_phnum;
2039         elf_addr_t e_shoff;
2040
2041         /*
2042          * We no longer stop all VM operations.
2043          * 
2044          * This is because those processes that could possibly change map_count
2045          * or the mmap / vma pages are now blocked in do_exit on current
2046          * finishing this core dump.
2047          *
2048          * Only ptrace can touch these memory addresses, but it doesn't change
2049          * the map_count or the pages allocated. So no possibility of crashing
2050          * exists while dumping the mm->vm_next areas to the core file.
2051          */
2052   
2053         /* alloc memory for large data structures: too large to be on stack */
2054         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2055         if (!elf)
2056                 goto out;
2057         /*
2058          * The number of segs is recorded in the ELF header as a 16-bit value.
2059          * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify this.
2060          */
2061         segs = current->mm->map_count;
2062         segs += elf_core_extra_phdrs();
2063
2064         gate_vma = get_gate_vma(current->mm);
2065         if (gate_vma != NULL)
2066                 segs++;
2067
2068         /* for notes section */
2069         segs++;
2070
2071         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2072          * this, the kernel supports extended numbering. See
2073          * include/linux/elf.h for further information. */
2074         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2075
2076         /*
2077          * Collect all the non-memory information about the process for the
2078          * notes.  This also sets up the file header.
2079          */
2080         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2081                 goto cleanup;
2082
2083         has_dumped = 1;
2084
2085         fs = get_fs();
2086         set_fs(KERNEL_DS);
2087
2088         offset += sizeof(*elf);                         /* Elf header */
2089         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2090
2091         /* Write notes phdr entry */
2092         {
2093                 size_t sz = get_note_info_size(&info);
2094
2095                 sz += elf_coredump_extra_notes_size();
2096
2097                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2098                 if (!phdr4note)
2099                         goto end_coredump;
2100
2101                 fill_elf_note_phdr(phdr4note, sz, offset);
2102                 offset += sz;
2103         }
2104
2105         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2106
2107         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2108         offset += elf_core_extra_data_size();
2109         e_shoff = offset;
2110
2111         if (e_phnum == PN_XNUM) {
2112                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2113                 if (!shdr4extnum)
2114                         goto end_coredump;
2115                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2116         }
2117
2118         offset = dataoff;
2119
2120         if (!dump_emit(cprm, elf, sizeof(*elf)))
2121                 goto end_coredump;
2122
2123         if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2124                 goto end_coredump;
2125
2126         /* Write program headers for segments dump */
2127         for (vma = first_vma(current, gate_vma); vma != NULL;
2128                         vma = next_vma(vma, gate_vma)) {
2129                 struct elf_phdr phdr;
2130
2131                 phdr.p_type = PT_LOAD;
2132                 phdr.p_offset = offset;
2133                 phdr.p_vaddr = vma->vm_start;
2134                 phdr.p_paddr = 0;
2135                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2136                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2137                 offset += phdr.p_filesz;
2138                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2139                 if (vma->vm_flags & VM_WRITE)
2140                         phdr.p_flags |= PF_W;
2141                 if (vma->vm_flags & VM_EXEC)
2142                         phdr.p_flags |= PF_X;
2143                 phdr.p_align = ELF_EXEC_PAGESIZE;
2144
2145                 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2146                         goto end_coredump;
2147         }
2148
2149         if (!elf_core_write_extra_phdrs(cprm, offset))
2150                 goto end_coredump;
2151
2152         /* write out the notes section */
2153         if (!write_note_info(&info, cprm))
2154                 goto end_coredump;
2155
2156         if (elf_coredump_extra_notes_write(cprm))
2157                 goto end_coredump;
2158
2159         /* Align to page */
2160         if (!dump_skip(cprm, dataoff - cprm->written))
2161                 goto end_coredump;
2162
2163         for (vma = first_vma(current, gate_vma); vma != NULL;
2164                         vma = next_vma(vma, gate_vma)) {
2165                 unsigned long addr;
2166                 unsigned long end;
2167
2168                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2169
2170                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2171                         struct page *page;
2172                         int stop;
2173
2174                         page = get_dump_page(addr);
2175                         if (page) {
2176                                 void *kaddr = kmap(page);
2177                                 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2178                                 kunmap(page);
2179                                 page_cache_release(page);
2180                         } else
2181                                 stop = !dump_skip(cprm, PAGE_SIZE);
2182                         if (stop)
2183                                 goto end_coredump;
2184                 }
2185         }
2186
2187         if (!elf_core_write_extra_data(cprm))
2188                 goto end_coredump;
2189
2190         if (e_phnum == PN_XNUM) {
2191                 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2192                         goto end_coredump;
2193         }
2194
2195 end_coredump:
2196         set_fs(fs);
2197
2198 cleanup:
2199         free_note_info(&info);
2200         kfree(shdr4extnum);
2201         kfree(phdr4note);
2202         kfree(elf);
2203 out:
2204         return has_dumped;
2205 }
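
/*
 * Resulting core file layout, as computed by the first pass above
 * (illustrative):
 *
 *   ELF header
 *   program headers (one per vma, plus the note phdr and extra phdrs)
 *   note data (per-thread and process-wide notes)
 *   ...padding up to dataoff (ELF_EXEC_PAGESIZE aligned)...
 *   PT_LOAD segment contents, in vma order
 *   arch-specific extra data, if any
 *   extended-numbering section header (only when e_phnum == PN_XNUM)
 */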
2206
2207 #endif          /* CONFIG_ELF_CORE */
2208
2209 static int __init init_elf_binfmt(void)
2210 {
2211         register_binfmt(&elf_format);
2212         return 0;
2213 }
2214
2215 static void __exit exit_elf_binfmt(void)
2216 {
2217         /* Remove the ELF loader. */
2218         unregister_binfmt(&elf_format);
2219 }
2220
2221 core_initcall(init_elf_binfmt);
2222 module_exit(exit_elf_binfmt);
2223 MODULE_LICENSE("GPL");