/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
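
/*
 * Illustrative only: with a 4 KiB ELF_MIN_ALIGN, these macros behave as
 * follows for, say, _v == 0x08049f44:
 *   ELF_PAGESTART(_v)  == 0x08049000  (round down to the page start)
 *   ELF_PAGEOFFSET(_v) == 0x00000f44  (offset within the page)
 *   ELF_PAGEALIGN(_v)  == 0x0804a000  (round up to the next page boundary)
 */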

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
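
/*
 * Illustrative only: in the common grows-down case, STACK_ALLOC(p, 5)
 * first moves p down by 5 bytes and then yields the new (lower) p, so
 * the caller may copy 5 bytes to that address.  STACK_ROUND() then
 * snaps the resulting stack pointer down to a 16-byte boundary.
 */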

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

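        /*
         * Illustrative only: the initial stack built below ends up laid
         * out (from the final sp upward) as the SysV ELF ABI describes:
         *
         *   argc
         *   argv[0] ... argv[argc-1], NULL
         *   envp[0] ... envp[envc-1], NULL
         *   auxv pairs (AT_* id/value), terminated by AT_NULL
         *
         * The argument and environment strings themselves live higher up,
         * where they were copied before this function ran.
         */
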
        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (this unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
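
/*
 * Illustrative only: for a hypothetical image whose first PT_LOAD segment
 * starts at p_vaddr 0x400000 and whose last PT_LOAD segment starts at
 * p_vaddr 0x600000 with p_memsz 0x800, total_mapping_size() returns
 * 0x600800 - 0x400000 = 0x200800, i.e. the span the whole image needs.
 */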


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
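
/*
 * Worked arithmetic for the default above: with 4 KiB pages
 * (PAGE_SHIFT == 12), STACK_RND_MASK is 0x7ff, so the random offset
 * computed below can be up to 0x7ff << 12 = 0x7ff000 bytes, i.e. just
 * under 8 MiB of stack-top randomization.
 */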

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = (unsigned long) get_random_int();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * in runtime via sysctl or explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }
#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
999         ELF_PLAT_INIT(regs, reloc_func_desc);
1000 #endif
1001
1002         start_thread(regs, elf_entry, bprm->p);
1003         retval = 0;
1004 out:
1005         kfree(loc);
1006 out_ret:
1007         return retval;
1008
1009         /* error cleanup */
1010 out_free_dentry:
1011         allow_write_access(interpreter);
1012         if (interpreter)
1013                 fput(interpreter);
1014 out_free_interp:
1015         kfree(elf_interpreter);
1016 out_free_ph:
1017         kfree(elf_phdata);
1018         goto out;
1019 }
1020
1021 /* This is really simpleminded and specialized - we are loading an
1022    a.out library that is given an ELF header. */
1023 static int load_elf_library(struct file *file)
1024 {
1025         struct elf_phdr *elf_phdata;
1026         struct elf_phdr *eppnt;
1027         unsigned long elf_bss, bss, len;
1028         int retval, error, i, j;
1029         struct elfhdr elf_ex;
1030
1031         error = -ENOEXEC;
1032         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1033         if (retval != sizeof(elf_ex))
1034                 goto out;
1035
1036         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1037                 goto out;
1038
1039         /* First of all, some simple consistency checks */
1040         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1041             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1042                 goto out;
1043
1044         /* Now read in all of the header information */
1045
1046         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1047         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1048
1049         error = -ENOMEM;
1050         elf_phdata = kmalloc(j, GFP_KERNEL);
1051         if (!elf_phdata)
1052                 goto out;
1053
1054         eppnt = elf_phdata;
1055         error = -ENOEXEC;
1056         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1057         if (retval != j)
1058                 goto out_free_ph;
1059
1060         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1061                 if ((eppnt + i)->p_type == PT_LOAD)
1062                         j++;
1063         if (j != 1)
1064                 goto out_free_ph;
1065
1066         while (eppnt->p_type != PT_LOAD)
1067                 eppnt++;
1068
1069         /* Now use mmap to map the library into memory. */
1070         error = vm_mmap(file,
1071                         ELF_PAGESTART(eppnt->p_vaddr),
1072                         (eppnt->p_filesz +
1073                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1074                         PROT_READ | PROT_WRITE | PROT_EXEC,
1075                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1076                         (eppnt->p_offset -
1077                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1078         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1079                 goto out_free_ph;
1080
1081         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1082         if (padzero(elf_bss)) {
1083                 error = -EFAULT;
1084                 goto out_free_ph;
1085         }
1086
1087         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1088                             ELF_MIN_ALIGN - 1);
1089         bss = eppnt->p_memsz + eppnt->p_vaddr;
1090         if (bss > len)
1091                 vm_brk(len, bss - len);
1092         error = 0;
1093
1094 out_free_ph:
1095         kfree(elf_phdata);
1096 out:
1097         return error;
1098 }
1099
1100 #ifdef CONFIG_ELF_CORE
1101 /*
1102  * ELF core dumper
1103  *
1104  * Modelled on fs/exec.c:aout_core_dump()
1105  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1106  */
1107
1108 /*
1109  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1110  * that are useful for post-mortem analysis are included in every core dump.
1111  * In that way we ensure that the core dump is fully interpretable later
1112  * without matching up the same kernel and hardware config to see what PC values
1113  * meant. These special mappings include - vDSO, vsyscall, and other
1114  * architecture specific mappings
1115  */
1116 static bool always_dump_vma(struct vm_area_struct *vma)
1117 {
1118         /* Any vsyscall mappings? */
1119         if (vma == get_gate_vma(vma->vm_mm))
1120                 return true;
1121         /*
1122          * arch_vma_name() returns non-NULL for special architecture mappings,
1123          * such as vDSO sections.
1124          */
1125         if (arch_vma_name(vma))
1126                 return true;
1127
1128         return false;
1129 }
1130
1131 /*
1132  * Decide what to dump of a segment, part, all or none.
1133  */
1134 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1135                                    unsigned long mm_flags)
1136 {
1137 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1138
1139         /* always dump the vdso and vsyscall sections */
1140         if (always_dump_vma(vma))
1141                 goto whole;
1142
1143         if (vma->vm_flags & VM_DONTDUMP)
1144                 return 0;
1145
1146         /* Hugetlb memory check */
1147         if (vma->vm_flags & VM_HUGETLB) {
1148                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1149                         goto whole;
1150                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1151                         goto whole;
1152                 return 0;
1153         }
1154
1155         /* Do not dump I/O mapped devices or special mappings */
1156         if (vma->vm_flags & VM_IO)
1157                 return 0;
1158
1159         /* By default, dump shared memory if mapped from an anonymous file. */
1160         if (vma->vm_flags & VM_SHARED) {
1161                 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1162                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1163                         goto whole;
1164                 return 0;
1165         }
1166
1167         /* Dump segments that have been written to.  */
1168         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1169                 goto whole;
1170         if (vma->vm_file == NULL)
1171                 return 0;
1172
1173         if (FILTER(MAPPED_PRIVATE))
1174                 goto whole;
1175
1176         /*
1177          * If this looks like the beginning of a DSO or executable mapping,
1178          * check for an ELF header.  If we find one, dump the first page to
1179          * aid in determining what was mapped here.
1180          */
1181         if (FILTER(ELF_HEADERS) &&
1182             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1183                 u32 __user *header = (u32 __user *) vma->vm_start;
1184                 u32 word;
1185                 mm_segment_t fs = get_fs();
1186                 /*
1187                  * Doing it this way gets the constant folded by GCC.
1188                  */
1189                 union {
1190                         u32 cmp;
1191                         char elfmag[SELFMAG];
1192                 } magic;
1193                 BUILD_BUG_ON(SELFMAG != sizeof word);
1194                 magic.elfmag[EI_MAG0] = ELFMAG0;
1195                 magic.elfmag[EI_MAG1] = ELFMAG1;
1196                 magic.elfmag[EI_MAG2] = ELFMAG2;
1197                 magic.elfmag[EI_MAG3] = ELFMAG3;
1198                 /*
1199                  * Switch to the user "segment" for get_user(),
1200                  * then put back what elf_core_dump() had in place.
1201                  */
1202                 set_fs(USER_DS);
1203                 if (unlikely(get_user(word, header)))
1204                         word = 0;
1205                 set_fs(fs);
1206                 if (word == magic.cmp)
1207                         return PAGE_SIZE;
1208         }
1209
1210 #undef  FILTER
1211
1212         return 0;
1213
1214 whole:
1215         return vma->vm_end - vma->vm_start;
1216 }
1217
1218 /* An ELF note in memory */
1219 struct memelfnote
1220 {
1221         const char *name;
1222         int type;
1223         unsigned int datasz;
1224         void *data;
1225 };
1226
1227 static int notesize(struct memelfnote *en)
1228 {
1229         int sz;
1230
1231         sz = sizeof(struct elf_note);
1232         sz += roundup(strlen(en->name) + 1, 4);
1233         sz += roundup(en->datasz, 4);
1234
1235         return sz;
1236 }
1237
1238 #define DUMP_WRITE(addr, nr, foffset)   \
1239         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1240
1241 static int alignfile(struct file *file, loff_t *foffset)
1242 {
1243         static const char buf[4] = { 0, };
1244         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1245         return 1;
1246 }
1247
1248 static int writenote(struct memelfnote *men, struct file *file,
1249                         loff_t *foffset)
1250 {
1251         struct elf_note en;
1252         en.n_namesz = strlen(men->name) + 1;
1253         en.n_descsz = men->datasz;
1254         en.n_type = men->type;
1255
1256         DUMP_WRITE(&en, sizeof(en), foffset);
1257         DUMP_WRITE(men->name, en.n_namesz, foffset);
1258         if (!alignfile(file, foffset))
1259                 return 0;
1260         DUMP_WRITE(men->data, men->datasz, foffset);
1261         if (!alignfile(file, foffset))
1262                 return 0;
1263
1264         return 1;
1265 }
1266 #undef DUMP_WRITE
1267
1268 static void fill_elf_header(struct elfhdr *elf, int segs,
1269                             u16 machine, u32 flags)
1270 {
1271         memset(elf, 0, sizeof(*elf));
1272
1273         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1274         elf->e_ident[EI_CLASS] = ELF_CLASS;
1275         elf->e_ident[EI_DATA] = ELF_DATA;
1276         elf->e_ident[EI_VERSION] = EV_CURRENT;
1277         elf->e_ident[EI_OSABI] = ELF_OSABI;
1278
1279         elf->e_type = ET_CORE;
1280         elf->e_machine = machine;
1281         elf->e_version = EV_CURRENT;
1282         elf->e_phoff = sizeof(struct elfhdr);
1283         elf->e_flags = flags;
1284         elf->e_ehsize = sizeof(struct elfhdr);
1285         elf->e_phentsize = sizeof(struct elf_phdr);
1286         elf->e_phnum = segs;
1287
1288         return;
1289 }
1290
1291 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1292 {
1293         phdr->p_type = PT_NOTE;
1294         phdr->p_offset = offset;
1295         phdr->p_vaddr = 0;
1296         phdr->p_paddr = 0;
1297         phdr->p_filesz = sz;
1298         phdr->p_memsz = 0;
1299         phdr->p_flags = 0;
1300         phdr->p_align = 0;
1301         return;
1302 }
1303
1304 static void fill_note(struct memelfnote *note, const char *name, int type, 
1305                 unsigned int sz, void *data)
1306 {
1307         note->name = name;
1308         note->type = type;
1309         note->datasz = sz;
1310         note->data = data;
1311         return;
1312 }
1313
1314 /*
1315  * fill up all the fields in prstatus from the given task struct, except
1316  * registers which need to be filled up separately.
1317  */
1318 static void fill_prstatus(struct elf_prstatus *prstatus,
1319                 struct task_struct *p, long signr)
1320 {
1321         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1322         prstatus->pr_sigpend = p->pending.signal.sig[0];
1323         prstatus->pr_sighold = p->blocked.sig[0];
1324         rcu_read_lock();
1325         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1326         rcu_read_unlock();
1327         prstatus->pr_pid = task_pid_vnr(p);
1328         prstatus->pr_pgrp = task_pgrp_vnr(p);
1329         prstatus->pr_sid = task_session_vnr(p);
1330         if (thread_group_leader(p)) {
1331                 struct task_cputime cputime;
1332
1333                 /*
1334                  * This is the record for the group leader.  It shows the
1335                  * group-wide total, not its individual thread total.
1336                  */
1337                 thread_group_cputime(p, &cputime);
1338                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1339                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1340         } else {
1341                 cputime_t utime, stime;
1342
1343                 task_cputime(p, &utime, &stime);
1344                 cputime_to_timeval(utime, &prstatus->pr_utime);
1345                 cputime_to_timeval(stime, &prstatus->pr_stime);
1346         }
1347         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1348         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1349 }
1350
1351 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1352                        struct mm_struct *mm)
1353 {
1354         const struct cred *cred;
1355         unsigned int i, len;
1356         
1357         /* first copy the parameters from user space */
1358         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1359
1360         len = mm->arg_end - mm->arg_start;
1361         if (len >= ELF_PRARGSZ)
1362                 len = ELF_PRARGSZ-1;
1363         if (copy_from_user(&psinfo->pr_psargs,
1364                            (const char __user *)mm->arg_start, len))
1365                 return -EFAULT;
1366         for(i = 0; i < len; i++)
1367                 if (psinfo->pr_psargs[i] == 0)
1368                         psinfo->pr_psargs[i] = ' ';
1369         psinfo->pr_psargs[len] = 0;
1370
1371         rcu_read_lock();
1372         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1373         rcu_read_unlock();
1374         psinfo->pr_pid = task_pid_vnr(p);
1375         psinfo->pr_pgrp = task_pgrp_vnr(p);
1376         psinfo->pr_sid = task_session_vnr(p);
1377
1378         i = p->state ? ffz(~p->state) + 1 : 0;
1379         psinfo->pr_state = i;
1380         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1381         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1382         psinfo->pr_nice = task_nice(p);
1383         psinfo->pr_flag = p->flags;
1384         rcu_read_lock();
1385         cred = __task_cred(p);
1386         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1387         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1388         rcu_read_unlock();
1389         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1390         
1391         return 0;
1392 }
1393
1394 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1395 {
1396         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1397         int i = 0;
1398         do
1399                 i += 2;
1400         while (auxv[i - 2] != AT_NULL);
1401         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1402 }
1403
1404 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1405                 siginfo_t *siginfo)
1406 {
1407         mm_segment_t old_fs = get_fs();
1408         set_fs(KERNEL_DS);
1409         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1410         set_fs(old_fs);
1411         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1412 }
1413
1414 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1415 /*
1416  * Format of NT_FILE note:
1417  *
1418  * long count     -- how many files are mapped
1419  * long page_size -- units for file_ofs
1420  * array of [COUNT] elements of
1421  *   long start
1422  *   long end
1423  *   long file_ofs
1424  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1425  */
1426 static int fill_files_note(struct memelfnote *note)
1427 {
1428         struct vm_area_struct *vma;
1429         unsigned count, size, names_ofs, remaining, n;
1430         user_long_t *data;
1431         user_long_t *start_end_ofs;
1432         char *name_base, *name_curpos;
1433
1434         /* *Estimated* file count and total data size needed */
1435         count = current->mm->map_count;
1436         size = count * 64;
1437
1438         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1439  alloc:
1440         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1441                 return -EINVAL;
1442         size = round_up(size, PAGE_SIZE);
1443         data = vmalloc(size);
1444         if (!data)
1445                 return -ENOMEM;
1446
1447         start_end_ofs = data + 2;
1448         name_base = name_curpos = ((char *)data) + names_ofs;
1449         remaining = size - names_ofs;
1450         count = 0;
1451         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1452                 struct file *file;
1453                 const char *filename;
1454
1455                 file = vma->vm_file;
1456                 if (!file)
1457                         continue;
1458                 filename = d_path(&file->f_path, name_curpos, remaining);
1459                 if (IS_ERR(filename)) {
1460                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1461                                 vfree(data);
1462                                 size = size * 5 / 4;
1463                                 goto alloc;
1464                         }
1465                         continue;
1466                 }
1467
1468                 /* d_path() fills at the end, move name down */
1469                 /* n = strlen(filename) + 1: */
1470                 n = (name_curpos + remaining) - filename;
1471                 remaining = filename - name_curpos;
1472                 memmove(name_curpos, filename, n);
1473                 name_curpos += n;
1474
1475                 *start_end_ofs++ = vma->vm_start;
1476                 *start_end_ofs++ = vma->vm_end;
1477                 *start_end_ofs++ = vma->vm_pgoff;
1478                 count++;
1479         }
1480
1481         /* Now we know exact count of files, can store it */
1482         data[0] = count;
1483         data[1] = PAGE_SIZE;
1484         /*
1485          * count is usually less than current->mm->map_count (anonymous
1486          * vmas carry no file), so slide the filenames down over the gap.
1487          */
1488         n = current->mm->map_count - count;
1489         if (n != 0) {
1490                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1491                 memmove(name_base - shift_bytes, name_base,
1492                         name_curpos - name_base);
1493                 name_curpos -= shift_bytes;
1494         }
1495
1496         size = name_curpos - (char *)data;
1497         fill_note(note, "CORE", NT_FILE, size, data);
1498         return 0;
1499 }
1500
1501 #ifdef CORE_DUMP_USE_REGSET
1502 #include <linux/regset.h>
1503
1504 struct elf_thread_core_info {
1505         struct elf_thread_core_info *next;
1506         struct task_struct *task;
1507         struct elf_prstatus prstatus;
1508         struct memelfnote notes[0];
1509 };
1510
1511 struct elf_note_info {
1512         struct elf_thread_core_info *thread;
1513         struct memelfnote psinfo;
1514         struct memelfnote signote;
1515         struct memelfnote auxv;
1516         struct memelfnote files;
1517         user_siginfo_t csigdata;
1518         size_t size;
1519         int thread_notes;
1520 };
1521
1522 /*
1523  * When a regset has a writeback hook, we call it on each thread before
1524  * dumping user memory.  On register window machines, this makes sure the
1525  * user memory backing the register data is up to date before we read it.
1526  */
1527 static void do_thread_regset_writeback(struct task_struct *task,
1528                                        const struct user_regset *regset)
1529 {
1530         if (regset->writeback)
1531                 regset->writeback(task, regset, 1);
1532 }
1533
1534 #ifndef PR_REG_SIZE
1535 #define PR_REG_SIZE(S) sizeof(S)
1536 #endif
1537
1538 #ifndef PRSTATUS_SIZE
1539 #define PRSTATUS_SIZE(S) sizeof(S)
1540 #endif
1541
1542 #ifndef PR_REG_PTR
1543 #define PR_REG_PTR(S) (&((S)->pr_reg))
1544 #endif
1545
1546 #ifndef SET_PR_FPVALID
1547 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1548 #endif
1549
1550 static int fill_thread_core_info(struct elf_thread_core_info *t,
1551                                  const struct user_regset_view *view,
1552                                  long signr, size_t *total)
1553 {
1554         unsigned int i;
1555
1556         /*
1557          * NT_PRSTATUS is the one special case, because the regset data
1558          * goes into the pr_reg field inside the note contents, rather
1559                  * than being the whole note contents.  We fill the rest in here.
1560          * We assume that regset 0 is NT_PRSTATUS.
1561          */
1562         fill_prstatus(&t->prstatus, t->task, signr);
1563         (void) view->regsets[0].get(t->task, &view->regsets[0],
1564                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1565                                     PR_REG_PTR(&t->prstatus), NULL);
1566
1567         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1568                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1569         *total += notesize(&t->notes[0]);
1570
1571         do_thread_regset_writeback(t->task, &view->regsets[0]);
1572
1573         /*
1574          * Each other regset might generate a note too.  For each regset
1575          * that has no core_note_type or is inactive, we leave t->notes[i]
1576          * all zero and we'll know to skip writing it later.
1577          */
1578         for (i = 1; i < view->n; ++i) {
1579                 const struct user_regset *regset = &view->regsets[i];
1580                 do_thread_regset_writeback(t->task, regset);
1581                 if (regset->core_note_type && regset->get &&
1582                     (!regset->active || regset->active(t->task, regset))) {
1583                         int ret;
1584                         size_t size = regset->n * regset->size;
1585                         void *data = kmalloc(size, GFP_KERNEL);
1586                         if (unlikely(!data))
1587                                 return 0;
1588                         ret = regset->get(t->task, regset,
1589                                           0, size, data, NULL);
1590                         if (unlikely(ret))
1591                                 kfree(data);
1592                         else {
1593                                 if (regset->core_note_type != NT_PRFPREG)
1594                                         fill_note(&t->notes[i], "LINUX",
1595                                                   regset->core_note_type,
1596                                                   size, data);
1597                                 else {
1598                                         SET_PR_FPVALID(&t->prstatus, 1);
1599                                         fill_note(&t->notes[i], "CORE",
1600                                                   NT_PRFPREG, size, data);
1601                                 }
1602                                 *total += notesize(&t->notes[i]);
1603                         }
1604                 }
1605         }
1606
1607         return 1;
1608 }
1609
1610 static int fill_note_info(struct elfhdr *elf, int phdrs,
1611                           struct elf_note_info *info,
1612                           siginfo_t *siginfo, struct pt_regs *regs)
1613 {
1614         struct task_struct *dump_task = current;
1615         const struct user_regset_view *view = task_user_regset_view(dump_task);
1616         struct elf_thread_core_info *t;
1617         struct elf_prpsinfo *psinfo;
1618         struct core_thread *ct;
1619         unsigned int i;
1620
1621         info->size = 0;
1622         info->thread = NULL;
1623
1624         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1625         if (psinfo == NULL) {
1626                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1627                 return 0;
1628         }
1629
1630         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1631
1632         /*
1633          * Figure out how many notes we're going to need for each thread.
1634          */
1635         info->thread_notes = 0;
1636         for (i = 0; i < view->n; ++i)
1637                 if (view->regsets[i].core_note_type != 0)
1638                         ++info->thread_notes;
1639
1640         /*
1641          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1642          * since it is our one special case.
1643          */
1644         if (unlikely(info->thread_notes == 0) ||
1645             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1646                 WARN_ON(1);
1647                 return 0;
1648         }
1649
1650         /*
1651          * Initialize the ELF file header.
1652          */
1653         fill_elf_header(elf, phdrs,
1654                         view->e_machine, view->e_flags);
1655
1656         /*
1657          * Allocate a structure for each thread.
1658          */
1659         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1660                 t = kzalloc(offsetof(struct elf_thread_core_info,
1661                                      notes[info->thread_notes]),
1662                             GFP_KERNEL);
1663                 if (unlikely(!t))
1664                         return 0;
1665
1666                 t->task = ct->task;
1667                 if (ct->task == dump_task || !info->thread) {
1668                         t->next = info->thread;
1669                         info->thread = t;
1670                 } else {
1671                         /*
1672                          * Make sure to keep the original task at
1673                          * the head of the list.
1674                          */
1675                         t->next = info->thread->next;
1676                         info->thread->next = t;
1677                 }
1678         }
1679
1680         /*
1681          * Now fill in each thread's information.
1682          */
1683         for (t = info->thread; t != NULL; t = t->next)
1684                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1685                         return 0;
1686
1687         /*
1688          * Fill in the two process-wide notes.
1689          */
1690         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1691         info->size += notesize(&info->psinfo);
1692
1693         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1694         info->size += notesize(&info->signote);
1695
1696         fill_auxv_note(&info->auxv, current->mm);
1697         info->size += notesize(&info->auxv);
1698
1699         if (fill_files_note(&info->files) == 0)
1700                 info->size += notesize(&info->files);
1701
1702         return 1;
1703 }
1704
1705 static size_t get_note_info_size(struct elf_note_info *info)
1706 {
1707         return info->size;
1708 }
1709
1710 /*
1711  * Write all the notes for each thread.  When writing the first thread, the
1712  * process-wide notes are interleaved after the first thread-specific note.
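      *
      * For an N-thread dump the resulting order is therefore:
      *   T1 NT_PRSTATUS; NT_PRPSINFO, NT_SIGINFO, NT_AUXV, [NT_FILE];
      *   T1's other regset notes; then T2 NT_PRSTATUS, T2's regset notes, ...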
1713  */
1714 static int write_note_info(struct elf_note_info *info,
1715                            struct file *file, loff_t *foffset)
1716 {
1717         bool first = true;
1718         struct elf_thread_core_info *t = info->thread;
1719
1720         do {
1721                 int i;
1722
1723                 if (!writenote(&t->notes[0], file, foffset))
1724                         return 0;
1725
1726                 if (first && !writenote(&info->psinfo, file, foffset))
1727                         return 0;
1728                 if (first && !writenote(&info->signote, file, foffset))
1729                         return 0;
1730                 if (first && !writenote(&info->auxv, file, foffset))
1731                         return 0;
1732                 if (first && info->files.data &&
1733                                 !writenote(&info->files, file, foffset))
1734                         return 0;
1735
1736                 for (i = 1; i < info->thread_notes; ++i)
1737                         if (t->notes[i].data &&
1738                             !writenote(&t->notes[i], file, foffset))
1739                                 return 0;
1740
1741                 first = 0;
1742                 t = t->next;
1743         } while (t);
1744
1745         return 1;
1746 }
1747
1748 static void free_note_info(struct elf_note_info *info)
1749 {
1750         struct elf_thread_core_info *threads = info->thread;
1751         while (threads) {
1752                 unsigned int i;
1753                 struct elf_thread_core_info *t = threads;
1754                 threads = t->next;
1755                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1756                 for (i = 1; i < info->thread_notes; ++i)
1757                         kfree(t->notes[i].data);
1758                 kfree(t);
1759         }
1760         kfree(info->psinfo.data);
1761         vfree(info->files.data);
1762 }
1763
1764 #else
1765
1766 /* Here is the structure in which status of each thread is captured. */
1767 struct elf_thread_status
1768 {
1769         struct list_head list;
1770         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1771         elf_fpregset_t fpu;             /* NT_PRFPREG */
1772         struct task_struct *thread;
1773 #ifdef ELF_CORE_COPY_XFPREGS
1774         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1775 #endif
1776         struct memelfnote notes[3];
1777         int num_notes;
1778 };
1779
1780 /*
1781  * In order to add the specific thread information for the elf file format,
1782  * we need to keep a linked list of every thread's pr_status and then create
1783  * a single section for them in the final core file.
1784  */
1785 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1786 {
1787         int sz = 0;
1788         struct task_struct *p = t->thread;
1789         t->num_notes = 0;
1790
1791         fill_prstatus(&t->prstatus, p, signr);
1792         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1793
1794         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1795                   &(t->prstatus));
1796         t->num_notes++;
1797         sz += notesize(&t->notes[0]);
1798
1799         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1800                                                                 &t->fpu))) {
1801                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1802                           &(t->fpu));
1803                 t->num_notes++;
1804                 sz += notesize(&t->notes[1]);
1805         }
1806
1807 #ifdef ELF_CORE_COPY_XFPREGS
1808         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1809                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1810                           sizeof(t->xfpu), &t->xfpu);
1811                 t->num_notes++;
1812                 sz += notesize(&t->notes[2]);
1813         }
1814 #endif  
1815         return sz;
1816 }
1817
1818 struct elf_note_info {
1819         struct memelfnote *notes;
1820         struct memelfnote *notes_files;
1821         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1822         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1823         struct list_head thread_list;
1824         elf_fpregset_t *fpu;
1825 #ifdef ELF_CORE_COPY_XFPREGS
1826         elf_fpxregset_t *xfpu;
1827 #endif
1828         user_siginfo_t csigdata;
1829         int thread_status_size;
1830         int numnote;
1831 };
1832
1833 static int elf_note_info_init(struct elf_note_info *info)
1834 {
1835         memset(info, 0, sizeof(*info));
1836         INIT_LIST_HEAD(&info->thread_list);
1837
1838         /* Allocate space for ELF notes */
1839         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1840         if (!info->notes)
1841                 return 0;
1842         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1843         if (!info->psinfo)
1844                 return 0;
1845         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1846         if (!info->prstatus)
1847                 return 0;
1848         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1849         if (!info->fpu)
1850                 return 0;
1851 #ifdef ELF_CORE_COPY_XFPREGS
1852         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1853         if (!info->xfpu)
1854                 return 0;
1855 #endif
1856         return 1;
1857 }
1858
1859 static int fill_note_info(struct elfhdr *elf, int phdrs,
1860                           struct elf_note_info *info,
1861                           siginfo_t *siginfo, struct pt_regs *regs)
1862 {
1863         struct list_head *t;
1864
1865         if (!elf_note_info_init(info))
1866                 return 0;
1867
1868         if (siginfo->si_signo) {
1869                 struct core_thread *ct;
1870                 struct elf_thread_status *ets;
1871
1872                 for (ct = current->mm->core_state->dumper.next;
1873                                                 ct; ct = ct->next) {
1874                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1875                         if (!ets)
1876                                 return 0;
1877
1878                         ets->thread = ct->task;
1879                         list_add(&ets->list, &info->thread_list);
1880                 }
1881
1882                 list_for_each(t, &info->thread_list) {
1883                         int sz;
1884
1885                         ets = list_entry(t, struct elf_thread_status, list);
1886                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1887                         info->thread_status_size += sz;
1888                 }
1889         }
1890         /* now collect the dump for the current task */
1891         memset(info->prstatus, 0, sizeof(*info->prstatus));
1892         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1893         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1894
1895         /* Set up header */
1896         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1897
1898         /*
1899          * Set up the notes in similar form to SVR4 core dumps made
1900          * with info from their /proc.
1901          */
1902
1903         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1904                   sizeof(*info->prstatus), info->prstatus);
1905         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1906         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1907                   sizeof(*info->psinfo), info->psinfo);
1908
1909         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1910         fill_auxv_note(info->notes + 3, current->mm);
1911         info->numnote = 4;
1912
1913         if (fill_files_note(info->notes + info->numnote) == 0) {
1914                 info->notes_files = info->notes + info->numnote;
1915                 info->numnote++;
1916         }
1917
1918         /* Try to dump the FPU. */
1919         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1920                                                                info->fpu);
1921         if (info->prstatus->pr_fpvalid)
1922                 fill_note(info->notes + info->numnote++,
1923                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1924 #ifdef ELF_CORE_COPY_XFPREGS
1925         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1926                 fill_note(info->notes + info->numnote++,
1927                           "LINUX", ELF_CORE_XFPREG_TYPE,
1928                           sizeof(*info->xfpu), info->xfpu);
1929 #endif
1930
1931         return 1;
1932 }
1933
1934 static size_t get_note_info_size(struct elf_note_info *info)
1935 {
1936         int sz = 0;
1937         int i;
1938
1939         for (i = 0; i < info->numnote; i++)
1940                 sz += notesize(info->notes + i);
1941
1942         sz += info->thread_status_size;
1943
1944         return sz;
1945 }
1946
1947 static int write_note_info(struct elf_note_info *info,
1948                            struct file *file, loff_t *foffset)
1949 {
1950         int i;
1951         struct list_head *t;
1952
1953         for (i = 0; i < info->numnote; i++)
1954                 if (!writenote(info->notes + i, file, foffset))
1955                         return 0;
1956
1957         /* write out the thread status notes section */
1958         list_for_each(t, &info->thread_list) {
1959                 struct elf_thread_status *tmp =
1960                                 list_entry(t, struct elf_thread_status, list);
1961
1962                 for (i = 0; i < tmp->num_notes; i++)
1963                         if (!writenote(&tmp->notes[i], file, foffset))
1964                                 return 0;
1965         }
1966
1967         return 1;
1968 }
1969
1970 static void free_note_info(struct elf_note_info *info)
1971 {
1972         while (!list_empty(&info->thread_list)) {
1973                 struct list_head *tmp = info->thread_list.next;
1974                 list_del(tmp);
1975                 kfree(list_entry(tmp, struct elf_thread_status, list));
1976         }
1977
1978         /* Free data possibly allocated by fill_files_note(): */
1979         if (info->notes_files)
1980                 vfree(info->notes_files->data);
1981
1982         kfree(info->prstatus);
1983         kfree(info->psinfo);
1984         kfree(info->notes);
1985         kfree(info->fpu);
1986 #ifdef ELF_CORE_COPY_XFPREGS
1987         kfree(info->xfpu);
1988 #endif
1989 }
1990
1991 #endif
1992
1993 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1994                                         struct vm_area_struct *gate_vma)
1995 {
1996         struct vm_area_struct *ret = tsk->mm->mmap;
1997
1998         if (ret)
1999                 return ret;
2000         return gate_vma;
2001 }
2002 /*
2003  * Helper function for iterating across a vma list.  It ensures that the caller
2004  * will visit `gate_vma' prior to terminating the search.
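      *
      * Typical use:
      *   for (vma = first_vma(tsk, gate); vma; vma = next_vma(vma, gate))
      *           do_something(vma);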
2005  */
2006 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2007                                         struct vm_area_struct *gate_vma)
2008 {
2009         struct vm_area_struct *ret;
2010
2011         ret = this_vma->vm_next;
2012         if (ret)
2013                 return ret;
2014         if (this_vma == gate_vma)
2015                 return NULL;
2016         return gate_vma;
2017 }
2018
2019 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2020                              elf_addr_t e_shoff, int segs)
2021 {
2022         elf->e_shoff = e_shoff;
2023         elf->e_shentsize = sizeof(*shdr4extnum);
2024         elf->e_shnum = 1;
2025         elf->e_shstrndx = SHN_UNDEF;
2026
2027         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2028
2029         shdr4extnum->sh_type = SHT_NULL;
2030         shdr4extnum->sh_size = elf->e_shnum;
2031         shdr4extnum->sh_link = elf->e_shstrndx;
2032         shdr4extnum->sh_info = segs;
2033 }
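
     /*
      * A consumer-side sketch of the extended-numbering scheme set up above,
      * using a hypothetical real_phnum() helper: when e_phnum holds the
      * PN_XNUM escape value, the true count lives in section header 0's
      * sh_info.  Illustrative only, hence kept under #if 0.
      */
     #if 0
     static unsigned int real_phnum(const struct elfhdr *ehdr,
                                    const struct elf_shdr *shdr0)
     {
             if (ehdr->e_phnum != PN_XNUM)
                     return ehdr->e_phnum;
             return shdr0->sh_info;          /* filled in by fill_extnum_info() */
     }
     #endif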
2034
2035 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2036                                      unsigned long mm_flags)
2037 {
2038         struct vm_area_struct *vma;
2039         size_t size = 0;
2040
2041         for (vma = first_vma(current, gate_vma); vma != NULL;
2042              vma = next_vma(vma, gate_vma))
2043                 size += vma_dump_size(vma, mm_flags);
2044         return size;
2045 }
2046
2047 /*
2048  * Actual dumper
2049  *
2050  * This is a two-pass process; first we find the offsets of the bits,
2051  * and then they are actually written out.  If we run out of core limit
2052  * we just truncate.
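      *
      * Pass one is the offset arithmetic below, sizing the ELF header,
      * program headers, notes and segment data; pass two is the sequence
      * of dump_write()/dump_seek() calls that actually emits them.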
2053  */
2054 static int elf_core_dump(struct coredump_params *cprm)
2055 {
2056         int has_dumped = 0;
2057         mm_segment_t fs;
2058         int segs;
2059         size_t size = 0;
2060         struct vm_area_struct *vma, *gate_vma;
2061         struct elfhdr *elf = NULL;
2062         loff_t offset = 0, dataoff, foffset;
2063         struct elf_note_info info = { };
2064         struct elf_phdr *phdr4note = NULL;
2065         struct elf_shdr *shdr4extnum = NULL;
2066         Elf_Half e_phnum;
2067         elf_addr_t e_shoff;
2068
2069         /*
2070          * We no longer stop all VM operations.
2071          * 
2072          * This is because any process that could change map_count or the
2073          * mmap / vma pages is now blocked in do_exit until current
2074          * finishes this core dump.
2075          *
2076          * Only ptrace can touch these memory addresses, but it doesn't change
2077          * the map_count or the pages allocated. So no possibility of crashing
2078          * exists while dumping the mm->vm_next areas to the core file.
2079          */
2080   
2081         /* alloc memory for large data structures: too large to be on stack */
2082         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2083         if (!elf)
2084                 goto out;
2085         /*
2086          * The number of segs is recorded in the ELF header as a 16-bit value.
2087          * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2088          */
2089         segs = current->mm->map_count;
2090         segs += elf_core_extra_phdrs();
2091
2092         gate_vma = get_gate_vma(current->mm);
2093         if (gate_vma != NULL)
2094                 segs++;
2095
2096         /* for notes section */
2097         segs++;
2098
2099         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2100          * this, the kernel supports extended numbering. Have a look at
2101          * include/linux/elf.h for further information. */
2102         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2103
2104         /*
2105          * Collect all the non-memory information about the process for the
2106          * notes.  This also sets up the file header.
2107          */
2108         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2109                 goto cleanup;
2110
2111         has_dumped = 1;
2112
2113         fs = get_fs();
2114         set_fs(KERNEL_DS);
2115
2116         offset += sizeof(*elf);                         /* Elf header */
2117         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2118         foffset = offset;
2119
2120         /* Write notes phdr entry */
2121         {
2122                 size_t sz = get_note_info_size(&info);
2123
2124                 sz += elf_coredump_extra_notes_size();
2125
2126                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2127                 if (!phdr4note)
2128                         goto end_coredump;
2129
2130                 fill_elf_note_phdr(phdr4note, sz, offset);
2131                 offset += sz;
2132         }
2133
2134         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2135
2136         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2137         offset += elf_core_extra_data_size();
2138         e_shoff = offset;
2139
2140         if (e_phnum == PN_XNUM) {
2141                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2142                 if (!shdr4extnum)
2143                         goto end_coredump;
2144                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2145         }
2146
2147         offset = dataoff;
2148
2149         size += sizeof(*elf);
2150         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2151                 goto end_coredump;
2152
2153         size += sizeof(*phdr4note);
2154         if (size > cprm->limit
2155             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2156                 goto end_coredump;
2157
2158         /* Write program headers for segments dump */
2159         for (vma = first_vma(current, gate_vma); vma != NULL;
2160                         vma = next_vma(vma, gate_vma)) {
2161                 struct elf_phdr phdr;
2162
2163                 phdr.p_type = PT_LOAD;
2164                 phdr.p_offset = offset;
2165                 phdr.p_vaddr = vma->vm_start;
2166                 phdr.p_paddr = 0;
2167                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2168                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2169                 offset += phdr.p_filesz;
2170                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2171                 if (vma->vm_flags & VM_WRITE)
2172                         phdr.p_flags |= PF_W;
2173                 if (vma->vm_flags & VM_EXEC)
2174                         phdr.p_flags |= PF_X;
2175                 phdr.p_align = ELF_EXEC_PAGESIZE;
2176
2177                 size += sizeof(phdr);
2178                 if (size > cprm->limit
2179                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2180                         goto end_coredump;
2181         }
2182
2183         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2184                 goto end_coredump;
2185
2186         /* write out the notes section */
2187         if (!write_note_info(&info, cprm->file, &foffset))
2188                 goto end_coredump;
2189
2190         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2191                 goto end_coredump;
2192
2193         /* Align to page */
2194         if (!dump_seek(cprm->file, dataoff - foffset))
2195                 goto end_coredump;
2196
2197         for (vma = first_vma(current, gate_vma); vma != NULL;
2198                         vma = next_vma(vma, gate_vma)) {
2199                 unsigned long addr;
2200                 unsigned long end;
2201
2202                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2203
2204                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2205                         struct page *page;
2206                         int stop;
2207
2208                         page = get_dump_page(addr);
2209                         if (page) {
2210                                 void *kaddr = kmap(page);
2211                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2212                                         !dump_write(cprm->file, kaddr,
2213                                                     PAGE_SIZE);
2214                                 kunmap(page);
2215                                 page_cache_release(page);
2216                         } else
2217                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2218                         if (stop)
2219                                 goto end_coredump;
2220                 }
2221         }
2222
2223         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2224                 goto end_coredump;
2225
2226         if (e_phnum == PN_XNUM) {
2227                 size += sizeof(*shdr4extnum);
2228                 if (size > cprm->limit
2229                     || !dump_write(cprm->file, shdr4extnum,
2230                                    sizeof(*shdr4extnum)))
2231                         goto end_coredump;
2232         }
2233
2234 end_coredump:
2235         set_fs(fs);
2236
2237 cleanup:
2238         free_note_info(&info);
2239         kfree(shdr4extnum);
2240         kfree(phdr4note);
2241         kfree(elf);
2242 out:
2243         return has_dumped;
2244 }
2245
2246 #endif          /* CONFIG_ELF_CORE */
2247
2248 static int __init init_elf_binfmt(void)
2249 {
2250         register_binfmt(&elf_format);
2251         return 0;
2252 }
2253
2254 static void __exit exit_elf_binfmt(void)
2255 {
2256         /* Remove the ELF loader. */
2257         unregister_binfmt(&elf_format);
2258 }
2259
2260 core_initcall(init_elf_binfmt);
2261 module_exit(exit_elf_binfmt);
2262 MODULE_LICENSE("GPL");