/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
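/*
 * A quick worked example of the three macros above, assuming
 * ELF_MIN_ALIGN == 0x1000 (4 KiB pages):
 *
 *   ELF_PAGESTART(0x12345)  == 0x12000   (round down to page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to next page boundary)
 */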

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/*
 * We need to explicitly zero any fractional pages after the data
 * section (i.e. bss).  These would otherwise contain junk from the
 * file that should not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
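/*
 * Illustration for the common grows-down case (the #else branch above),
 * assuming a 64-bit elf_addr_t and an illustrative starting p of
 * 0x7fff00001000:
 *
 *   STACK_ALLOC(p, 16)  -> p becomes 0x7fff00000ff0 and that new, lower
 *                          address is returned (space reserved below the
 *                          old top).
 *   STACK_ADD(p, 4)     -> 0x7fff00000fd0, i.e. room for four 8-byte
 *                          elf_addr_t slots below p.
 *   STACK_ROUND(p, 4)   -> the same address masked down to a 16-byte
 *                          boundary, as most ABIs require.
 */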

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
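/*
 * For reference, the initial user stack that create_elf_tables() builds
 * looks like this (lowest address, i.e. the final sp, first):
 *
 *   argc
 *   argv[0] ... argv[argc-1], NULL
 *   envp[0] ... envp[envc-1], NULL
 *   auxv pairs (id, value), terminated by an AT_NULL pair
 *   ... padding, random bytes, platform strings and the
 *       argument/environment strings toward the stack top
 *
 * This matches the System V ABI process-startup layout that ld.so and
 * libc startup code expect.
 */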

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image and then unmap the remainder
         * at the end (this unmapping is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return(map_addr);
}

#endif /* !elf_map */
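/*
 * Worked example for elf_map(), assuming 4 KiB pages: a PT_LOAD segment
 * with p_vaddr = 0x400123, p_offset = 0x123 and p_filesz = 0x2000 gives
 *
 *   size = 0x2000 + 0x123, page-aligned to 0x3000
 *   off  = 0x123 - 0x123  = 0
 *   addr = ELF_PAGESTART(0x400123) = 0x400000
 *
 * so the file is mapped at 0x400000 for 0x3000 bytes starting at file
 * offset 0.  This only works because the ELF spec requires p_vaddr and
 * p_offset of loadable segments to be congruent modulo the page size.
 */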

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
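/*
 * Example: with two PT_LOAD headers, the first at p_vaddr = 0x400000 and
 * the last at p_vaddr = 0x600000 with p_memsz = 0x800, the total mapping
 * size is 0x600800 - 0x400000 = 0x200800: the span from the page start of
 * the first segment to the end of the last segment's memory image.
 */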


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = (unsigned long) get_random_int();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
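/*
 * With the default STACK_RND_MASK and 4 KiB pages (PAGE_SHIFT == 12),
 * random_variable is a page index in [0, 0x7ff] shifted left by 12, so
 * the stack top moves by up to 0x7ff000 bytes (just under 8 MiB) in
 * page-sized steps -- which is what the "8MB of VA" comment above means.
 */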

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl or an explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}
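/*
 * Illustration of the ET_DYN load_bias bookkeeping above, with purely
 * illustrative addresses: for a PIE whose first PT_LOAD has p_vaddr == 0
 * and with PF_RANDOMIZE set, load_bias starts at 0, so elf_map() lets the
 * kernel pick an address -- say 0x7f0000000000.  The first successful
 * mapping then sets load_bias = 0x7f0000000000, and every later segment,
 * e_entry, the bss and the brk are shifted by exactly that delta between
 * the link-time p_vaddr and the actual mapping address.
 */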

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment: part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
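/*
 * Example: a note named "CORE" (5 bytes including the NUL, padded to 8)
 * with a 20-byte payload occupies 12 (header: n_namesz, n_descsz, n_type)
 * + 8 (padded name) + 20 (payload, already 4-byte aligned) = 40 bytes.
 */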
1230
1231 #define DUMP_WRITE(addr, nr, foffset)   \
1232         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1233
1234 static int alignfile(struct file *file, loff_t *foffset)
1235 {
1236         static const char buf[4] = { 0, };
1237         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1238         return 1;
1239 }
1240
1241 static int writenote(struct memelfnote *men, struct file *file,
1242                         loff_t *foffset)
1243 {
1244         struct elf_note en;
1245         en.n_namesz = strlen(men->name) + 1;
1246         en.n_descsz = men->datasz;
1247         en.n_type = men->type;
1248
1249         DUMP_WRITE(&en, sizeof(en), foffset);
1250         DUMP_WRITE(men->name, en.n_namesz, foffset);
1251         if (!alignfile(file, foffset))
1252                 return 0;
1253         DUMP_WRITE(men->data, men->datasz, foffset);
1254         if (!alignfile(file, foffset))
1255                 return 0;
1256
1257         return 1;
1258 }
1259 #undef DUMP_WRITE
1260
1261 static void fill_elf_header(struct elfhdr *elf, int segs,
1262                             u16 machine, u32 flags)
1263 {
1264         memset(elf, 0, sizeof(*elf));
1265
1266         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1267         elf->e_ident[EI_CLASS] = ELF_CLASS;
1268         elf->e_ident[EI_DATA] = ELF_DATA;
1269         elf->e_ident[EI_VERSION] = EV_CURRENT;
1270         elf->e_ident[EI_OSABI] = ELF_OSABI;
1271
1272         elf->e_type = ET_CORE;
1273         elf->e_machine = machine;
1274         elf->e_version = EV_CURRENT;
1275         elf->e_phoff = sizeof(struct elfhdr);
1276         elf->e_flags = flags;
1277         elf->e_ehsize = sizeof(struct elfhdr);
1278         elf->e_phentsize = sizeof(struct elf_phdr);
1279         elf->e_phnum = segs;
1280
1281         return;
1282 }
1283
1284 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1285 {
1286         phdr->p_type = PT_NOTE;
1287         phdr->p_offset = offset;
1288         phdr->p_vaddr = 0;
1289         phdr->p_paddr = 0;
1290         phdr->p_filesz = sz;
1291         phdr->p_memsz = 0;
1292         phdr->p_flags = 0;
1293         phdr->p_align = 0;
1294         return;
1295 }
1296
1297 static void fill_note(struct memelfnote *note, const char *name, int type, 
1298                 unsigned int sz, void *data)
1299 {
1300         note->name = name;
1301         note->type = type;
1302         note->datasz = sz;
1303         note->data = data;
1304         return;
1305 }
1306
1307 /*
1308  * fill up all the fields in prstatus from the given task struct, except
1309  * registers which need to be filled up separately.
1310  */
1311 static void fill_prstatus(struct elf_prstatus *prstatus,
1312                 struct task_struct *p, long signr)
1313 {
1314         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1315         prstatus->pr_sigpend = p->pending.signal.sig[0];
1316         prstatus->pr_sighold = p->blocked.sig[0];
1317         rcu_read_lock();
1318         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1319         rcu_read_unlock();
1320         prstatus->pr_pid = task_pid_vnr(p);
1321         prstatus->pr_pgrp = task_pgrp_vnr(p);
1322         prstatus->pr_sid = task_session_vnr(p);
1323         if (thread_group_leader(p)) {
1324                 struct task_cputime cputime;
1325
1326                 /*
1327                  * This is the record for the group leader.  It shows the
1328                  * group-wide total, not its individual thread total.
1329                  */
1330                 thread_group_cputime(p, &cputime);
1331                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1332                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1333         } else {
1334                 cputime_t utime, stime;
1335
1336                 task_cputime(p, &utime, &stime);
1337                 cputime_to_timeval(utime, &prstatus->pr_utime);
1338                 cputime_to_timeval(stime, &prstatus->pr_stime);
1339         }
1340         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1341         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1342 }
1343
1344 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1345                        struct mm_struct *mm)
1346 {
1347         const struct cred *cred;
1348         unsigned int i, len;
1349         
1350         /* first copy the parameters from user space */
1351         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1352
1353         len = mm->arg_end - mm->arg_start;
1354         if (len >= ELF_PRARGSZ)
1355                 len = ELF_PRARGSZ-1;
1356         if (copy_from_user(&psinfo->pr_psargs,
1357                            (const char __user *)mm->arg_start, len))
1358                 return -EFAULT;
1359         for(i = 0; i < len; i++)
1360                 if (psinfo->pr_psargs[i] == 0)
1361                         psinfo->pr_psargs[i] = ' ';
1362         psinfo->pr_psargs[len] = 0;
1363
1364         rcu_read_lock();
1365         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1366         rcu_read_unlock();
1367         psinfo->pr_pid = task_pid_vnr(p);
1368         psinfo->pr_pgrp = task_pgrp_vnr(p);
1369         psinfo->pr_sid = task_session_vnr(p);
1370
1371         i = p->state ? ffz(~p->state) + 1 : 0;
1372         psinfo->pr_state = i;
1373         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1374         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1375         psinfo->pr_nice = task_nice(p);
1376         psinfo->pr_flag = p->flags;
1377         rcu_read_lock();
1378         cred = __task_cred(p);
1379         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1380         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1381         rcu_read_unlock();
1382         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1383         
1384         return 0;
1385 }
1386
1387 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1388 {
1389         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1390         int i = 0;
1391         do
1392                 i += 2;
1393         while (auxv[i - 2] != AT_NULL);
1394         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1395 }
1396
1397 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1398                 siginfo_t *siginfo)
1399 {
1400         mm_segment_t old_fs = get_fs();
1401         set_fs(KERNEL_DS);
1402         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1403         set_fs(old_fs);
1404         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1405 }
1406
1407 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1408 /*
1409  * Format of NT_FILE note:
1410  *
1411  * long count     -- how many files are mapped
1412  * long page_size -- units for file_ofs
1413  * array of [COUNT] elements of
1414  *   long start
1415  *   long end
1416  *   long file_ofs
1417  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1418  */
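     /*
      * For illustration only: a note consumer could overlay the fixed-size
      * head of the note data with a struct along these (hypothetical) lines,
      * with the NUL-separated filenames following the last array element:
      *
      *      struct nt_file_note {
      *              long count;
      *              long page_size;
      *              struct { long start, end, file_ofs; } files[];
      *      };
      */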
1419 static int fill_files_note(struct memelfnote *note)
1420 {
1421         struct vm_area_struct *vma;
1422         unsigned count, size, names_ofs, remaining, n;
1423         user_long_t *data;
1424         user_long_t *start_end_ofs;
1425         char *name_base, *name_curpos;
1426
1427         /* *Estimated* file count and total data size needed */
1428         count = current->mm->map_count;
1429         size = count * 64;
1430
1431         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1432  alloc:
1433         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1434                 return -EINVAL;
1435         size = round_up(size, PAGE_SIZE);
1436         data = vmalloc(size);
1437         if (!data)
1438                 return -ENOMEM;
1439
1440         start_end_ofs = data + 2;
1441         name_base = name_curpos = ((char *)data) + names_ofs;
1442         remaining = size - names_ofs;
1443         count = 0;
1444         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1445                 struct file *file;
1446                 const char *filename;
1447
1448                 file = vma->vm_file;
1449                 if (!file)
1450                         continue;
1451                 filename = d_path(&file->f_path, name_curpos, remaining);
1452                 if (IS_ERR(filename)) {
1453                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
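                                     /* name didn't fit: grow the buffer by 25% and retry */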
1454                                 vfree(data);
1455                                 size = size * 5 / 4;
1456                                 goto alloc;
1457                         }
1458                         continue;
1459                 }
1460
1461                 /* d_path() fills at the end, move name down */
1462                 /* n = strlen(filename) + 1: */
1463                 n = (name_curpos + remaining) - filename;
1464                 remaining = filename - name_curpos;
1465                 memmove(name_curpos, filename, n);
1466                 name_curpos += n;
1467
1468                 *start_end_ofs++ = vma->vm_start;
1469                 *start_end_ofs++ = vma->vm_end;
1470                 *start_end_ofs++ = vma->vm_pgoff;
1471                 count++;
1472         }
1473
1474         /* Now we know exact count of files, can store it */
1475         data[0] = count;
1476         data[1] = PAGE_SIZE;
1477         /*
1478          * The final count is usually less than current->mm->map_count
1479          * (not every vma is file-backed), so move the filenames down.
1480          */
1481         n = current->mm->map_count - count;
1482         if (n != 0) {
1483                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1484                 memmove(name_base - shift_bytes, name_base,
1485                         name_curpos - name_base);
1486                 name_curpos -= shift_bytes;
1487         }
1488
1489         size = name_curpos - (char *)data;
1490         fill_note(note, "CORE", NT_FILE, size, data);
1491         return 0;
1492 }
1493
1494 #ifdef CORE_DUMP_USE_REGSET
1495 #include <linux/regset.h>
1496
1497 struct elf_thread_core_info {
1498         struct elf_thread_core_info *next;
1499         struct task_struct *task;
1500         struct elf_prstatus prstatus;
1501         struct memelfnote notes[0];
1502 };
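     /*
      * notes[] is a variable-length tail: fill_note_info() allocates each
      * instance with room for thread_notes entries.
      */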
1503
1504 struct elf_note_info {
1505         struct elf_thread_core_info *thread;
1506         struct memelfnote psinfo;
1507         struct memelfnote signote;
1508         struct memelfnote auxv;
1509         struct memelfnote files;
1510         user_siginfo_t csigdata;
1511         size_t size;
1512         int thread_notes;
1513 };
1514
1515 /*
1516  * When a regset has a writeback hook, we call it on each thread before
1517  * dumping user memory.  On register window machines, this makes sure the
1518  * user memory backing the register data is up to date before we read it.
1519  */
1520 static void do_thread_regset_writeback(struct task_struct *task,
1521                                        const struct user_regset *regset)
1522 {
1523         if (regset->writeback)
1524                 regset->writeback(task, regset, 1);
1525 }
1526
1527 #ifndef PR_REG_SIZE
1528 #define PR_REG_SIZE(S) sizeof(S)
1529 #endif
1530
1531 #ifndef PRSTATUS_SIZE
1532 #define PRSTATUS_SIZE(S) sizeof(S)
1533 #endif
1534
1535 #ifndef PR_REG_PTR
1536 #define PR_REG_PTR(S) (&((S)->pr_reg))
1537 #endif
1538
1539 #ifndef SET_PR_FPVALID
1540 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1541 #endif
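     /*
      * The #ifndef defaults above describe the native prstatus layout; an
      * architecture may pre-define them to substitute its own sizes and
      * register-block pointer (e.g. for a compat dump format).
      */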
1542
1543 static int fill_thread_core_info(struct elf_thread_core_info *t,
1544                                  const struct user_regset_view *view,
1545                                  long signr, size_t *total)
1546 {
1547         unsigned int i;
1548
1549         /*
1550          * NT_PRSTATUS is the one special case, because the regset data
1551          * goes into the pr_reg field inside the note contents, rather
1552          * than being the whole note contents.  We fill the rest in here.
1553          * We assume that regset 0 is NT_PRSTATUS.
1554          */
1555         fill_prstatus(&t->prstatus, t->task, signr);
1556         (void) view->regsets[0].get(t->task, &view->regsets[0],
1557                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1558                                     PR_REG_PTR(&t->prstatus), NULL);
1559
1560         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1561                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1562         *total += notesize(&t->notes[0]);
1563
1564         do_thread_regset_writeback(t->task, &view->regsets[0]);
1565
1566         /*
1567          * Each other regset might generate a note too.  For each regset
1568          * that has no core_note_type or is inactive, we leave t->notes[i]
1569          * all zero and we'll know to skip writing it later.
1570          */
1571         for (i = 1; i < view->n; ++i) {
1572                 const struct user_regset *regset = &view->regsets[i];
1573                 do_thread_regset_writeback(t->task, regset);
1574                 if (regset->core_note_type && regset->get &&
1575                     (!regset->active || regset->active(t->task, regset))) {
1576                         int ret;
1577                         size_t size = regset->n * regset->size;
1578                         void *data = kmalloc(size, GFP_KERNEL);
1579                         if (unlikely(!data))
1580                                 return 0;
1581                         ret = regset->get(t->task, regset,
1582                                           0, size, data, NULL);
1583                         if (unlikely(ret))
1584                                 kfree(data);
1585                         else {
1586                                 if (regset->core_note_type != NT_PRFPREG)
1587                                         fill_note(&t->notes[i], "LINUX",
1588                                                   regset->core_note_type,
1589                                                   size, data);
1590                                 else {
1591                                         SET_PR_FPVALID(&t->prstatus, 1);
1592                                         fill_note(&t->notes[i], "CORE",
1593                                                   NT_PRFPREG, size, data);
1594                                 }
1595                                 *total += notesize(&t->notes[i]);
1596                         }
1597                 }
1598         }
1599
1600         return 1;
1601 }
1602
1603 static int fill_note_info(struct elfhdr *elf, int phdrs,
1604                           struct elf_note_info *info,
1605                           siginfo_t *siginfo, struct pt_regs *regs)
1606 {
1607         struct task_struct *dump_task = current;
1608         const struct user_regset_view *view = task_user_regset_view(dump_task);
1609         struct elf_thread_core_info *t;
1610         struct elf_prpsinfo *psinfo;
1611         struct core_thread *ct;
1612         unsigned int i;
1613
1614         info->size = 0;
1615         info->thread = NULL;
1616
1617         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1618         if (psinfo == NULL) {
1619                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1620                 return 0;
1621         }
1622
1623         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1624
1625         /*
1626          * Figure out how many notes we're going to need for each thread.
1627          */
1628         info->thread_notes = 0;
1629         for (i = 0; i < view->n; ++i)
1630                 if (view->regsets[i].core_note_type != 0)
1631                         ++info->thread_notes;
1632
1633         /*
1634          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1635          * since it is our one special case.
1636          */
1637         if (unlikely(info->thread_notes == 0) ||
1638             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1639                 WARN_ON(1);
1640                 return 0;
1641         }
1642
1643         /*
1644          * Initialize the ELF file header.
1645          */
1646         fill_elf_header(elf, phdrs,
1647                         view->e_machine, view->e_flags);
1648
1649         /*
1650          * Allocate a structure for each thread.
1651          */
1652         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1653                 t = kzalloc(offsetof(struct elf_thread_core_info,
1654                                      notes[info->thread_notes]),
1655                             GFP_KERNEL);
1656                 if (unlikely(!t))
1657                         return 0;
1658
1659                 t->task = ct->task;
1660                 if (ct->task == dump_task || !info->thread) {
1661                         t->next = info->thread;
1662                         info->thread = t;
1663                 } else {
1664                         /*
1665                          * Make sure to keep the original task at
1666                          * the head of the list.
1667                          */
1668                         t->next = info->thread->next;
1669                         info->thread->next = t;
1670                 }
1671         }
1672
1673         /*
1674          * Now fill in each thread's information.
1675          */
1676         for (t = info->thread; t != NULL; t = t->next)
1677                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1678                         return 0;
1679
1680         /*
1681          * Fill in the two process-wide notes.
1682          */
1683         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1684         info->size += notesize(&info->psinfo);
1685
1686         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1687         info->size += notesize(&info->signote);
1688
1689         fill_auxv_note(&info->auxv, current->mm);
1690         info->size += notesize(&info->auxv);
1691
1692         if (fill_files_note(&info->files) == 0)
1693                 info->size += notesize(&info->files);
1694
1695         return 1;
1696 }
1697
1698 static size_t get_note_info_size(struct elf_note_info *info)
1699 {
1700         return info->size;
1701 }
1702
1703 /*
1704  * Write all the notes for each thread.  When writing the first thread, the
1705  * process-wide notes are interleaved after the first thread-specific note.
1706  */
1707 static int write_note_info(struct elf_note_info *info,
1708                            struct file *file, loff_t *foffset)
1709 {
1710         bool first = true;
1711         struct elf_thread_core_info *t = info->thread;
1712
1713         do {
1714                 int i;
1715
1716                 if (!writenote(&t->notes[0], file, foffset))
1717                         return 0;
1718
1719                 if (first && !writenote(&info->psinfo, file, foffset))
1720                         return 0;
1721                 if (first && !writenote(&info->signote, file, foffset))
1722                         return 0;
1723                 if (first && !writenote(&info->auxv, file, foffset))
1724                         return 0;
1725                 if (first && info->files.data &&
1726                                 !writenote(&info->files, file, foffset))
1727                         return 0;
1728
1729                 for (i = 1; i < info->thread_notes; ++i)
1730                         if (t->notes[i].data &&
1731                             !writenote(&t->notes[i], file, foffset))
1732                                 return 0;
1733
1734                 first = false;
1735                 t = t->next;
1736         } while (t);
1737
1738         return 1;
1739 }
1740
1741 static void free_note_info(struct elf_note_info *info)
1742 {
1743         struct elf_thread_core_info *threads = info->thread;
1744         while (threads) {
1745                 unsigned int i;
1746                 struct elf_thread_core_info *t = threads;
1747                 threads = t->next;
1748                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1749                 for (i = 1; i < info->thread_notes; ++i)
1750                         kfree(t->notes[i].data);
1751                 kfree(t);
1752         }
1753         kfree(info->psinfo.data);
1754         vfree(info->files.data);
1755 }
1756
1757 #else
1758
1759 /* Here is the structure in which status of each thread is captured. */
1760 struct elf_thread_status
1761 {
1762         struct list_head list;
1763         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1764         elf_fpregset_t fpu;             /* NT_PRFPREG */
1765         struct task_struct *thread;
1766 #ifdef ELF_CORE_COPY_XFPREGS
1767         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1768 #endif
1769         struct memelfnote notes[3];
1770         int num_notes;
1771 };
1772
1773 /*
1774  * In order to add the specific thread information for the ELF file format,
1775  * we need to keep a linked list of every thread's prstatus and then write
1776  * them all out together in the single note segment of the final core file.
1777  */
1778 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1779 {
1780         int sz = 0;
1781         struct task_struct *p = t->thread;
1782         t->num_notes = 0;
1783
1784         fill_prstatus(&t->prstatus, p, signr);
1785         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1786         
1787         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1788                   &(t->prstatus));
1789         t->num_notes++;
1790         sz += notesize(&t->notes[0]);
1791
1792         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1793                                                                 &t->fpu))) {
1794                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1795                           &(t->fpu));
1796                 t->num_notes++;
1797                 sz += notesize(&t->notes[1]);
1798         }
1799
1800 #ifdef ELF_CORE_COPY_XFPREGS
1801         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1802                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1803                           sizeof(t->xfpu), &t->xfpu);
1804                 t->num_notes++;
1805                 sz += notesize(&t->notes[2]);
1806         }
1807 #endif  
1808         return sz;
1809 }
1810
1811 struct elf_note_info {
1812         struct memelfnote *notes;
1813         struct memelfnote *notes_files;
1814         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1815         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1816         struct list_head thread_list;
1817         elf_fpregset_t *fpu;
1818 #ifdef ELF_CORE_COPY_XFPREGS
1819         elf_fpxregset_t *xfpu;
1820 #endif
1821         user_siginfo_t csigdata;
1822         int thread_status_size;
1823         int numnote;
1824 };
1825
1826 static int elf_note_info_init(struct elf_note_info *info)
1827 {
1828         memset(info, 0, sizeof(*info));
1829         INIT_LIST_HEAD(&info->thread_list);
1830
1831         /* Allocate space for ELF notes: 8 slots cover every note filled in below */
1832         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1833         if (!info->notes)
1834                 return 0;
1835         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1836         if (!info->psinfo)
1837                 return 0;
1838         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1839         if (!info->prstatus)
1840                 return 0;
1841         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1842         if (!info->fpu)
1843                 return 0;
1844 #ifdef ELF_CORE_COPY_XFPREGS
1845         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1846         if (!info->xfpu)
1847                 return 0;
1848 #endif
1849         return 1;
1850 }
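     /*
      * A failed elf_note_info_init() is safe to follow with free_note_info():
      * info was zeroed first and kfree(NULL) is a no-op, so any partially
      * completed allocations are still released on the cleanup path.
      */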
1851
1852 static int fill_note_info(struct elfhdr *elf, int phdrs,
1853                           struct elf_note_info *info,
1854                           siginfo_t *siginfo, struct pt_regs *regs)
1855 {
1856         struct list_head *t;
1857
1858         if (!elf_note_info_init(info))
1859                 return 0;
1860
1861         if (siginfo->si_signo) {
1862                 struct core_thread *ct;
1863                 struct elf_thread_status *ets;
1864
1865                 for (ct = current->mm->core_state->dumper.next;
1866                                                 ct; ct = ct->next) {
1867                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1868                         if (!ets)
1869                                 return 0;
1870
1871                         ets->thread = ct->task;
1872                         list_add(&ets->list, &info->thread_list);
1873                 }
1874
1875                 list_for_each(t, &info->thread_list) {
1876                         int sz;
1877
1878                         ets = list_entry(t, struct elf_thread_status, list);
1879                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1880                         info->thread_status_size += sz;
1881                 }
1882         }
1883         /* now collect the dump for the current task */
1884         memset(info->prstatus, 0, sizeof(*info->prstatus));
1885         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1886         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1887
1888         /* Set up header */
1889         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1890
1891         /*
1892          * Set up the notes in similar form to SVR4 core dumps made
1893          * with info from their /proc.
1894          */
1895
1896         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1897                   sizeof(*info->prstatus), info->prstatus);
1898         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1899         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1900                   sizeof(*info->psinfo), info->psinfo);
1901
1902         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1903         fill_auxv_note(info->notes + 3, current->mm);
1904         info->numnote = 4;
1905
1906         if (fill_files_note(info->notes + info->numnote) == 0) {
1907                 info->notes_files = info->notes + info->numnote;
1908                 info->numnote++;
1909         }
1910
1911         /* Try to dump the FPU. */
1912         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1913                                                                info->fpu);
1914         if (info->prstatus->pr_fpvalid)
1915                 fill_note(info->notes + info->numnote++,
1916                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1917 #ifdef ELF_CORE_COPY_XFPREGS
1918         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1919                 fill_note(info->notes + info->numnote++,
1920                           "LINUX", ELF_CORE_XFPREG_TYPE,
1921                           sizeof(*info->xfpu), info->xfpu);
1922 #endif
1923
1924         return 1;
1925 }
1926
1927 static size_t get_note_info_size(struct elf_note_info *info)
1928 {
1929         int sz = 0;
1930         int i;
1931
1932         for (i = 0; i < info->numnote; i++)
1933                 sz += notesize(info->notes + i);
1934
1935         sz += info->thread_status_size;
1936
1937         return sz;
1938 }
1939
1940 static int write_note_info(struct elf_note_info *info,
1941                            struct file *file, loff_t *foffset)
1942 {
1943         int i;
1944         struct list_head *t;
1945
1946         for (i = 0; i < info->numnote; i++)
1947                 if (!writenote(info->notes + i, file, foffset))
1948                         return 0;
1949
1950         /* write out the thread status notes section */
1951         list_for_each(t, &info->thread_list) {
1952                 struct elf_thread_status *tmp =
1953                                 list_entry(t, struct elf_thread_status, list);
1954
1955                 for (i = 0; i < tmp->num_notes; i++)
1956                         if (!writenote(&tmp->notes[i], file, foffset))
1957                                 return 0;
1958         }
1959
1960         return 1;
1961 }
1962
1963 static void free_note_info(struct elf_note_info *info)
1964 {
1965         while (!list_empty(&info->thread_list)) {
1966                 struct list_head *tmp = info->thread_list.next;
1967                 list_del(tmp);
1968                 kfree(list_entry(tmp, struct elf_thread_status, list));
1969         }
1970
1971         /* Free data possibly allocated by fill_files_note(): */
1972         if (info->notes_files)
1973                 vfree(info->notes_files->data);
1974
1975         kfree(info->prstatus);
1976         kfree(info->psinfo);
1977         kfree(info->notes);
1978         kfree(info->fpu);
1979 #ifdef ELF_CORE_COPY_XFPREGS
1980         kfree(info->xfpu);
1981 #endif
1982 }
1983
1984 #endif
1985
1986 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1987                                         struct vm_area_struct *gate_vma)
1988 {
1989         struct vm_area_struct *ret = tsk->mm->mmap;
1990
1991         if (ret)
1992                 return ret;
1993         return gate_vma;
1994 }
1995 /*
1996  * Helper function for iterating across a vma list.  It ensures that the caller
1997  * will visit `gate_vma' prior to terminating the search.
1998  */
1999 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2000                                         struct vm_area_struct *gate_vma)
2001 {
2002         struct vm_area_struct *ret;
2003
2004         ret = this_vma->vm_next;
2005         if (ret)
2006                 return ret;
2007         if (this_vma == gate_vma)
2008                 return NULL;
2009         return gate_vma;
2010 }
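     /*
      * Together, first_vma() and next_vma() give the dump loops below one
      * iteration idiom:
      *
      *      for (vma = first_vma(current, gate_vma); vma != NULL;
      *           vma = next_vma(vma, gate_vma))
      *              ...
      */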
2011
2012 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2013                              elf_addr_t e_shoff, int segs)
2014 {
2015         elf->e_shoff = e_shoff;
2016         elf->e_shentsize = sizeof(*shdr4extnum);
2017         elf->e_shnum = 1;
2018         elf->e_shstrndx = SHN_UNDEF;
2019
2020         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2021
2022         shdr4extnum->sh_type = SHT_NULL;
2023         shdr4extnum->sh_size = elf->e_shnum;
2024         shdr4extnum->sh_link = elf->e_shstrndx;
2025         shdr4extnum->sh_info = segs;
2026 }
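     /*
      * With e_phnum set to PN_XNUM, a consumer recovers the real program
      * header count from sh_info of this initial SHT_NULL section header,
      * per the ELF extended-numbering convention.
      */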
2027
2028 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2029                                      unsigned long mm_flags)
2030 {
2031         struct vm_area_struct *vma;
2032         size_t size = 0;
2033
2034         for (vma = first_vma(current, gate_vma); vma != NULL;
2035              vma = next_vma(vma, gate_vma))
2036                 size += vma_dump_size(vma, mm_flags);
2037         return size;
2038 }
2039
2040 /*
2041  * Actual dumper
2042  *
2043  * This is a two-pass process; first we find the offsets of the bits,
2044  * and then they are actually written out.  If we run out of core limit
2045  * we just truncate.
2046  */
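     /*
      * The resulting file layout, in the order written below:
      *
      *      ELF header
      *      program headers (the note phdr, one per vma, arch extras)
      *      note data
      *      padding up to the next ELF_EXEC_PAGESIZE boundary
      *      page-sized dumps of each vma
      *      arch extra data, then the extended-numbering shdr if needed
      */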
2047 static int elf_core_dump(struct coredump_params *cprm)
2048 {
2049         int has_dumped = 0;
2050         mm_segment_t fs;
2051         int segs;
2052         size_t size = 0;
2053         struct vm_area_struct *vma, *gate_vma;
2054         struct elfhdr *elf = NULL;
2055         loff_t offset = 0, dataoff, foffset;
2056         struct elf_note_info info = { };
2057         struct elf_phdr *phdr4note = NULL;
2058         struct elf_shdr *shdr4extnum = NULL;
2059         Elf_Half e_phnum;
2060         elf_addr_t e_shoff;
2061
2062         /*
2063          * We no longer stop all VM operations.
2064          * 
2065          * This is because those processes that could possibly change map_count
2066          * or the mmap / vma pages are now blocked in do_exit on current
2067          * finishing this core dump.
2068          *
2069          * Only ptrace can touch these memory addresses, but it doesn't change
2070          * the map_count or the pages allocated. So no possibility of crashing
2071          * exists while dumping the mm->vm_next areas to the core file.
2072          */
2073   
2074         /* alloc memory for large data structures: too large to be on stack */
2075         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2076         if (!elf)
2077                 goto out;
2078         /*
2079          * The number of segs is recorded in the ELF header as a 16-bit value.
2080          * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2081          */
2082         segs = current->mm->map_count;
2083         segs += elf_core_extra_phdrs();
2084
2085         gate_vma = get_gate_vma(current->mm);
2086         if (gate_vma != NULL)
2087                 segs++;
2088
2089         /* for notes section */
2090         segs++;
2091
2092         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2093          * this, the kernel supports extended numbering. Have a look at
2094          * include/linux/elf.h for further information. */
2095         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2096
2097         /*
2098          * Collect all the non-memory information about the process for the
2099          * notes.  This also sets up the file header.
2100          */
2101         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2102                 goto cleanup;
2103
2104         has_dumped = 1;
2105
2106         fs = get_fs();
2107         set_fs(KERNEL_DS);
2108
2109         offset += sizeof(*elf);                         /* Elf header */
2110         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2111         foffset = offset;
2112
2113         /* Write notes phdr entry */
2114         {
2115                 size_t sz = get_note_info_size(&info);
2116
2117                 sz += elf_coredump_extra_notes_size();
2118
2119                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2120                 if (!phdr4note)
2121                         goto end_coredump;
2122
2123                 fill_elf_note_phdr(phdr4note, sz, offset);
2124                 offset += sz;
2125         }
2126
2127         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2128
2129         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2130         offset += elf_core_extra_data_size();
2131         e_shoff = offset;
2132
2133         if (e_phnum == PN_XNUM) {
2134                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2135                 if (!shdr4extnum)
2136                         goto end_coredump;
2137                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2138         }
2139
2140         offset = dataoff;
2141
2142         size += sizeof(*elf);
2143         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2144                 goto end_coredump;
2145
2146         size += sizeof(*phdr4note);
2147         if (size > cprm->limit
2148             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2149                 goto end_coredump;
2150
2151         /* Write program headers for segments dump */
2152         for (vma = first_vma(current, gate_vma); vma != NULL;
2153                         vma = next_vma(vma, gate_vma)) {
2154                 struct elf_phdr phdr;
2155
2156                 phdr.p_type = PT_LOAD;
2157                 phdr.p_offset = offset;
2158                 phdr.p_vaddr = vma->vm_start;
2159                 phdr.p_paddr = 0;
2160                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2161                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2162                 offset += phdr.p_filesz;
2163                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2164                 if (vma->vm_flags & VM_WRITE)
2165                         phdr.p_flags |= PF_W;
2166                 if (vma->vm_flags & VM_EXEC)
2167                         phdr.p_flags |= PF_X;
2168                 phdr.p_align = ELF_EXEC_PAGESIZE;
2169
2170                 size += sizeof(phdr);
2171                 if (size > cprm->limit
2172                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2173                         goto end_coredump;
2174         }
2175
2176         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2177                 goto end_coredump;
2178
2179         /* write out the notes section */
2180         if (!write_note_info(&info, cprm->file, &foffset))
2181                 goto end_coredump;
2182
2183         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2184                 goto end_coredump;
2185
2186         /* Align to page */
2187         if (!dump_seek(cprm->file, dataoff - foffset))
2188                 goto end_coredump;
2189
2190         for (vma = first_vma(current, gate_vma); vma != NULL;
2191                         vma = next_vma(vma, gate_vma)) {
2192                 unsigned long addr;
2193                 unsigned long end;
2194
2195                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2196
2197                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2198                         struct page *page;
2199                         int stop;
2200
2201                         page = get_dump_page(addr);
2202                         if (page) {
2203                                 void *kaddr = kmap(page);
2204                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2205                                         !dump_write(cprm->file, kaddr,
2206                                                     PAGE_SIZE);
2207                                 kunmap(page);
2208                                 page_cache_release(page);
2209                         } else
2210                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2211                         if (stop)
2212                                 goto end_coredump;
2213                 }
2214         }
2215
2216         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2217                 goto end_coredump;
2218
2219         if (e_phnum == PN_XNUM) {
2220                 size += sizeof(*shdr4extnum);
2221                 if (size > cprm->limit
2222                     || !dump_write(cprm->file, shdr4extnum,
2223                                    sizeof(*shdr4extnum)))
2224                         goto end_coredump;
2225         }
2226
2227 end_coredump:
2228         set_fs(fs);
2229
2230 cleanup:
2231         free_note_info(&info);
2232         kfree(shdr4extnum);
2233         kfree(phdr4note);
2234         kfree(elf);
2235 out:
2236         return has_dumped;
2237 }
2238
2239 #endif          /* CONFIG_ELF_CORE */
2240
2241 static int __init init_elf_binfmt(void)
2242 {
2243         register_binfmt(&elf_format);
2244         return 0;
2245 }
2246
2247 static void __exit exit_elf_binfmt(void)
2248 {
2249         /* Remove the ELF loader. */
2250         unregister_binfmt(&elf_format);
2251 }
2252
2253 core_initcall(init_elf_binfmt);
2254 module_exit(exit_elf_binfmt);
2255 MODULE_LICENSE("GPL");