bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !current->no_new_privs) {
+ !current->no_new_privs &&
+ kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
+ kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
if (mode & S_ISUID) {
- if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
- return -EPERM;
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->euid = inode->i_uid;
-
}
/* Set-gid? */
/*
 * If setgid is set but no group execute bit then this
 * is a candidate for mandatory locking, not a setgid
 * executable.
 */
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
- if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
- return -EPERM;
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->egid = inode->i_gid;
}
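
The reworked guard changes the failure mode: where the old code returned -EPERM for an unmapped owner, the new code simply declines to elevate and continues the exec with unchanged credentials. A minimal sketch of the rule, using a hypothetical helper that is not part of the patch:

    /* Hypothetical helper: on-disk set-id bits are only honored when
     * both ids map into the user namespace that will interpret the
     * resulting credentials. */
    #include <linux/cred.h>
    #include <linux/uidgid.h>

    static bool setid_ids_mapped(const struct cred *cred,
                                 kuid_t uid, kgid_t gid)
    {
            return kuid_has_mapping(cred->user_ns, uid) &&
                   kgid_has_mapping(cred->user_ns, gid);
    }
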
struct linux_binfmt *fmt;
pid_t old_pid, old_vpid;
+ /* This allows 4 levels of binfmt rewrites before failing hard. */
+ if (depth > 5)
+ return -ELOOP;
+
retval = security_bprm_check(bprm);
if (retval)
return retval;
if (!try_module_get(fmt->module))
continue;
read_unlock(&binfmt_lock);
+ bprm->recursion_depth = depth + 1;
retval = fn(bprm);
- /*
- * Restore the depth counter to its starting value
- * in this call, so we don't have to rely on every
- * load_binary function to restore it on return.
- */
bprm->recursion_depth = depth;
if (retval >= 0) {
if (depth == 0) {
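
The depth accounting above caps interpreter chains: each binfmt rewrite re-enters search_binary_handler() at depth + 1, and anything past the limit now fails with -ELOOP rather than the old -ENOEXEC. A userspace sketch of the degenerate case (hypothetical path /tmp/loop, error handling elided):

    /* A #! script that names itself as its interpreter now makes
     * execve() fail with ELOOP instead of recursing. */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/tmp/loop", O_CREAT | O_WRONLY | O_TRUNC, 0755);

            write(fd, "#!/tmp/loop\n", 12);
            close(fd);
            execl("/tmp/loop", "loop", (char *)NULL);
            printf("execl: %s\n", strerror(errno)); /* expect ELOOP */
            return 0;
    }
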
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p)
{
- struct user_namespace *user_ns = current_user_ns();
+ struct user_namespace *user_ns = seq_user_ns(m);
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
group_info = cred->group_info;
task_unlock(p);
- for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
+ for (g = 0; g < group_info->ngroups; g++)
seq_printf(m, "%d ",
from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
put_cred(cred);
seq_putc(m, '\n');
}
- static void render_sigset_t(struct seq_file *m, const char *header,
+ void render_sigset_t(struct seq_file *m, const char *header,
sigset_t *set)
{
int i;
seq_putc(m, '\n');
}
+ /* Remove non-existent capabilities */
+ #define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
+ CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
+
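
A worked expansion of the mask, assuming CAP_LAST_CAP == 36 (CAP_BLOCK_SUSPEND, as in v3.8) and the stock CAP_TO_INDEX()/CAP_TO_MASK() definitions:

    /*
     * CAP_TO_INDEX(36)    == 36 >> 5        == 1
     * CAP_TO_MASK(36 + 1) == 1 << (37 & 31) == 0x20
     * mask                == 0x20 - 1       == 0x1f
     *
     * so word 1 of the bitmap keeps only bits 32..36, clearing any
     * capability numbers this kernel does not actually define.
     */
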
static inline void task_cap(struct seq_file *m, struct task_struct *p)
{
const struct cred *cred;
cap_bset = cred->cap_bset;
rcu_read_unlock();
+ NORM_CAPS(cap_inheritable);
+ NORM_CAPS(cap_permitted);
+ NORM_CAPS(cap_effective);
+ NORM_CAPS(cap_bset);
+
render_cap_t(m, "CapInh:\t", &cap_inheritable);
render_cap_t(m, "CapPrm:\t", &cap_permitted);
render_cap_t(m, "CapEff:\t", &cap_effective);
render_cap_t(m, "CapBnd:\t", &cap_bset);
}
+ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
+ {
+ #ifdef CONFIG_SECCOMP
+ seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
+ #endif
+ }
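
The new line can be read straight back out of /proc; a quick sketch (mode values: 0 disabled, 1 strict, 2 filter):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/self/status", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    if (!strncmp(line, "Seccomp:", 8))
                            fputs(line, stdout); /* e.g. "Seccomp:\t0" */
            fclose(f);
            return 0;
    }
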
+
static inline void task_context_switch_counts(struct seq_file *m,
struct task_struct *p)
{
}
task_sig(m, task);
task_cap(m, task);
+ task_seccomp(m, task);
task_cpus_allowed(m, task);
cpuset_task_status_allowed(m, task);
task_context_switch_counts(m, task);
struct vfsmount;
struct cred;
struct swap_info_struct;
+ struct seq_file;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
int (*setlease)(struct file *, long, struct file_lock **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
+ int (*show_fdinfo)(struct seq_file *m, struct file *f);
};
struct inode_operations {
umode_t create_mode, int *opened);
} ____cacheline_aligned;
- struct seq_file;
-
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
#define FS_REQUIRES_DEV 1
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
+#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
+#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
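
A filesystem opts in through .fs_flags; a sketch with a hypothetical "examplefs" (mount callback declared but elided):

    static struct dentry *example_mount(struct file_system_type *fs_type,
                                        int flags, const char *dev_name,
                                        void *data);

    static struct file_system_type example_fs_type = {
            .name     = "examplefs",
            .mount    = example_mount,
            .fs_flags = FS_USERNS_MOUNT, /* mountable by userns root */
    };
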
struct dentry *(*mount) (struct file_system_type *, int,
#include <linux/err.h>
/* needed for stackable file system support */
- extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
- extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
+ extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
+ extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
extern int inode_init_always(struct super_block *, struct inode *);
extern void inode_init_once(struct inode *);
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
- extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
- extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
- extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
+ extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
+ extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
+ extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
- int origin, loff_t maxsize, loff_t eof);
+ int whence, loff_t maxsize, loff_t eof);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
*/
enum {
- PROC_ROOT_INO = 1,
+ PROC_ROOT_INO = 1,
+ PROC_IPC_INIT_INO = 0xEFFFFFFFU,
+ PROC_UTS_INIT_INO = 0xEFFFFFFEU,
+ PROC_USER_INIT_INO = 0xEFFFFFFDU,
+ PROC_PID_INIT_INO = 0xEFFFFFFCU,
};
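
These reserved values give the initial namespaces stable inode numbers, so userspace can tell whether it is in an initial namespace by stat()ing the ns files; a sketch, assuming this series' /proc/<pid>/ns/pid file:

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            struct stat st;

            if (stat("/proc/self/ns/pid", &st))
                    return 1;
            /* compare against PROC_PID_INIT_INO */
            printf("initial pid ns: %s\n",
                   st.st_ino == 0xEFFFFFFCU ? "yes" : "no");
            return 0;
    }
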
/*
struct proc_dir_entry *parent);
extern struct file *proc_ns_fget(int fd);
+extern bool proc_ns_inode(struct inode *inode);
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
#else
#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
return ERR_PTR(-EINVAL);
}
+static inline bool proc_ns_inode(struct inode *inode)
+{
+ return false;
+}
+
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+ *inum = 1;
+ return 0;
+}
+static inline void proc_free_inum(unsigned int inum)
+{
+}
#endif /* CONFIG_PROC_FS */
#if !defined(CONFIG_PROC_KCORE)
void *(*get)(struct task_struct *task);
void (*put)(void *ns);
int (*install)(struct nsproxy *nsproxy, void *ns);
+ unsigned int (*inum)(void *ns);
};
extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations mntns_operations;
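
Each namespace's implementation of the new hook just reports the inode number it allocated (or reserved) for itself; modeled on the UTS case, it amounts to:

    static unsigned int utsns_inum(void *vp)
    {
            struct uts_namespace *ns = vp;

            return ns->proc_inum; /* set up via proc_alloc_inum() */
    }
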
union proc_op {
int (*proc_get_link)(struct dentry *, struct path *);
return pde->parent->data;
}
+ #include <linux/signal.h>
+
+ void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set);
#endif /* _LINUX_PROC_FS_H */
if (ret)
goto out_unlock;
- /* See feature-removal-schedule.txt */
if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent)
pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
task_tgid_nr(current), current->comm);
{
struct cgroup_pidlist *l;
/* don't need task_nsproxy() if we're looking at ourself */
- struct pid_namespace *ns = current->nsproxy->pid_ns;
+ struct pid_namespace *ns = task_active_pid_ns(current);
/*
* We can't drop the pidlist_mutex before taking the l->mutex in case
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
+#include <linux/proc_fs.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
.last_pid = 0,
.level = 0,
.child_reaper = &init_task,
+ .user_ns = &init_user_ns,
+ .proc_inum = PROC_PID_INIT_INO,
};
EXPORT_SYMBOL_GPL(init_pid_ns);
- int is_container_init(struct task_struct *tsk)
- {
- int ret = 0;
- struct pid *pid;
-
- rcu_read_lock();
- pid = task_pid(tsk);
- if (pid != NULL && pid->numbers[pid->level].nr == 1)
- ret = 1;
- rcu_read_unlock();
-
- return ret;
- }
- EXPORT_SYMBOL(is_container_init);
-
/*
* Note: disable interrupts while the pidmap_lock is held as an
* interrupt might come in and do read_lock(&tasklist_lock).
unsigned long flags;
spin_lock_irqsave(&pidmap_lock, flags);
- for (i = 0; i <= pid->level; i++)
- hlist_del_rcu(&pid->numbers[i].pid_chain);
+ for (i = 0; i <= pid->level; i++) {
+ struct upid *upid = pid->numbers + i;
+ struct pid_namespace *ns = upid->ns;
+ hlist_del_rcu(&upid->pid_chain);
+ switch(--ns->nr_hashed) {
+ case 1:
+ /* When all that is left in the pid namespace
+ * is the reaper wake up the reaper. The reaper
+ * may be sleeping in zap_pid_ns_processes().
+ */
+ wake_up_process(ns->child_reaper);
+ break;
+ case 0:
+ ns->nr_hashed = -1;
+ schedule_work(&ns->proc_work);
+ break;
+ }
+ }
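
The wake-up matters because the namespace's reaper may be blocked waiting for every other pid in the namespace to be unhashed; simplified from zap_pid_ns_processes(), the consumer side looks like:

    for (;;) {
            set_current_state(TASK_UNINTERRUPTIBLE);
            if (pid_ns->nr_hashed == 1) /* only the reaper remains */
                    break;
            schedule();
    }
    __set_current_state(TASK_RUNNING);
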
spin_unlock_irqrestore(&pidmap_lock, flags);
for (i = 0; i <= pid->level; i++)
goto out;
tmp = ns;
+ pid->level = ns->level;
for (i = ns->level; i >= 0; i--) {
nr = alloc_pidmap(tmp);
if (nr < 0)
tmp = tmp->parent;
}
+ if (unlikely(is_child_reaper(pid))) {
+ if (pid_ns_prepare_proc(ns))
+ goto out_free;
+ }
+
get_pid_ns(ns);
- pid->level = ns->level;
atomic_set(&pid->count, 1);
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
- for ( ; upid >= pid->numbers; --upid)
+ if (ns->nr_hashed < 0)
+ goto out_unlock;
+ for ( ; upid >= pid->numbers; --upid) {
hlist_add_head_rcu(&upid->pid_chain,
&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+ upid->ns->nr_hashed++;
+ }
spin_unlock_irq(&pidmap_lock);
out:
return pid;
+out_unlock:
+ spin_unlock_irq(&pidmap_lock);
out_free:
while (++i <= ns->level)
free_pidmap(pid->numbers + i);
struct pid *find_vpid(int nr)
{
- return find_pid_ns(nr, current->nsproxy->pid_ns);
+ return find_pid_ns(nr, task_active_pid_ns(current));
}
EXPORT_SYMBOL_GPL(find_vpid);
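
task_active_pid_ns() is the safe spelling here because it derives the namespace from the task's struct pid rather than from tsk->nsproxy, which exit_task_namespaces() sets to NULL; its definition in kernel/pid.c is simply:

    struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
    {
            return ns_of_pid(task_pid(tsk));
    }
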
struct task_struct *find_task_by_vpid(pid_t vnr)
{
- return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
+ return find_task_by_pid_ns(vnr, task_active_pid_ns(current));
}
struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
pid_t pid_vnr(struct pid *pid)
{
- return pid_nr_ns(pid, current->nsproxy->pid_ns);
+ return pid_nr_ns(pid, task_active_pid_ns(current));
}
EXPORT_SYMBOL_GPL(pid_vnr);
rcu_read_lock();
if (!ns)
- ns = current->nsproxy->pid_ns;
+ ns = task_active_pid_ns(current);
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
task = task->group_leader;
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+ init_pid_ns.nr_hashed = 1;
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
SLAB_HWCACHE_ALIGN | SLAB_PANIC);
smp_rmb();
if (task->mm)
dumpable = get_dumpable(task->mm);
- if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode))
+ rcu_read_lock();
+ if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+ rcu_read_unlock();
return -EPERM;
+ }
+ rcu_read_unlock();
return security_ptrace_access_check(task, mode);
}
if (seize)
flags |= PT_SEIZED;
- if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
+ rcu_read_lock();
+ if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
flags |= PT_PTRACE_CAP;
+ rcu_read_unlock();
task->ptrace = flags;
__ptrace_link(task, current);
return;
list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) {
+ if (unlikely(p->ptrace & PT_EXITKILL))
+ send_sig_info(SIGKILL, SEND_SIG_FORCED, p);
+
if (__ptrace_detach(tracer, p))
list_add(&p->ptrace_entry, &ptrace_dead);
}
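
From userspace, the new PT_EXITKILL behavior is requested with the PTRACE_O_EXITKILL option (0x00100000 in this series); a sketch with error handling elided, defining the constant locally in case the installed headers predate it:

    #include <signal.h>
    #include <sys/ptrace.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #ifndef PTRACE_O_EXITKILL
    #define PTRACE_O_EXITKILL 0x00100000
    #endif

    int main(void)
    {
            pid_t pid = fork();

            if (pid == 0) {
                    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                    raise(SIGSTOP); /* stop so the parent can set options */
                    pause();
                    return 0;
            }
            waitpid(pid, NULL, 0); /* child is in signal-delivery-stop */
            ptrace(PTRACE_SETOPTIONS, pid, NULL,
                   (void *)(long)PTRACE_O_EXITKILL);
            return 0; /* tracer exit now SIGKILLs the tracee */
    }
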