bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !current->no_new_privs) {
+ !current->no_new_privs &&
+ kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
+ kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
if (mode & S_ISUID) {
- if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
- return -EPERM;
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->euid = inode->i_uid;
-
}
/* Set-gid? */
/*
 * If setgid is set but no group execute bit then this
 * is a candidate for mandatory locking, not a setgid
 * executable.
 */
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
- if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
- return -EPERM;
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->egid = inode->i_gid;
}
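
The reworked guard changes the failure mode: where the old code returned -EPERM for an unmapped owner, the new code simply declines to elevate and continues the exec with unchanged credentials. A minimal sketch of the rule, using a hypothetical helper that is not part of the patch:

    /* Hypothetical helper: on-disk set-id bits are only honored when
     * both ids map into the user namespace that will interpret the
     * resulting credentials. */
    #include <linux/cred.h>
    #include <linux/uidgid.h>

    static bool setid_ids_mapped(const struct cred *cred,
                                 kuid_t uid, kgid_t gid)
    {
            return kuid_has_mapping(cred->user_ns, uid) &&
                   kgid_has_mapping(cred->user_ns, gid);
    }
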
struct linux_binfmt *fmt;
pid_t old_pid, old_vpid;
+ /* This allows 4 levels of binfmt rewrites before failing hard. */
+ if (depth > 5)
+ return -ELOOP;
+
retval = security_bprm_check(bprm);
if (retval)
return retval;
if (!try_module_get(fmt->module))
continue;
read_unlock(&binfmt_lock);
+ bprm->recursion_depth = depth + 1;
retval = fn(bprm);
- /*
- * Restore the depth counter to its starting value
- * in this call, so we don't have to rely on every
- * load_binary function to restore it on return.
- */
bprm->recursion_depth = depth;
if (retval >= 0) {
if (depth == 0) {
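
The depth accounting above caps interpreter chains: each binfmt rewrite re-enters search_binary_handler() at depth + 1, and anything past the limit now fails with -ELOOP rather than the old -ENOEXEC. A userspace sketch of the degenerate case (hypothetical path /tmp/loop, error handling elided):

    /* A #! script that names itself as its interpreter now makes
     * execve() fail with ELOOP instead of recursing. */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/tmp/loop", O_CREAT | O_WRONLY | O_TRUNC, 0755);

            write(fd, "#!/tmp/loop\n", 12);
            close(fd);
            execl("/tmp/loop", "loop", (char *)NULL);
            printf("execl: %s\n", strerror(errno)); /* expect ELOOP */
            return 0;
    }
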
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p)
{
- struct user_namespace *user_ns = current_user_ns();
+ struct user_namespace *user_ns = seq_user_ns(m);
struct group_info *group_info;
int g;
struct fdtable *fdt = NULL;
group_info = cred->group_info;
task_unlock(p);
- for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
+ for (g = 0; g < group_info->ngroups; g++)
seq_printf(m, "%d ",
from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
put_cred(cred);
seq_putc(m, '\n');
}
- static void render_sigset_t(struct seq_file *m, const char *header,
+ void render_sigset_t(struct seq_file *m, const char *header,
sigset_t *set)
{
int i;
seq_putc(m, '\n');
}
+ /* Remove non-existent capabilities */
+ #define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
+ CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
+
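
A worked expansion of the mask, assuming CAP_LAST_CAP == 36 (CAP_BLOCK_SUSPEND, as in v3.8) and the stock CAP_TO_INDEX()/CAP_TO_MASK() definitions:

    /*
     * CAP_TO_INDEX(36)    == 36 >> 5        == 1
     * CAP_TO_MASK(36 + 1) == 1 << (37 & 31) == 0x20
     * mask                == 0x20 - 1       == 0x1f
     *
     * so word 1 of the bitmap keeps only bits 32..36, clearing any
     * capability numbers this kernel does not actually define.
     */
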
static inline void task_cap(struct seq_file *m, struct task_struct *p)
{
const struct cred *cred;
cap_bset = cred->cap_bset;
rcu_read_unlock();
+ NORM_CAPS(cap_inheritable);
+ NORM_CAPS(cap_permitted);
+ NORM_CAPS(cap_effective);
+ NORM_CAPS(cap_bset);
+
render_cap_t(m, "CapInh:\t", &cap_inheritable);
render_cap_t(m, "CapPrm:\t", &cap_permitted);
render_cap_t(m, "CapEff:\t", &cap_effective);
render_cap_t(m, "CapBnd:\t", &cap_bset);
}
+ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
+ {
+ #ifdef CONFIG_SECCOMP
+ seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
+ #endif
+ }
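
The new line can be read straight back out of /proc; a quick sketch (mode values: 0 disabled, 1 strict, 2 filter):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/self/status", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    if (!strncmp(line, "Seccomp:", 8))
                            fputs(line, stdout); /* e.g. "Seccomp:\t0" */
            fclose(f);
            return 0;
    }
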
+
static inline void task_context_switch_counts(struct seq_file *m,
struct task_struct *p)
{
}
task_sig(m, task);
task_cap(m, task);
+ task_seccomp(m, task);
task_cpus_allowed(m, task);
cpuset_task_status_allowed(m, task);
task_context_switch_counts(m, task);
struct vfsmount;
struct cred;
struct swap_info_struct;
+ struct seq_file;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
int (*setlease)(struct file *, long, struct file_lock **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
+ int (*show_fdinfo)(struct seq_file *m, struct file *f);
};
struct inode_operations {
umode_t create_mode, int *opened);
} ____cacheline_aligned;
- struct seq_file;
-
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
#define FS_REQUIRES_DEV 1
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
+#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
+#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
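
A filesystem opts in through .fs_flags; a sketch with a hypothetical "examplefs" (mount callback declared but elided):

    static struct dentry *example_mount(struct file_system_type *fs_type,
                                        int flags, const char *dev_name,
                                        void *data);

    static struct file_system_type example_fs_type = {
            .name     = "examplefs",
            .mount    = example_mount,
            .fs_flags = FS_USERNS_MOUNT, /* mountable by userns root */
    };
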
struct dentry *(*mount) (struct file_system_type *, int,
#include <linux/err.h>
/* needed for stackable file system support */
- extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
- extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
+ extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
+ extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
extern int inode_init_always(struct super_block *, struct inode *);
extern void inode_init_once(struct inode *);
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
- extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
- extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
- extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
+ extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
+ extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
+ extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
- int origin, loff_t maxsize, loff_t eof);
+ int whence, loff_t maxsize, loff_t eof);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
*/
enum {
- PROC_ROOT_INO = 1,
+ PROC_ROOT_INO = 1,
+ PROC_IPC_INIT_INO = 0xEFFFFFFFU,
+ PROC_UTS_INIT_INO = 0xEFFFFFFEU,
+ PROC_USER_INIT_INO = 0xEFFFFFFDU,
+ PROC_PID_INIT_INO = 0xEFFFFFFCU,
};
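
These reserved values give the initial namespaces stable inode numbers, so userspace can tell whether it is in an initial namespace by stat()ing the ns files; a sketch, assuming this series' /proc/<pid>/ns/pid file:

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            struct stat st;

            if (stat("/proc/self/ns/pid", &st))
                    return 1;
            /* compare against PROC_PID_INIT_INO */
            printf("initial pid ns: %s\n",
                   st.st_ino == 0xEFFFFFFCU ? "yes" : "no");
            return 0;
    }
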
/*
struct proc_dir_entry *parent);
extern struct file *proc_ns_fget(int fd);
+extern bool proc_ns_inode(struct inode *inode);
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
#else
#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
return ERR_PTR(-EINVAL);
}
+static inline bool proc_ns_inode(struct inode *inode)
+{
+ return false;
+}
+
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+ *inum = 1;
+ return 0;
+}
+static inline void proc_free_inum(unsigned int inum)
+{
+}
#endif /* CONFIG_PROC_FS */
#if !defined(CONFIG_PROC_KCORE)
void *(*get)(struct task_struct *task);
void (*put)(void *ns);
int (*install)(struct nsproxy *nsproxy, void *ns);
+ unsigned int (*inum)(void *ns);
};
extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations mntns_operations;
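
Each namespace's implementation of the new hook just reports the inode number it allocated (or reserved) for itself; modeled on the UTS case, it amounts to:

    static unsigned int utsns_inum(void *vp)
    {
            struct uts_namespace *ns = vp;

            return ns->proc_inum; /* set up via proc_alloc_inum() */
    }
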
union proc_op {
int (*proc_get_link)(struct dentry *, struct path *);
return pde->parent->data;
}
+ #include <linux/signal.h>
+
+ void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set);
#endif /* _LINUX_PROC_FS_H */
if (ret)
goto out_unlock;
- /* See feature-removal-schedule.txt */
if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent)
pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
task_tgid_nr(current), current->comm);
{
struct cgroup_pidlist *l;
/* don't need task_nsproxy() if we're looking at ourself */
- struct pid_namespace *ns = current->nsproxy->pid_ns;
+ struct pid_namespace *ns = task_active_pid_ns(current);
/*
* We can't drop the pidlist_mutex before taking the l->mutex in case
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
+#include <linux/proc_fs.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
.last_pid = 0,
.level = 0,
.child_reaper = &init_task,
+ .user_ns = &init_user_ns,
+ .proc_inum = PROC_PID_INIT_INO,
};
EXPORT_SYMBOL_GPL(init_pid_ns);
- int is_container_init(struct task_struct *tsk)
- {
- int ret = 0;
- struct pid *pid;
-
- rcu_read_lock();
- pid = task_pid(tsk);
- if (pid != NULL && pid->numbers[pid->level].nr == 1)
- ret = 1;
- rcu_read_unlock();
-
- return ret;
- }
- EXPORT_SYMBOL(is_container_init);
-
/*
* Note: disable interrupts while the pidmap_lock is held as an
* interrupt might come in and do read_lock(&tasklist_lock).
unsigned long flags;
spin_lock_irqsave(&pidmap_lock, flags);
- for (i = 0; i <= pid->level; i++)
- hlist_del_rcu(&pid->numbers[i].pid_chain);
+ for (i = 0; i <= pid->level; i++) {
+ struct upid *upid = pid->numbers + i;
+ struct pid_namespace *ns = upid->ns;
+ hlist_del_rcu(&upid->pid_chain);
+ switch(--ns->nr_hashed) {
+ case 1:
+ /* When all that is left in the pid namespace
+ * is the reaper wake up the reaper. The reaper
+ * may be sleeping in zap_pid_ns_processes().
+ */
+ wake_up_process(ns->child_reaper);
+ break;
+ case 0:
+ ns->nr_hashed = -1;
+ schedule_work(&ns->proc_work);
+ break;
+ }
+ }
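
The wake-up matters because the namespace's reaper may be blocked waiting for every other pid in the namespace to be unhashed; simplified from zap_pid_ns_processes(), the consumer side looks like:

    for (;;) {
            set_current_state(TASK_UNINTERRUPTIBLE);
            if (pid_ns->nr_hashed == 1) /* only the reaper remains */
                    break;
            schedule();
    }
    __set_current_state(TASK_RUNNING);
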
spin_unlock_irqrestore(&pidmap_lock, flags);
for (i = 0; i <= pid->level; i++)
goto out;
tmp = ns;
+ pid->level = ns->level;
for (i = ns->level; i >= 0; i--) {
nr = alloc_pidmap(tmp);
if (nr < 0)
tmp = tmp->parent;
}
+ if (unlikely(is_child_reaper(pid))) {
+ if (pid_ns_prepare_proc(ns))
+ goto out_free;
+ }
+
get_pid_ns(ns);
- pid->level = ns->level;
atomic_set(&pid->count, 1);
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
- for ( ; upid >= pid->numbers; --upid)
+ if (ns->nr_hashed < 0)
+ goto out_unlock;
+ for ( ; upid >= pid->numbers; --upid) {
hlist_add_head_rcu(&upid->pid_chain,
&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+ upid->ns->nr_hashed++;
+ }
spin_unlock_irq(&pidmap_lock);
out:
return pid;
+out_unlock:
+ spin_unlock_irq(&pidmap_lock);
out_free:
while (++i <= ns->level)
free_pidmap(pid->numbers + i);
struct pid *find_vpid(int nr)
{
- return find_pid_ns(nr, current->nsproxy->pid_ns);
+ return find_pid_ns(nr, task_active_pid_ns(current));
}
EXPORT_SYMBOL_GPL(find_vpid);
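
task_active_pid_ns() is the safe spelling here because it derives the namespace from the task's struct pid rather than from tsk->nsproxy, which exit_task_namespaces() sets to NULL; its definition in kernel/pid.c is simply:

    struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
    {
            return ns_of_pid(task_pid(tsk));
    }
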
struct task_struct *find_task_by_vpid(pid_t vnr)
{
- return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
+ return find_task_by_pid_ns(vnr, task_active_pid_ns(current));
}
struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
pid_t pid_vnr(struct pid *pid)
{
- return pid_nr_ns(pid, current->nsproxy->pid_ns);
+ return pid_nr_ns(pid, task_active_pid_ns(current));
}
EXPORT_SYMBOL_GPL(pid_vnr);
rcu_read_lock();
if (!ns)
- ns = current->nsproxy->pid_ns;
+ ns = task_active_pid_ns(current);
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
task = task->group_leader;
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+ init_pid_ns.nr_hashed = 1;
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
SLAB_HWCACHE_ALIGN | SLAB_PANIC);
smp_rmb();
if (task->mm)
dumpable = get_dumpable(task->mm);
- if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode))
+ rcu_read_lock();
+ if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+ rcu_read_unlock();
return -EPERM;
+ }
+ rcu_read_unlock();
return security_ptrace_access_check(task, mode);
}
if (seize)
flags |= PT_SEIZED;
- if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
+ rcu_read_lock();
+ if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
flags |= PT_PTRACE_CAP;
+ rcu_read_unlock();
task->ptrace = flags;
__ptrace_link(task, current);
return;
list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) {
+ if (unlikely(p->ptrace & PT_EXITKILL))
+ send_sig_info(SIGKILL, SEND_SIG_FORCED, p);
+
if (__ptrace_detach(tracer, p))
list_add(&p->ptrace_entry, &ptrace_dead);
}
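
From userspace, the new PT_EXITKILL behavior is requested with the PTRACE_O_EXITKILL option (0x00100000 in this series); a sketch with error handling elided, defining the constant locally in case the installed headers predate it:

    #include <signal.h>
    #include <sys/ptrace.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #ifndef PTRACE_O_EXITKILL
    #define PTRACE_O_EXITKILL 0x00100000
    #endif

    int main(void)
    {
            pid_t pid = fork();

            if (pid == 0) {
                    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                    raise(SIGSTOP); /* stop so the parent can set options */
                    pause();
                    return 0;
            }
            waitpid(pid, NULL, 0); /* child is in signal-delivery-stop */
            ptrace(PTRACE_SETOPTIONS, pid, NULL,
                   (void *)(long)PTRACE_O_EXITKILL);
            return 0; /* tracer exit now SIGKILLs the tracee */
    }
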