if (ns->pid_cachep == NULL)
goto out_free_map;
- err = proc_alloc_inum(&ns->proc_inum);
+ err = ns_alloc_inum(&ns->ns);
if (err)
goto out_free_map;
+ ns->ns.ops = &pidns_operations;
kref_init(&ns->kref);
ns->level = level;
return ERR_PTR(err);
}
+static void delayed_free_pidns(struct rcu_head *p)
+{
+ kmem_cache_free(pid_ns_cachep,
+ container_of(p, struct pid_namespace, rcu));
+}
+
static void destroy_pid_namespace(struct pid_namespace *ns)
{
int i;
- proc_free_inum(ns->proc_inum);
+ ns_free_inum(&ns->ns);
for (i = 0; i < PIDMAP_ENTRIES; i++)
kfree(ns->pidmap[i].page);
put_user_ns(ns->user_ns);
- kmem_cache_free(pid_ns_cachep, ns);
+ call_rcu(&ns->rcu, delayed_free_pidns);
}
struct pid_namespace *copy_pid_ns(unsigned long flags,
/* Don't allow any more processes into the pid namespace */
disable_pid_allocation(pid_ns);
- /* Ignore SIGCHLD causing any terminated children to autoreap */
+ /*
+ * Ignore SIGCHLD causing any terminated children to autoreap.
+ * This speeds up the namespace shutdown, plus see the comment
+ * below.
+ */
spin_lock_irq(&me->sighand->siglock);
me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
spin_unlock_irq(&me->sighand->siglock);
}
read_unlock(&tasklist_lock);
- /* Firstly reap the EXIT_ZOMBIE children we may have. */
+ /*
+ * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD.
+ * sys_wait4() will also block until our children traced from the
+ * parent namespace are detached and become EXIT_DEAD.
+ */
do {
clear_thread_flag(TIF_SIGPENDING);
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
/*
- * sys_wait4() above can't reap the TASK_DEAD children.
- * Make sure they all go away, see free_pid().
+ * sys_wait4() above can't reap the EXIT_DEAD children but we do not
+ * really care, we could reparent them to the global init. We could
+ * exit and reap ->child_reaper even if it is not the last thread in
+ * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(),
+ * pid_ns can not go away until proc_kill_sb() drops the reference.
+ *
+ * But this ns can also have other tasks injected by setns()+fork().
+ * Again, ignoring the user visible semantics we do not really need
+ * to wait until they are all reaped, but they can be reparented to
+ * us and thus we need to ensure that pid->child_reaper stays valid
+ * until they all go away. See free_pid()->wake_up_process().
+ *
+ * We rely on ignored SIGCHLD, an injected zombie must be autoreaped
+ * if reparented.
*/
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
return 0;
}
-static void *pidns_get(struct task_struct *task)
+static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct pid_namespace, ns);
+}
+
+static struct ns_common *pidns_get(struct task_struct *task)
{
struct pid_namespace *ns;
get_pid_ns(ns);
rcu_read_unlock();
- return ns;
+ return ns ? &ns->ns : NULL;
}
-static void pidns_put(void *ns)
+static void pidns_put(struct ns_common *ns)
{
- put_pid_ns(ns);
+ put_pid_ns(to_pid_ns(ns));
}
-static int pidns_install(struct nsproxy *nsproxy, void *ns)
+static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
struct pid_namespace *active = task_active_pid_ns(current);
- struct pid_namespace *ancestor, *new = ns;
+ struct pid_namespace *ancestor, *new = to_pid_ns(ns);
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
- !nsown_capable(CAP_SYS_ADMIN))
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
/*
if (ancestor != active)
return -EINVAL;
- put_pid_ns(nsproxy->pid_ns);
- nsproxy->pid_ns = get_pid_ns(new);
+ put_pid_ns(nsproxy->pid_ns_for_children);
+ nsproxy->pid_ns_for_children = get_pid_ns(new);
return 0;
}
-static unsigned int pidns_inum(void *ns)
-{
- struct pid_namespace *pid_ns = ns;
- return pid_ns->proc_inum;
-}
-
const struct proc_ns_operations pidns_operations = {
.name = "pid",
.type = CLONE_NEWPID,
.get = pidns_get,
.put = pidns_put,
.install = pidns_install,
- .inum = pidns_inum,
};
static __init int pid_namespaces_init(void)