#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <linux/kthread.h>
#include <linux/delay.h>
-
+#include <linux/cpuset.h>
#include <linux/atomic.h>
/*
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock);
+ /*
+ * If ->dead, @src_set is associated with one or more dead cgroups
+ * and doesn't contain any migratable tasks. Ignore it early so
+ * that the rest of migration path doesn't get confused by it.
+ */
+ if (src_cset->dead)
+ return;
+
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
if (!list_empty(&src_cset->mg_preload_node))
return ret;
}
+int subsys_cgroup_allow_attach(struct cgroup_taskset *tset)
+{
+ const struct cred *cred = current_cred(), *tcred;
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+
+ if (capable(CAP_SYS_NICE))
+ return 0;
+
+ cgroup_taskset_for_each(task, css, tset) {
+ tcred = __task_cred(task);
+
+ if (current != task && !uid_eq(cred->euid, tcred->uid) &&
+ !uid_eq(cred->euid, tcred->suid))
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys_state *css;
+ int i;
+ int ret;
+
+ for_each_css(css, i, cgrp) {
+ if (css->ss->allow_attach) {
+ ret = css->ss->allow_attach(tset);
+ if (ret)
+ return ret;
+ } else {
+ return -EACCES;
+ }
+ }
+
+ return 0;
+}
+
static int cgroup_procs_write_permission(struct task_struct *task,
struct cgroup *dst_cgrp,
struct kernfs_open_file *of)
*/
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->euid, tcred->suid))
- ret = -EACCES;
+ !uid_eq(cred->euid, tcred->suid)) {
+ /*
+ * if the default permission check fails, give each
+ * cgroup a chance to extend the permission check
+ */
+ struct cgroup_taskset tset = {
+ .src_csets = LIST_HEAD_INIT(tset.src_csets),
+ .dst_csets = LIST_HEAD_INIT(tset.dst_csets),
+ .csets = &tset.src_csets,
+ };
+ struct css_set *cset;
+ cset = task_css_set(task);
+ list_add(&cset->mg_node, &tset.src_csets);
+ ret = cgroup_allow_attach(dst_cgrp, &tset);
+ list_del(&tset.src_csets);
+ if (ret)
+ ret = -EACCES;
+ }
if (!ret && cgroup_on_dfl(dst_cgrp)) {
struct super_block *sb = of->file->f_path.dentry->d_sb;
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
+ struct cgroup_subsys *ss;
struct cgroup *cgrp;
pid_t pid;
- int ret;
+ int ssid, ret;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
rcu_read_unlock();
out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
+ for_each_subsys(ss, ssid)
+ if (ss->post_attach)
+ ss->post_attach();
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
}
if (ss) {
/* css free path */
+ struct cgroup_subsys_state *parent = css->parent;
int id = css->id;
- if (css->parent)
- css_put(css->parent);
-
ss->css_free(css);
cgroup_idr_remove(&ss->css_idr, id);
cgroup_put(cgrp);
+
+ if (parent)
+ css_put(parent);
} else {
/* cgroup free path */
atomic_dec(&cgrp->root->nr_cgrps);
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
css->serial_nr = css_serial_nr_next++;
+ atomic_set(&css->online_cnt, 0);
if (cgroup_parent(cgrp)) {
css->parent = cgroup_css(cgroup_parent(cgrp), ss);
if (!ret) {
css->flags |= CSS_ONLINE;
rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
+
+ atomic_inc(&css->online_cnt);
+ if (css->parent)
+ atomic_inc(&css->parent->online_cnt);
}
return ret;
}
container_of(work, struct cgroup_subsys_state, destroy_work);
mutex_lock(&cgroup_mutex);
- offline_css(css);
- mutex_unlock(&cgroup_mutex);
- css_put(css);
+ do {
+ offline_css(css);
+ css_put(css);
+ /* @css can't go away while we're holding cgroup_mutex */
+ css = css->parent;
+ } while (css && atomic_dec_and_test(&css->online_cnt));
+
+ mutex_unlock(&cgroup_mutex);
}
/* css kill confirmation processing requires process context, bounce */
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
- INIT_WORK(&css->destroy_work, css_killed_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ if (atomic_dec_and_test(&css->online_cnt)) {
+ INIT_WORK(&css->destroy_work, css_killed_work_fn);
+ queue_work(cgroup_destroy_wq, &css->destroy_work);
+ }
}
/**
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
struct cgroup_subsys_state *css;
+ struct cgrp_cset_link *link;
int ssid;
lockdep_assert_held(&cgroup_mutex);
return -EBUSY;
/*
- * Mark @cgrp dead. This prevents further task migration and child
- * creation by disabling cgroup_lock_live_group().
+ * Mark @cgrp and the associated csets dead. The former prevents
+ * further task migration and child creation by disabling
+ * cgroup_lock_live_group(). The latter makes the csets ignored by
+ * the migration path.
*/
cgrp->self.flags &= ~CSS_ONLINE;
+ spin_lock_bh(&css_set_lock);
+ list_for_each_entry(link, &cgrp->cset_links, cset_link)
+ link->cset->dead = true;
+ spin_unlock_bh(&css_set_lock);
+
/* initiate massacre of all css's */
for_each_css(css, ssid, cgrp)
kill_css(css);