#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <linux/eventfd.h>
#include <linux/poll.h>
-#include <linux/capability.h>
#include <asm/atomic.h>
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
-/*
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
- * reference to css->refcnt. In general, this refcnt is expected to goes down
- * to zero, soon.
- *
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
- */
-DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
-{
- if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
- wake_up_all(&cgroup_rmdir_waitq);
-}
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
-{
- css_get(css);
-}
-
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
-{
- cgroup_wakeup_rmdir_waiter(css->cgroup);
- css_put(css);
-}
-
/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
/*
return &css_set_table[index];
}
-static void free_css_set_work(struct work_struct *work)
-{
- struct css_set *cg = container_of(work, struct css_set, work);
- struct cg_cgroup_link *link;
- struct cg_cgroup_link *saved_link;
-
- write_lock(&css_set_lock);
- list_for_each_entry_safe(link, saved_link, &cg->cg_links,
- cg_link_list) {
- struct cgroup *cgrp = link->cgrp;
- list_del(&link->cg_link_list);
- list_del(&link->cgrp_link_list);
- if (atomic_dec_and_test(&cgrp->count)) {
- check_for_release(cgrp);
- cgroup_wakeup_rmdir_waiter(cgrp);
- }
- kfree(link);
- }
- write_unlock(&css_set_lock);
-
- kfree(cg);
-}
-
static void free_css_set_rcu(struct rcu_head *obj)
{
struct css_set *cg = container_of(obj, struct css_set, rcu_head);
-
- INIT_WORK(&cg->work, free_css_set_work);
- schedule_work(&cg->work);
+ kfree(cg);
}
/* We don't maintain the lists running through each css_set to its
* compiled into their kernel but not actually in use */
static int use_task_css_set_links __read_mostly;
-/*
- * refcounted get/put for css_set objects
- */
-static inline void get_css_set(struct css_set *cg)
-{
- atomic_inc(&cg->refcount);
-}
-
-static void put_css_set(struct css_set *cg)
+static void __put_css_set(struct css_set *cg, int taskexit)
{
+ struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
/*
* Ensure that the refcount doesn't hit zero while any readers
* can see it. Similar to atomic_dec_and_lock(), but for an
return;
}
+ /* This css_set is dead. unlink it and release cgroup refcounts */
hlist_del(&cg->hlist);
css_set_count--;
+ list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+ cg_link_list) {
+ struct cgroup *cgrp = link->cgrp;
+ list_del(&link->cg_link_list);
+ list_del(&link->cgrp_link_list);
+ if (atomic_dec_and_test(&cgrp->count) &&
+ notify_on_release(cgrp)) {
+ if (taskexit)
+ set_bit(CGRP_RELEASABLE, &cgrp->flags);
+ check_for_release(cgrp);
+ }
+
+ kfree(link);
+ }
+
write_unlock(&css_set_lock);
call_rcu(&cg->rcu_head, free_css_set_rcu);
}
+/*
+ * refcounted get/put for css_set objects
+ */
+static inline void get_css_set(struct css_set *cg)
+{
+ atomic_inc(&cg->refcount);
+}
+
+static inline void put_css_set(struct css_set *cg)
+{
+ __put_css_set(cg, 0);
+}
+
+static inline void put_css_set_taskexit(struct css_set *cg)
+{
+ __put_css_set(cg, 1);
+}
+
/*
* compare_css_sets - helper function for find_existing_css_set().
* @cg: candidate css_set being tested
* cgroup_attach_task(), which overwrites one tasks cgroup pointer with
* another. It does so using cgroup_mutex, however there are
* several performance critical places that need to reference
- * task->cgroups without the expense of grabbing a system global
+ * task->cgroup without the expense of grabbing a system global
* mutex. Therefore except as noted below, when dereferencing or, as
- * in cgroup_attach_task(), modifying a task's cgroups pointer we use
+ * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
* task_lock(), which acts on a spinlock (task->alloc_lock) already in
* the task_struct routinely used for such matters.
*
remove_dir(dentry);
}
+/*
+ * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to goes down
+ * to zero, soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
+{
+ if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+ wake_up_all(&cgroup_rmdir_waitq);
+}
+
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
+{
+ css_get(css);
+}
+
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
+{
+ cgroup_wakeup_rmdir_waiter(css->cgroup);
+ css_put(css);
+}
+
/*
* Call with cgroup_mutex held. Drops reference counts on modules, including
* any duplicate ones that parse_cgroupfs_options took. If this function
failed_ss = ss;
goto out;
}
- } else if (!capable(CAP_SYS_ADMIN)) {
- const struct cred *cred = current_cred(), *tcred;
-
- /* No can_attach() - check perms generically */
- tcred = __task_cred(tsk);
- if (cred->euid != tcred->uid &&
- cred->euid != tcred->suid) {
- return -EACCES;
- }
}
}
if (ss->attach)
ss->attach(ss, cgrp, oldcgrp, tsk, false);
}
- set_bit(CGRP_RELEASABLE, &cgrp->flags);
- /* put_css_set will not destroy cg until after an RCU grace period */
+ set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+ synchronize_rcu();
put_css_set(cg);
/*
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
struct task_struct *tsk;
+ const struct cred *cred = current_cred(), *tcred;
int ret;
if (pid) {
rcu_read_unlock();
return -ESRCH;
}
+
+ tcred = __task_cred(tsk);
+ if (cred->euid &&
+ cred->euid != tcred->uid &&
+ cred->euid != tcred->suid) {
+ rcu_read_unlock();
+ return -EACCES;
+ }
get_task_struct(tsk);
rcu_read_unlock();
} else {
if (err < 0)
goto err_remove;
- set_bit(CGRP_RELEASABLE, &parent->flags);
-
/* The cgroup directory was pre-locked for us */
BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
return !failed;
}
-/* checks if all of the css_sets attached to a cgroup have a refcount of 0.
- * Must be called with css_set_lock held */
-static int cgroup_css_sets_empty(struct cgroup *cgrp)
-{
- struct cg_cgroup_link *link;
-
- list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
- struct css_set *cg = link->cg;
- if (atomic_read(&cg->refcount) > 0)
- return 0;
- }
-
- return 1;
-}
-
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
struct cgroup *cgrp = dentry->d_fsdata;
/* the vfs holds both inode->i_mutex already */
again:
mutex_lock(&cgroup_mutex);
- if (!cgroup_css_sets_empty(cgrp)) {
+ if (atomic_read(&cgrp->count) != 0) {
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
mutex_lock(&cgroup_mutex);
parent = cgrp->parent;
- if (!cgroup_css_sets_empty(cgrp) || !list_empty(&cgrp->children)) {
+ if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
mutex_unlock(&cgroup_mutex);
return -EBUSY;
cgroup_d_remove_dir(d);
dput(d);
+ set_bit(CGRP_RELEASABLE, &parent->flags);
check_for_release(parent);
/*
if (!list_empty(&tsk->cg_list)) {
write_lock(&css_set_lock);
if (!list_empty(&tsk->cg_list))
- list_del_init(&tsk->cg_list);
+ list_del(&tsk->cg_list);
write_unlock(&css_set_lock);
}
tsk->cgroups = &init_css_set;
task_unlock(tsk);
if (cg)
- put_css_set(cg);
+ put_css_set_taskexit(cg);
}
/**
}
}
-/* Caller must verify that the css is not for root cgroup */
-void __css_get(struct cgroup_subsys_state *css, int count)
-{
- atomic_add(count, &css->refcnt);
- set_bit(CGRP_RELEASABLE, &css->cgroup->flags);
-}
-EXPORT_SYMBOL_GPL(__css_get);
-
/* Caller must verify that the css is not for root cgroup */
void __css_put(struct cgroup_subsys_state *css, int count)
{
rcu_read_lock();
val = atomic_sub_return(count, &css->refcnt);
if (val == 1) {
- check_for_release(cgrp);
+ if (notify_on_release(cgrp)) {
+ set_bit(CGRP_RELEASABLE, &cgrp->flags);
+ check_for_release(cgrp);
+ }
cgroup_wakeup_rmdir_waiter(cgrp);
}
rcu_read_unlock();