revert android-tegra-2.6.36-honeycomb-mr1-9001adc to v2.6.36

[firefly-linux-kernel-4.4.55.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index cc2a04ed3cc5d85b295ea5592fc7c96aff8583e1..c9483d8f6140ed6cb4e06fa6139e2aeb907b3d47 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -58,7 +58,6 @@
  #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
  #include <linux/eventfd.h>
  #include <linux/poll.h>
-#include <linux/capability.h>
  
  #include <asm/atomic.h>
  
@@ -264,33 +263,6 @@ static void cgroup_release_agent(struct work_struct *work);
  static DECLARE_WORK(release_agent_work, cgroup_release_agent);
  static void check_for_release(struct cgroup *cgrp);
  
-/*
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
- * reference to css->refcnt. In general, this refcnt is expected to goes down
- * to zero, soon.
- *
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
- */
-DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
-{
-       if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
-               wake_up_all(&cgroup_rmdir_waitq);
-}
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
-{
-       css_get(css);
-}
-
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
-{
-       cgroup_wakeup_rmdir_waiter(css->cgroup);
-       css_put(css);
-}
-
  /* Link structure for associating css_set objects with cgroups */
  struct cg_cgroup_link {
         /*
@@ -350,35 +322,10 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
         return &css_set_table[index];
  }
  
-static void free_css_set_work(struct work_struct *work)
-{
-       struct css_set *cg = container_of(work, struct css_set, work);
-       struct cg_cgroup_link *link;
-       struct cg_cgroup_link *saved_link;
-
-       write_lock(&css_set_lock);
-       list_for_each_entry_safe(link, saved_link, &cg->cg_links,
-                                cg_link_list) {
-               struct cgroup *cgrp = link->cgrp;
-               list_del(&link->cg_link_list);
-               list_del(&link->cgrp_link_list);
-               if (atomic_dec_and_test(&cgrp->count)) {
-                       check_for_release(cgrp);
-                       cgroup_wakeup_rmdir_waiter(cgrp);
-               }
-               kfree(link);
-       }
-       write_unlock(&css_set_lock);
-
-       kfree(cg);
-}
-
  static void free_css_set_rcu(struct rcu_head *obj)
  {
         struct css_set *cg = container_of(obj, struct css_set, rcu_head);
-
-       INIT_WORK(&cg->work, free_css_set_work);
-       schedule_work(&cg->work);
+       kfree(cg);
  }
  
  /* We don't maintain the lists running through each css_set to its
@@ -387,16 +334,10 @@ static void free_css_set_rcu(struct rcu_head *obj)
   * compiled into their kernel but not actually in use */
  static int use_task_css_set_links __read_mostly;
  
-/*
- * refcounted get/put for css_set objects
- */
-static inline void get_css_set(struct css_set *cg)
-{
-       atomic_inc(&cg->refcount);
-}
-
-static void put_css_set(struct css_set *cg)
+static void __put_css_set(struct css_set *cg, int taskexit)
  {
+       struct cg_cgroup_link *link;
+       struct cg_cgroup_link *saved_link;
         /*
          * Ensure that the refcount doesn't hit zero while any readers
          * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -410,13 +351,47 @@ static void put_css_set(struct css_set *cg)
                 return;
         }
  
+       /* This css_set is dead. Unlink it and release cgroup refcounts */
         hlist_del(&cg->hlist);
         css_set_count--;
  
+       list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+                                cg_link_list) {
+               struct cgroup *cgrp = link->cgrp;
+               list_del(&link->cg_link_list);
+               list_del(&link->cgrp_link_list);
+               if (atomic_dec_and_test(&cgrp->count) &&
+                   notify_on_release(cgrp)) {
+                       if (taskexit)
+                               set_bit(CGRP_RELEASABLE, &cgrp->flags);
+                       check_for_release(cgrp);
+               }
+
+               kfree(link);
+       }
+
         write_unlock(&css_set_lock);
         call_rcu(&cg->rcu_head, free_css_set_rcu);
  }
  
+/*
+ * refcounted get/put for css_set objects
+ */
+static inline void get_css_set(struct css_set *cg)
+{
+       atomic_inc(&cg->refcount);
+}
+
+static inline void put_css_set(struct css_set *cg)
+{
+       __put_css_set(cg, 0);
+}
+
+static inline void put_css_set_taskexit(struct css_set *cg)
+{
+       __put_css_set(cg, 1);
+}
+
  /*
   * compare_css_sets - helper function for find_existing_css_set().
   * @cg: candidate css_set being tested
@@ -746,9 +721,9 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
   * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
   * another.  It does so using cgroup_mutex, however there are
   * several performance critical places that need to reference
- * task->cgroups without the expense of grabbing a system global
+ * task->cgroup without the expense of grabbing a system global
   * mutex.  Therefore except as noted below, when dereferencing or, as
- * in cgroup_attach_task(), modifying a task's cgroups pointer we use
+ * in cgroup_attach_task(), modifying a task's cgroup pointer we use
   * task_lock(), which acts on a spinlock (task->alloc_lock) already in
   * the task_struct routinely used for such matters.
   *
@@ -928,6 +903,33 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
         remove_dir(dentry);
  }
  
+/*
+ * A queue for waiters to do rmdir() on a cgroup. A task will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && a subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to go down
+ * to zero soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex.
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
+{
+       if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+               wake_up_all(&cgroup_rmdir_waitq);
+}
+
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
+{
+       css_get(css);
+}
+
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
+{
+       cgroup_wakeup_rmdir_waiter(css->cgroup);
+       css_put(css);
+}
+
  /*
   * Call with cgroup_mutex held. Drops reference counts on modules, including
   * any duplicate ones that parse_cgroupfs_options took. If this function
@@ -1721,15 +1723,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
                                 failed_ss = ss;
                                 goto out;
                         }
-               } else if (!capable(CAP_SYS_ADMIN)) {
-                       const struct cred *cred = current_cred(), *tcred;
-
-                       /* No can_attach() - check perms generically */
-                       tcred = __task_cred(tsk);
-                       if (cred->euid != tcred->uid &&
-                           cred->euid != tcred->suid) {
-                               return -EACCES;
-                       }
                 }
         }
  
@@ -1770,8 +1763,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
                 if (ss->attach)
                         ss->attach(ss, cgrp, oldcgrp, tsk, false);
         }
-       set_bit(CGRP_RELEASABLE, &cgrp->flags);
-       /* put_css_set will not destroy cg until after an RCU grace period */
+       set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+       synchronize_rcu();
         put_css_set(cg);
  
         /*
@@ -1828,6 +1821,7 @@ EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
  static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
  {
         struct task_struct *tsk;
+       const struct cred *cred = current_cred(), *tcred;
         int ret;
  
         if (pid) {
@@ -1837,6 +1831,14 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
                         rcu_read_unlock();
                         return -ESRCH;
                 }
+
+               tcred = __task_cred(tsk);
+               if (cred->euid &&
+                   cred->euid != tcred->uid &&
+                   cred->euid != tcred->suid) {
+                       rcu_read_unlock();
+                       return -EACCES;
+               }
                 get_task_struct(tsk);
                 rcu_read_unlock();
         } else {
@@ -3358,8 +3360,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
         if (err < 0)
                 goto err_remove;
  
-       set_bit(CGRP_RELEASABLE, &parent->flags);
-
         /* The cgroup directory was pre-locked for us */
         BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
  
@@ -3491,21 +3491,6 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
         return !failed;
  }
  
-/* checks if all of the css_sets attached to a cgroup have a refcount of 0.
- * Must be called with css_set_lock held */
-static int cgroup_css_sets_empty(struct cgroup *cgrp)
-{
-       struct cg_cgroup_link *link;
-
-       list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-               struct css_set *cg = link->cg;
-               if (atomic_read(&cg->refcount) > 0)
-                       return 0;
-       }
-
-       return 1;
-}
-
  static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
  {
         struct cgroup *cgrp = dentry->d_fsdata;
@@ -3518,7 +3503,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
         /* the vfs holds both inode->i_mutex already */
  again:
         mutex_lock(&cgroup_mutex);
-       if (!cgroup_css_sets_empty(cgrp)) {
+       if (atomic_read(&cgrp->count) != 0) {
                 mutex_unlock(&cgroup_mutex);
                 return -EBUSY;
         }
@@ -3551,7 +3536,7 @@ again:
  
         mutex_lock(&cgroup_mutex);
         parent = cgrp->parent;
-       if (!cgroup_css_sets_empty(cgrp) || !list_empty(&cgrp->children)) {
+       if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
                 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
                 mutex_unlock(&cgroup_mutex);
                 return -EBUSY;
@@ -3593,6 +3578,7 @@ again:
         cgroup_d_remove_dir(d);
         dput(d);
  
+       set_bit(CGRP_RELEASABLE, &parent->flags);
         check_for_release(parent);
  
         /*
@@ -4177,7 +4163,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
         if (!list_empty(&tsk->cg_list)) {
                 write_lock(&css_set_lock);
                 if (!list_empty(&tsk->cg_list))
-                       list_del_init(&tsk->cg_list);
+                       list_del(&tsk->cg_list);
                 write_unlock(&css_set_lock);
         }
  
@@ -4187,7 +4173,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
         tsk->cgroups = &init_css_set;
         task_unlock(tsk);
         if (cg)
-               put_css_set(cg);
+               put_css_set_taskexit(cg);
  }
  
  /**
@@ -4356,14 +4342,6 @@ static void check_for_release(struct cgroup *cgrp)
         }
  }
  
-/* Caller must verify that the css is not for root cgroup */
-void __css_get(struct cgroup_subsys_state *css, int count)
-{
-       atomic_add(count, &css->refcnt);
-       set_bit(CGRP_RELEASABLE, &css->cgroup->flags);
-}
-EXPORT_SYMBOL_GPL(__css_get);
-
  /* Caller must verify that the css is not for root cgroup */
  void __css_put(struct cgroup_subsys_state *css, int count)
  {
@@ -4372,7 +4350,10 @@ void __css_put(struct cgroup_subsys_state *css, int count)
         rcu_read_lock();
         val = atomic_sub_return(count, &css->refcnt);
         if (val == 1) {
-               check_for_release(cgrp);
+               if (notify_on_release(cgrp)) {
+                       set_bit(CGRP_RELEASABLE, &cgrp->flags);
+                       check_for_release(cgrp);
+               }
                 cgroup_wakeup_rmdir_waiter(cgrp);
         }
         rcu_read_unlock();