Merge remote-tracking branch 'lsk/v3.10/topic/aosp' into linux-linaro-lsk-android

[firefly-linux-kernel-4.4.55.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 8dc7ec1de42951b0ee27e5691895a7f1c2142f1a..e646e870ec5fbe6929e582866c313cd513a5ac0b 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -91,6 +91,14 @@ static DEFINE_MUTEX(cgroup_mutex);
  
  static DEFINE_MUTEX(cgroup_root_mutex);
  
+/*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions.  Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq,
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
  /*
   * Generate an array of cgroup subsystem pointers. At boot time, this is
   * populated with the built in subsystems, and modular subsystems are
@@ -873,7 +881,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
  {
         struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
  
-       schedule_work(&cgrp->free_work);
+       queue_work(cgroup_destroy_wq, &cgrp->free_work);
  }
  
  static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -1995,7 +2003,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
  
                 /* @tsk either already exited or can't exit until the end */
                 if (tsk->flags & PF_EXITING)
-                       continue;
+                       goto next;
  
                 /* as per above, nr_threads may decrease, but not increase. */
                 BUG_ON(i >= group_size);
@@ -2003,7 +2011,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
                 ent.cgrp = task_cgroup_from_root(tsk, root);
                 /* nothing to do if this task is already in the cgroup */
                 if (ent.cgrp == cgrp)
-                       continue;
+                       goto next;
                 /*
                  * saying GFP_ATOMIC has no effect here because we did prealloc
                  * earlier, but it's good form to communicate our expectations.
@@ -2011,7 +2019,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
                 retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
                 BUG_ON(retval != 0);
                 i++;
-
+       next:
                 if (!threadgroup)
                         break;
         } while_each_thread(leader, tsk);
@@ -2815,13 +2823,17 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
  {
         LIST_HEAD(pending);
         struct cgroup *cgrp, *n;
+       struct super_block *sb = ss->root->sb;
  
         /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
-       if (cfts && ss->root != &rootnode) {
+       if (cfts && ss->root != &rootnode &&
+           atomic_inc_not_zero(&sb->s_active)) {
                 list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
                         dget(cgrp->dentry);
                         list_add_tail(&cgrp->cft_q_node, &pending);
                 }
+       } else {
+               sb = NULL;
         }
  
         mutex_unlock(&cgroup_mutex);
@@ -2844,6 +2856,9 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
                 dput(cgrp->dentry);
         }
  
+       if (sb)
+               deactivate_super(sb);
+
         mutex_unlock(&cgroup_cft_mutex);
  }
  
@@ -3772,6 +3787,23 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
         return 0;
  }
  
+/*
+ * dput() may be called asynchronously.  If umount has already been done
+ * and deactivate_super() in cgroup_free_fn() then kills the superblock,
+ * there is a small window in which vfs can see the root dentry with a
+ * non-zero refcnt and trigger BUG().
+ *
+ * Hold an s_active reference across dput() and drop it right after.
+ */
+static void cgroup_dput(struct cgroup *cgrp)
+{
+       struct super_block *sb = cgrp->root->sb;
+
+       atomic_inc(&sb->s_active);
+       dput(cgrp->dentry);
+       deactivate_super(sb);
+}
+
  /*
   * Unregister event and free resources.
   *
@@ -3792,7 +3824,7 @@ static void cgroup_event_remove(struct work_struct *work)
  
         eventfd_ctx_put(event->eventfd);
         kfree(event);
-       dput(cgrp->dentry);
+       cgroup_dput(cgrp);
  }
  
  /*
@@ -4077,12 +4109,8 @@ static void css_dput_fn(struct work_struct *work)
  {
         struct cgroup_subsys_state *css =
                 container_of(work, struct cgroup_subsys_state, dput_work);
-       struct dentry *dentry = css->cgroup->dentry;
-       struct super_block *sb = dentry->d_sb;
  
-       atomic_inc(&sb->s_active);
-       dput(dentry);
-       deactivate_super(sb);
+       cgroup_dput(css->cgroup);
  }
  
  static void init_cgroup_css(struct cgroup_subsys_state *css,
@@ -4712,6 +4740,22 @@ out:
         return err;
  }
  
+static int __init cgroup_wq_init(void)
+{
+       /*
+        * There isn't much point in executing the destruction path in
+        * parallel; a good chunk of it is serialized by cgroup_mutex
+        * anyway.  Use 1 for @max_active.
+        *
+        * We would prefer to do this in cgroup_init() above, but that is
+        * called before init_workqueues(), so defer it to an initcall.
+        */
+       cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+       BUG_ON(!cgroup_destroy_wq);
+       return 0;
+}
+core_initcall(cgroup_wq_init);
+
  /*
   * proc_cgroup_show()
   *  - Print task's cgroup paths into seq_file, one line for each hierarchy
@@ -5022,7 +5066,7 @@ void __css_put(struct cgroup_subsys_state *css)
  
         v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
         if (v == 0)
-               schedule_work(&css->dput_work);
+               queue_work(cgroup_destroy_wq, &css->dput_work);
  }
  EXPORT_SYMBOL_GPL(__css_put);