cgroup: simplify threadgroup locking
author: Tejun Heo <tj@kernel.org>
Wed, 16 Sep 2015 17:03:02 +0000 (13:03 -0400)
committer: Tejun Heo <tj@kernel.org>
Wed, 16 Sep 2015 17:03:46 +0000 (13:03 -0400)
Note: This commit was originally committed as b5ba75b5fc0e but got
      reverted by f9f9e7b77614 due to the performance regression from
      the percpu_rwsem write down/up operations added to the cgroup
      task migration path.  The percpu_rwsem changes which alleviate
      the performance issue are pending for the v4.4-rc1 merge window.
      Re-apply.

Now that threadgroup locking is made global, code paths around it can
be simplified.

* lock-verify-unlock-retry dancing removed from __cgroup_procs_write().

* Race protection against de_thread() removed from
  cgroup_update_dfl_csses().

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com
kernel/cgroup.c

index 115091efa889b13cc227ae61dbdb1725b2df1275..2cf0f79f1fc9014cffce5ed79969bbcdaa3b9f90 100644 (file)
@@ -2460,14 +2460,13 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
        if (!cgrp)
                return -ENODEV;
 
-retry_find_task:
+       percpu_down_write(&cgroup_threadgroup_rwsem);
        rcu_read_lock();
        if (pid) {
                tsk = find_task_by_vpid(pid);
                if (!tsk) {
-                       rcu_read_unlock();
                        ret = -ESRCH;
-                       goto out_unlock_cgroup;
+                       goto out_unlock_rcu;
                }
        } else {
                tsk = current;
@@ -2483,37 +2482,23 @@ retry_find_task:
         */
        if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
                ret = -EINVAL;
-               rcu_read_unlock();
-               goto out_unlock_cgroup;
+               goto out_unlock_rcu;
        }
 
        get_task_struct(tsk);
        rcu_read_unlock();
 
-       percpu_down_write(&cgroup_threadgroup_rwsem);
-       if (threadgroup) {
-               if (!thread_group_leader(tsk)) {
-                       /*
-                        * a race with de_thread from another thread's exec()
-                        * may strip us of our leadership, if this happens,
-                        * there is no choice but to throw this task away and
-                        * try again; this is
-                        * "double-double-toil-and-trouble-check locking".
-                        */
-                       percpu_up_write(&cgroup_threadgroup_rwsem);
-                       put_task_struct(tsk);
-                       goto retry_find_task;
-               }
-       }
-
        ret = cgroup_procs_write_permission(tsk, cgrp, of);
        if (!ret)
                ret = cgroup_attach_task(cgrp, tsk, threadgroup);
 
-       percpu_up_write(&cgroup_threadgroup_rwsem);
-
        put_task_struct(tsk);
-out_unlock_cgroup:
+       goto out_unlock_threadgroup;
+
+out_unlock_rcu:
+       rcu_read_unlock();
+out_unlock_threadgroup:
+       percpu_up_write(&cgroup_threadgroup_rwsem);
        cgroup_kn_unlock(of->kn);
        return ret ?: nbytes;
 }
@@ -2658,6 +2643,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 
        lockdep_assert_held(&cgroup_mutex);
 
+       percpu_down_write(&cgroup_threadgroup_rwsem);
+
        /* look up all csses currently attached to @cgrp's subtree */
        down_read(&css_set_rwsem);
        css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
@@ -2713,17 +2700,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
                                goto out_finish;
                        last_task = task;
 
-                       percpu_down_write(&cgroup_threadgroup_rwsem);
-                       /* raced against de_thread() from another thread? */
-                       if (!thread_group_leader(task)) {
-                               percpu_up_write(&cgroup_threadgroup_rwsem);
-                               put_task_struct(task);
-                               continue;
-                       }
-
                        ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
 
-                       percpu_up_write(&cgroup_threadgroup_rwsem);
                        put_task_struct(task);
 
                        if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -2733,6 +2711,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 
 out_finish:
        cgroup_migrate_finish(&preloaded_csets);
+       percpu_up_write(&cgroup_threadgroup_rwsem);
        return ret;
 }