X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=kernel%2Fcgroup.c;h=e646e870ec5fbe6929e582866c313cd513a5ac0b;hb=e8f1cafd746408478934b9c8b9e9e0d5f450906c;hp=8dc7ec1de42951b0ee27e5691895a7f1c2142f1a;hpb=57114e95e8c4f5035c993fc74bbe94cd9573f1bb;p=firefly-linux-kernel-4.4.55.git

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8dc7ec1de429..e646e870ec5f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -91,6 +91,14 @@
 static DEFINE_MUTEX(cgroup_mutex);
 static DEFINE_MUTEX(cgroup_root_mutex);

+/*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions. Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
 /*
  * Generate an array of cgroup subsystem pointers. At boot time, this is
  * populated with the built in subsystems, and modular subsystems are
@@ -873,7 +881,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
 {
 	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);

-	schedule_work(&cgrp->free_work);
+	queue_work(cgroup_destroy_wq, &cgrp->free_work);
 }

 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -1995,7 +2003,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,

 		/* @tsk either already exited or can't exit until the end */
 		if (tsk->flags & PF_EXITING)
-			continue;
+			goto next;

 		/* as per above, nr_threads may decrease, but not increase. */
 		BUG_ON(i >= group_size);
@@ -2003,7 +2011,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 		ent.cgrp = task_cgroup_from_root(tsk, root);
 		/* nothing to do if this task is already in the cgroup */
 		if (ent.cgrp == cgrp)
-			continue;
+			goto next;
 		/*
 		 * saying GFP_ATOMIC has no effect here because we did prealloc
 		 * earlier, but it's good form to communicate our expectations.
@@ -2011,7 +2019,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
 		retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
 		BUG_ON(retval != 0);
 		i++;
-
+	next:
 		if (!threadgroup)
 			break;
 	} while_each_thread(leader, tsk);
@@ -2815,13 +2823,17 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
 {
 	LIST_HEAD(pending);
 	struct cgroup *cgrp, *n;
+	struct super_block *sb = ss->root->sb;

 	/* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
-	if (cfts && ss->root != &rootnode) {
+	if (cfts && ss->root != &rootnode &&
+	    atomic_inc_not_zero(&sb->s_active)) {
 		list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
 			dget(cgrp->dentry);
 			list_add_tail(&cgrp->cft_q_node, &pending);
 		}
+	} else {
+		sb = NULL;
 	}

 	mutex_unlock(&cgroup_mutex);
@@ -2844,6 +2856,9 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
 		dput(cgrp->dentry);
 	}

+	if (sb)
+		deactivate_super(sb);
+
 	mutex_unlock(&cgroup_cft_mutex);
 }

@@ -3772,6 +3787,23 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
 	return 0;
 }

+/*
+ * When dput() is called asynchronously, if umount has been done and
+ * then deactivate_super() in cgroup_free_fn() kills the superblock,
+ * there's a small window that vfs will see the root dentry with non-zero
+ * refcnt and trigger BUG().
+ *
+ * That's why we hold a reference before dput() and drop it right after.
+ */
+static void cgroup_dput(struct cgroup *cgrp)
+{
+	struct super_block *sb = cgrp->root->sb;
+
+	atomic_inc(&sb->s_active);
+	dput(cgrp->dentry);
+	deactivate_super(sb);
+}
+
 /*
  * Unregister event and free resources.
  *
@@ -3792,7 +3824,7 @@ static void cgroup_event_remove(struct work_struct *work)

 	eventfd_ctx_put(event->eventfd);
 	kfree(event);
-	dput(cgrp->dentry);
+	cgroup_dput(cgrp);
 }

 /*
@@ -4077,12 +4109,8 @@ static void css_dput_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, dput_work);
-	struct dentry *dentry = css->cgroup->dentry;
-	struct super_block *sb = dentry->d_sb;

-	atomic_inc(&sb->s_active);
-	dput(dentry);
-	deactivate_super(sb);
+	cgroup_dput(css->cgroup);
 }

 static void init_cgroup_css(struct cgroup_subsys_state *css,
@@ -4712,6 +4740,22 @@ out:
 	return err;
 }

+static int __init cgroup_wq_init(void)
+{
+	/*
+	 * There isn't much point in executing destruction path in
+	 * parallel. Good chunk is serialized with cgroup_mutex anyway.
+	 * Use 1 for @max_active.
+	 *
+	 * We would prefer to do this in cgroup_init() above, but that
+	 * is called before init_workqueues(): so leave this until after.
+	 */
+	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+	BUG_ON(!cgroup_destroy_wq);
+	return 0;
+}
+core_initcall(cgroup_wq_init);
+
 /*
  * proc_cgroup_show()
  *  - Print task's cgroup paths into seq_file, one line for each hierarchy
@@ -5022,7 +5066,7 @@ void __css_put(struct cgroup_subsys_state *css)

 	v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
 	if (v == 0)
-		schedule_work(&css->dput_work);
+		queue_work(cgroup_destroy_wq, &css->dput_work);
 }
 EXPORT_SYMBOL_GPL(__css_put);
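
Note on the pattern: the core of this backport is replacing schedule_work() on system_wq with queue_work() on a private workqueue allocated with @max_active = 1, so that a burst of destruction work items cannot exhaust the shared queue. Below is a minimal, self-contained sketch of the same pattern outside cgroup code; the module, queue, work item, and handler names are invented for illustration, and only alloc_workqueue(), queue_work(), and destroy_workqueue() mirror the calls actually used in the patch.

/*
 * Hypothetical example module: allocates its own workqueue with
 * max_active = 1 and queues work onto it instead of system_wq,
 * mirroring the cgroup_destroy_wq pattern above. Names are
 * illustrative only, not part of the patch.
 */
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static void example_work_fn(struct work_struct *work)
{
	/* destruction-style work runs here, one item at a time */
	pr_info("example work executed\n");
}

static DECLARE_WORK(example_work, example_work_fn);

static int __init example_init(void)
{
	/* flags = 0, max_active = 1: items on this queue are serialized */
	example_wq = alloc_workqueue("example_wq", 0, 1);
	if (!example_wq)
		return -ENOMEM;

	/* queue_work() on the private queue, not schedule_work() */
	queue_work(example_wq, &example_work);
	return 0;
}

static void __exit example_exit(void)
{
	/* drains pending work and frees the queue */
	destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

With @max_active = 1 the queue also executes at most one work item at a time, which is why the patch comment notes that there is little point in running the destruction path in parallel.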