Merge remote-tracking branch 'lsk/linux-linaro-lsk-v4.4-android' into linux-linaro...
[firefly-linux-kernel-4.4.55.git] / mm / memcontrol.c
index e234c21a5e6cb3d02104da2422548692fda1e881..5c714f33f49477f66f70377e60a8e5930dbcb753 100644 (file)
@@ -903,14 +903,20 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                if (prev && reclaim->generation != iter->generation)
                        goto out_unlock;
 
-               do {
+               while (1) {
                        pos = READ_ONCE(iter->position);
+                       if (!pos || css_tryget(&pos->css))
+                               break;
                        /*
-                        * A racing update may change the position and
-                        * put the last reference, hence css_tryget(),
-                        * or retry to see the updated position.
+                        * css reference reached zero, so iter->position will
+                        * be cleared by ->css_released. However, we should not
+                        * rely on this happening soon, because ->css_released
+                        * is called from a work queue, and by busy-waiting we
+                        * might block it. So we clear iter->position right
+                        * away.
                         */
-               } while (pos && !css_tryget(&pos->css));
+                       (void)cmpxchg(&iter->position, pos, NULL);
+               }
        }
 
        if (pos)
@@ -956,17 +962,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
        }
 
        if (reclaim) {
-               if (cmpxchg(&iter->position, pos, memcg) == pos) {
-                       if (memcg)
-                               css_get(&memcg->css);
-                       if (pos)
-                               css_put(&pos->css);
-               }
-
                /*
-                * pairs with css_tryget when dereferencing iter->position
-                * above.
+                * The position could have already been updated by a competing
+                * thread, so check that the value hasn't changed since we read
+                * it to avoid reclaiming from the same cgroup twice.
                 */
+               (void)cmpxchg(&iter->position, pos, memcg);
+
                if (pos)
                        css_put(&pos->css);
 
@@ -999,6 +1001,28 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
                css_put(&prev->css);
 }
 
+static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+{
+       struct mem_cgroup *memcg = dead_memcg;
+       struct mem_cgroup_reclaim_iter *iter;
+       struct mem_cgroup_per_zone *mz;
+       int nid, zid;
+       int i;
+
+       while ((memcg = parent_mem_cgroup(memcg))) {
+               for_each_node(nid) {
+                       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+                               mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
+                               for (i = 0; i <= DEF_PRIORITY; i++) {
+                                       iter = &mz->iter[i];
+                                       cmpxchg(&iter->position,
+                                               dead_memcg, NULL);
+                               }
+                       }
+               }
+       }
+}
+
 /*
  * Iteration constructs for visiting all cgroups (under a tree).  If
  * loops are exited prematurely (break), mem_cgroup_iter_break() must
@@ -3498,16 +3522,17 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
 swap_buffers:
        /* Swap primary and spare array */
        thresholds->spare = thresholds->primary;
-       /* If all events are unregistered, free the spare array */
-       if (!new) {
-               kfree(thresholds->spare);
-               thresholds->spare = NULL;
-       }
 
        rcu_assign_pointer(thresholds->primary, new);
 
        /* To be sure that nobody uses thresholds */
        synchronize_rcu();
+
+       /* If all events are unregistered, free the spare array */
+       if (!new) {
+               kfree(thresholds->spare);
+               thresholds->spare = NULL;
+       }
 unlock:
        mutex_unlock(&memcg->thresholds_lock);
 }
@@ -4324,6 +4349,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
        wb_memcg_offline(memcg);
 }
 
+static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+       invalidate_reclaim_iterators(memcg);
+}
+
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
@@ -4847,6 +4879,11 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
        return ret;
 }
 
+static int mem_cgroup_allow_attach(struct cgroup_taskset *tset)
+{
+       return subsys_cgroup_allow_attach(tset);
+}
+
 static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
        if (mc.to)
@@ -5008,6 +5045,10 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 {
        return 0;
 }
+static int mem_cgroup_allow_attach(struct cgroup_taskset *tset)
+{
+       return 0;
+}
 static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
@@ -5185,11 +5226,13 @@ struct cgroup_subsys memory_cgrp_subsys = {
        .css_alloc = mem_cgroup_css_alloc,
        .css_online = mem_cgroup_css_online,
        .css_offline = mem_cgroup_css_offline,
+       .css_released = mem_cgroup_css_released,
        .css_free = mem_cgroup_css_free,
        .css_reset = mem_cgroup_css_reset,
        .can_attach = mem_cgroup_can_attach,
        .cancel_attach = mem_cgroup_cancel_attach,
        .attach = mem_cgroup_move_task,
+       .allow_attach = mem_cgroup_allow_attach,
        .bind = mem_cgroup_bind,
        .dfl_cftypes = memory_files,
        .legacy_cftypes = mem_cgroup_legacy_files,