mm: factor out memory isolate functions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f72b5e52451a7d8e62648dbfe211e5ff11c7c34f..0f692a2dbfcb71a82a818de0905511165d379d6a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -61,12 +61,12 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES     5
 static struct mem_cgroup *root_mem_cgroup __read_mostly;
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
 int do_swap_account __read_mostly;
 
 /* for remember boot option*/
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED
+#ifdef CONFIG_MEMCG_SWAP_ENABLED
 static int really_do_swap_account __initdata = 1;
 #else
 static int really_do_swap_account __initdata = 0;
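
The CONFIG_CGROUP_MEM_RES_CTLR_* option names are shortened to the
CONFIG_MEMCG_* family here and throughout the file (the _KMEM and swap
variants follow in later hunks). Out-of-tree code that still tests the old
symbols can bridge the rename with a shim of this shape - a sketch, not part
of this patch (Kconfig normally defines these macros to 1):

    #if defined(CONFIG_MEMCG_SWAP) && !defined(CONFIG_CGROUP_MEM_RES_CTLR_SWAP)
    #define CONFIG_CGROUP_MEM_RES_CTLR_SWAP 1
    #endif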
@@ -87,7 +87,7 @@ enum mem_cgroup_stat_index {
        MEM_CGROUP_STAT_CACHE,     /* # of pages charged as cache */
        MEM_CGROUP_STAT_RSS,       /* # of pages charged as anon rss */
        MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
-       MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
+       MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */
        MEM_CGROUP_STAT_NSTATS,
 };
 
@@ -378,9 +378,8 @@ static bool move_file(void)
 
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
-       MEM_CGROUP_CHARGE_TYPE_MAPPED,
+       MEM_CGROUP_CHARGE_TYPE_ANON,
        MEM_CGROUP_CHARGE_TYPE_SHMEM,   /* used by page migration of shmem */
-       MEM_CGROUP_CHARGE_TYPE_FORCE,   /* used by force_empty */
        MEM_CGROUP_CHARGE_TYPE_SWAPOUT, /* for accounting swapcache */
        MEM_CGROUP_CHARGE_TYPE_DROP,    /* a page was unused swap cache */
        NR_CHARGE_TYPE,
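
MEM_CGROUP_CHARGE_TYPE_MAPPED becomes MEM_CGROUP_CHARGE_TYPE_ANON, since it
has always tagged anonymous pages, and the now-unused
MEM_CGROUP_CHARGE_TYPE_FORCE is deleted. After the rename, charge sites
classify a page roughly as follows (a sketch mirroring the page-migration
hunk further down):

    enum charge_type ctype;

    if (PageAnon(page))
            ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
    else if (page_is_file_cache(page))
            ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
    else
            ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;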
@@ -408,7 +407,7 @@ static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
 
 /* Writing them here to avoid exposing memcg's inner layout */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 #include <net/sock.h>
 #include <net/ip.h>
 
@@ -467,9 +466,9 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(tcp_proto_cgroup);
 #endif /* CONFIG_INET */
-#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+#endif /* CONFIG_MEMCG_KMEM */
 
-#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM)
+#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 static void disarm_sock_keys(struct mem_cgroup *memcg)
 {
        if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto))
@@ -703,7 +702,7 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
                                         bool charge)
 {
        int val = (charge) ? 1 : -1;
-       this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
+       this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
 }
 
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
@@ -1454,7 +1453,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 /*
  * Return the memory (and swap, if configured) limit for a memcg.
  */
-u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
+static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
        u64 limit;
        u64 memsw;
@@ -1470,6 +1469,73 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
        return min(limit, memsw);
 }
 
+void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+                             int order)
+{
+       struct mem_cgroup *iter;
+       unsigned long chosen_points = 0;
+       unsigned long totalpages;
+       unsigned int points = 0;
+       struct task_struct *chosen = NULL;
+
+       /*
+        * If current has a pending SIGKILL, then automatically select it.  The
+        * goal is to allow it to allocate so that it may quickly exit and free
+        * its memory.
+        */
+       if (fatal_signal_pending(current)) {
+               set_thread_flag(TIF_MEMDIE);
+               return;
+       }
+
+       check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
+       totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
+       for_each_mem_cgroup_tree(iter, memcg) {
+               struct cgroup *cgroup = iter->css.cgroup;
+               struct cgroup_iter it;
+               struct task_struct *task;
+
+               cgroup_iter_start(cgroup, &it);
+               while ((task = cgroup_iter_next(cgroup, &it))) {
+                       switch (oom_scan_process_thread(task, totalpages, NULL,
+                                                       false)) {
+                       case OOM_SCAN_SELECT:
+                               if (chosen)
+                                       put_task_struct(chosen);
+                               chosen = task;
+                               chosen_points = ULONG_MAX;
+                               get_task_struct(chosen);
+                               /* fall through */
+                       case OOM_SCAN_CONTINUE:
+                               continue;
+                       case OOM_SCAN_ABORT:
+                               cgroup_iter_end(cgroup, &it);
+                               mem_cgroup_iter_break(memcg, iter);
+                               if (chosen)
+                                       put_task_struct(chosen);
+                               return;
+                       case OOM_SCAN_OK:
+                               break;
+                       };
+                       points = oom_badness(task, memcg, NULL, totalpages);
+                       if (points > chosen_points) {
+                               if (chosen)
+                                       put_task_struct(chosen);
+                               chosen = task;
+                               chosen_points = points;
+                               get_task_struct(chosen);
+                       }
+               }
+               cgroup_iter_end(cgroup, &it);
+       }
+
+       if (!chosen)
+               return;
+       points = chosen_points * 1000 / totalpages;
+       oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg,
+                        NULL, "Memory cgroup out of memory");
+}
+
 static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
                                        gfp_t gfp_mask,
                                        unsigned long flags)
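
The memcg OOM killer moves into memcontrol.c: mem_cgroup_out_of_memory()
walks every task in the hierarchy with the cgroup_iter_*() API, scores each
via oom_badness() against totalpages (the memcg limit in pages; the GNU
"x ? : 1" form substitutes 1 when the shifted limit is zero, avoiding a zero
divisor), and normalizes the winner with chosen_points * 1000 / totalpages
to the 0..1000 scale oom_kill_process() expects. A sketch of the expected
call site, assuming this kernel's memcg OOM-wait path (need_to_kill is a
local of that path, not of this hunk):

    if (need_to_kill)
            mem_cgroup_out_of_memory(memcg, gfp_mask, order);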
@@ -1899,7 +1965,7 @@ again:
                return;
        /*
         * If this memory cgroup is not under account moving, we don't
-        * need to take move_lock_page_cgroup(). Because we already hold
+        * need to take move_lock_mem_cgroup(). Because we already hold
         * rcu_read_lock(), any calls to move_account will be delayed until
         * rcu_read_unlock() if mem_cgroup_stolen() == true.
         */
@@ -1921,7 +1987,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
        /*
         * It's guaranteed that pc->mem_cgroup never changes while
         * lock is held because a routine modifies pc->mem_cgroup
-        * should take move_lock_page_cgroup().
+        * should take move_lock_mem_cgroup().
         */
        move_unlock_mem_cgroup(pc->mem_cgroup, flags);
 }
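
Both comments previously named move_lock_page_cgroup(), a helper that no
longer exists; the lock has been per-memcg for some time. For reference, the
helper the corrected comments point at has roughly this shape (an assumption
based on the per-memcg move_lock of this era, not part of this diff):

    static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
                                     unsigned long *flags)
    {
            spin_lock_irqsave(&memcg->move_lock, *flags);
    }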
@@ -2519,7 +2585,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                spin_unlock_irq(&zone->lru_lock);
        }
 
-       if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+       if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
                anon = true;
        else
                anon = false;
@@ -2644,8 +2710,7 @@ out:
 
 static int mem_cgroup_move_parent(struct page *page,
                                  struct page_cgroup *pc,
-                                 struct mem_cgroup *child,
-                                 gfp_t gfp_mask)
+                                 struct mem_cgroup *child)
 {
        struct mem_cgroup *parent;
        unsigned int nr_pages;
@@ -2728,7 +2793,7 @@ int mem_cgroup_newpage_charge(struct page *page,
        VM_BUG_ON(page->mapping && !PageAnon(page));
        VM_BUG_ON(!mm);
        return mem_cgroup_charge_common(page, mm, gfp_mask,
-                                       MEM_CGROUP_CHARGE_TYPE_MAPPED);
+                                       MEM_CGROUP_CHARGE_TYPE_ANON);
 }
 
 static void
@@ -2842,7 +2907,7 @@ void mem_cgroup_commit_charge_swapin(struct page *page,
                                     struct mem_cgroup *memcg)
 {
        __mem_cgroup_commit_charge_swapin(page, memcg,
-                                         MEM_CGROUP_CHARGE_TYPE_MAPPED);
+                                         MEM_CGROUP_CHARGE_TYPE_ANON);
 }
 
 void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
@@ -2945,7 +3010,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        anon = PageAnon(page);
 
        switch (ctype) {
-       case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+       case MEM_CGROUP_CHARGE_TYPE_ANON:
                /*
                 * Generally PageAnon tells if it's the anon statistics to be
                 * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
@@ -3005,7 +3070,7 @@ void mem_cgroup_uncharge_page(struct page *page)
        if (page_mapped(page))
                return;
        VM_BUG_ON(page->mapping && !PageAnon(page));
-       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON);
 }
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
@@ -3087,7 +3152,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 }
 #endif
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 /*
  * called from swap_entry_free(). remove record in swap_cgroup and
  * uncharge "memsw" account.
@@ -3248,7 +3313,7 @@ int mem_cgroup_prepare_migration(struct page *page,
         * mapcount will be finally 0 and we call uncharge in end_migration().
         */
        if (PageAnon(page))
-               ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+               ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
        else if (page_is_file_cache(page))
                ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
        else
@@ -3287,7 +3352,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        unlock_page_cgroup(pc);
        anon = PageAnon(used);
        __mem_cgroup_uncharge_common(unused,
-               anon ? MEM_CGROUP_CHARGE_TYPE_MAPPED
+               anon ? MEM_CGROUP_CHARGE_TYPE_ANON
                     : MEM_CGROUP_CHARGE_TYPE_CACHE);
 
        /*
@@ -3418,7 +3483,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                /*
                 * Rather than hide all in some function, I do this in
                 * open coded manner. You see what this really does.
-                * We have to guarantee memcg->res.limit < memcg->memsw.limit.
+                * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
                 */
                mutex_lock(&set_limit_mutex);
                memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
@@ -3479,7 +3544,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
                /*
                 * Rather than hide all in some function, I do this in
                 * open coded manner. You see what this really does.
-                * We have to guarantee memcg->res.limit < memcg->memsw.limit.
+                * We have to guarantee memcg->res.limit <= memcg->memsw.limit.
                 */
                mutex_lock(&set_limit_mutex);
                memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
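
The invariant in these two comments was stated too strictly: memsw accounts
memory plus swap, so memcg->res.limit may legitimately equal
memcg->memsw.limit (the configuration that forbids any swap use), hence "<="
rather than "<". The guard enforcing it only rejects strict violations,
roughly as below (check_limit_invariant is a hypothetical name wrapping the
res_counter read visible above):

    static int check_limit_invariant(struct mem_cgroup *memcg, u64 val)
    {
            u64 memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);

            /* raising res.limit above memsw.limit would violate "<=" */
            return memswlimit < val ? -EINVAL : 0;
    }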
@@ -3611,10 +3676,12 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 }
 
 /*
- * This routine traverse page_cgroup in given list and drop them all.
- * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
+ * Traverse a specified page_cgroup list and try to drop them all.  This doesn't
+ * reclaim the pages themselves - it just removes the page_cgroups.
+ * Returns true if some page_cgroups were not freed, indicating that the caller
+ * must retry this operation.
  */
-static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
+static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
                                int node, int zid, enum lru_list lru)
 {
        struct mem_cgroup_per_zone *mz;
@@ -3622,7 +3689,6 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
        struct list_head *list;
        struct page *busy;
        struct zone *zone;
-       int ret = 0;
 
        zone = &NODE_DATA(node)->node_zones[zid];
        mz = mem_cgroup_zoneinfo(memcg, node, zid);
@@ -3636,7 +3702,6 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
                struct page_cgroup *pc;
                struct page *page;
 
-               ret = 0;
                spin_lock_irqsave(&zone->lru_lock, flags);
                if (list_empty(list)) {
                        spin_unlock_irqrestore(&zone->lru_lock, flags);
@@ -3653,21 +3718,14 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 
                pc = lookup_page_cgroup(page);
 
-               ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
-               if (ret == -ENOMEM || ret == -EINTR)
-                       break;
-
-               if (ret == -EBUSY || ret == -EINVAL) {
+               if (mem_cgroup_move_parent(page, pc, memcg)) {
                        /* found lock contention or "pc" is obsolete. */
                        busy = page;
                        cond_resched();
                } else
                        busy = NULL;
        }
-
-       if (!ret && !list_empty(list))
-               return -EBUSY;
-       return ret;
+       return !list_empty(list);
 }
 
 /*
@@ -3692,9 +3750,6 @@ move_account:
                ret = -EBUSY;
                if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
                        goto out;
-               ret = -EINTR;
-               if (signal_pending(current))
-                       goto out;
                /* This is for making all *used* pages to be on LRU. */
                lru_add_drain_all();
                drain_all_stock_sync(memcg);
@@ -3715,9 +3770,6 @@ move_account:
                }
                mem_cgroup_end_move(memcg);
                memcg_oom_recover(memcg);
-               /* it seems parent cgroup doesn't have enough mem */
-               if (ret == -ENOMEM)
-                       goto try_to_free;
                cond_resched();
        /* "ret" should also be checked to ensure all lists are empty. */
        } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret);
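
With the error codes gone, mem_cgroup_force_empty_list() simply reports
whether anything is left on the list, and the do/while above retries until
both "ret" and res.usage reach zero. The per-zone drain can be pictured as
follows (drain_node_zone_lrus is a hypothetical helper, not in this patch):

    static bool drain_node_zone_lrus(struct mem_cgroup *memcg, int node, int zid)
    {
            enum lru_list lru;
            bool busy = false;

            for_each_lru(lru)
                    busy |= mem_cgroup_force_empty_list(memcg, node, zid, lru);

            return busy;    /* true: page_cgroups remain, caller retries */
    }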
@@ -3779,6 +3831,10 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
                parent_memcg = mem_cgroup_from_cont(parent);
 
        cgroup_lock();
+
+       if (memcg->use_hierarchy == val)
+               goto out;
+
        /*
         * If parent's use_hierarchy is set, we can't make any modifications
         * in the child subtrees. If it is unset, then the change can
@@ -3795,6 +3851,8 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
                        retval = -EBUSY;
        } else
                retval = -EINVAL;
+
+out:
        cgroup_unlock();
 
        return retval;
@@ -3831,7 +3889,7 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
        val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
 
        if (swap)
-               val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
+               val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
 
        return val << PAGE_SHIFT;
 }
@@ -4015,7 +4073,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
 #endif
 
 #ifdef CONFIG_NUMA
-static int mem_control_numa_stat_show(struct cgroup *cont, struct cftype *cft,
+static int memcg_numa_stat_show(struct cgroup *cont, struct cftype *cft,
                                      struct seq_file *m)
 {
        int nid;
@@ -4074,7 +4132,7 @@ static inline void mem_cgroup_lru_names_not_uptodate(void)
        BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
 }
 
-static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
+static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
                                 struct seq_file *m)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
@@ -4082,7 +4140,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        unsigned int i;
 
        for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-               if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account)
+               if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                        continue;
                seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
                           mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
@@ -4109,7 +4167,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
                long long val = 0;
 
-               if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account)
+               if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                        continue;
                for_each_mem_cgroup_tree(mi, memcg)
                        val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
@@ -4533,7 +4591,7 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
        return 0;
 }
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
        return mem_cgroup_sockets_init(memcg, ss);
@@ -4588,7 +4646,7 @@ static struct cftype mem_cgroup_files[] = {
        },
        {
                .name = "stat",
-               .read_seq_string = mem_control_stat_show,
+               .read_seq_string = memcg_stat_show,
        },
        {
                .name = "force_empty",
@@ -4620,10 +4678,10 @@ static struct cftype mem_cgroup_files[] = {
 #ifdef CONFIG_NUMA
        {
                .name = "numa_stat",
-               .read_seq_string = mem_control_numa_stat_show,
+               .read_seq_string = memcg_numa_stat_show,
        },
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
        {
                .name = "memsw.usage_in_bytes",
                .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
@@ -4810,7 +4868,7 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(parent_mem_cgroup);
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 static void __init enable_swap_cgroup(void)
 {
        if (!mem_cgroup_disabled() && really_do_swap_account)
@@ -5541,7 +5599,7 @@ struct cgroup_subsys mem_cgroup_subsys = {
        .__DEPRECATED_clear_css_refs = true,
 };
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#ifdef CONFIG_MEMCG_SWAP
 static int __init enable_swap_account(char *s)
 {
        /* consider enabled if no parameter or 1 is given */