X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=mm%2Fmemcontrol.c;h=1e50d37ee132b3768ecb052486e0dd62f2a0d066;hb=582a8ca38f661c7c88ceb193e257eebd7c110abb;hp=67648e6b2ac85e081f74084e8b4525a34c84f3ae;hpb=d3f97524ef1b2b12df6669a701e66c02f1da523d;p=firefly-linux-kernel-4.4.55.git diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 67648e6b2ac8..1e50d37ee132 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -272,21 +272,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { - return memcg->css.id; -} - -/* - * A helper function to get mem_cgroup from ID. must be called under - * rcu_read_lock(). The caller is responsible for calling - * css_tryget_online() if the mem_cgroup is used for charging. (dropping - * refcnt from swap can be called against removed memcg.) - */ -static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) -{ - struct cgroup_subsys_state *css; - - css = css_from_id(id, &memory_cgrp_subsys); - return mem_cgroup_from_css(css); + return memcg->id.id; } /* Writing them here to avoid exposing memcg's inner layout */ @@ -4124,6 +4110,88 @@ static struct cftype mem_cgroup_legacy_files[] = { { }, /* terminate */ }; +/* + * Private memory cgroup IDR + * + * Swap-out records and page cache shadow entries need to store memcg + * references in constrained space, so we maintain an ID space that is + * limited to 16 bit (MEM_CGROUP_ID_MAX), limiting the total number of + * memory-controlled cgroups to 64k. + * + * However, there usually are many references to the oflline CSS after + * the cgroup has been destroyed, such as page cache or reclaimable + * slab objects, that don't need to hang on to the ID. We want to keep + * those dead CSS from occupying IDs, or we might quickly exhaust the + * relatively small ID space and prevent the creation of new cgroups + * even when there are much fewer than 64k cgroups - possibly none. + * + * Maintain a private 16-bit ID space for memcg, and allow the ID to + * be freed and recycled when it's no longer needed, which is usually + * when the CSS is offlined. + * + * The only exception to that are records of swapped out tmpfs/shmem + * pages that need to be attributed to live ancestors on swapin. But + * those references are manageable from userspace. + */ + +static DEFINE_IDR(mem_cgroup_idr); + +static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) +{ + atomic_add(n, &memcg->id.ref); +} + +static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) +{ + while (!atomic_inc_not_zero(&memcg->id.ref)) { + /* + * The root cgroup cannot be destroyed, so it's refcount must + * always be >= 1. + */ + if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { + VM_BUG_ON(1); + break; + } + memcg = parent_mem_cgroup(memcg); + if (!memcg) + memcg = root_mem_cgroup; + } + return memcg; +} + +static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) +{ + if (atomic_sub_and_test(n, &memcg->id.ref)) { + idr_remove(&mem_cgroup_idr, memcg->id.id); + memcg->id.id = 0; + + /* Memcg ID pins CSS */ + css_put(&memcg->css); + } +} + +static inline void mem_cgroup_id_get(struct mem_cgroup *memcg) +{ + mem_cgroup_id_get_many(memcg, 1); +} + +static inline void mem_cgroup_id_put(struct mem_cgroup *memcg) +{ + mem_cgroup_id_put_many(memcg, 1); +} + +/** + * mem_cgroup_from_id - look up a memcg from a memcg id + * @id: the memcg id to look up + * + * Caller must hold rcu_read_lock(). + */ +struct mem_cgroup *mem_cgroup_from_id(unsigned short id) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return idr_find(&mem_cgroup_idr, id); +} + static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) { struct mem_cgroup_per_node *pn; @@ -4178,6 +4246,12 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (memcg_wb_domain_init(memcg, GFP_KERNEL)) goto out_free_stat; + memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, + 1, MEM_CGROUP_ID_MAX, + GFP_KERNEL); + if (memcg->id.id < 0) + goto out_free_stat; + return memcg; out_free_stat: @@ -4263,9 +4337,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&memcg->cgwb_list); #endif + idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); return &memcg->css; free_out: + idr_remove(&mem_cgroup_idr, memcg->id.id); __mem_cgroup_free(memcg); return ERR_PTR(error); } @@ -4277,8 +4353,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) struct mem_cgroup *parent = mem_cgroup_from_css(css->parent); int ret; - if (css->id > MEM_CGROUP_ID_MAX) - return -ENOSPC; + /* Online state pins memcg ID, memcg ID pins CSS */ + mem_cgroup_id_get(mem_cgroup_from_css(css)); + css_get(css); if (!parent) return 0; @@ -4352,6 +4429,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) memcg_deactivate_kmem(memcg); wb_memcg_offline(memcg); + + mem_cgroup_id_put(memcg); } static void mem_cgroup_css_released(struct cgroup_subsys_state *css) @@ -4785,6 +4864,8 @@ static void __mem_cgroup_clear_mc(void) if (!mem_cgroup_is_root(mc.from)) page_counter_uncharge(&mc.from->memsw, mc.moved_swap); + mem_cgroup_id_put_many(mc.from, mc.moved_swap); + /* * we charged both to->memory and to->memsw, so we * should uncharge to->memory. @@ -4792,9 +4873,9 @@ static void __mem_cgroup_clear_mc(void) if (!mem_cgroup_is_root(mc.to)) page_counter_uncharge(&mc.to->memory, mc.moved_swap); - css_put_many(&mc.from->css, mc.moved_swap); + mem_cgroup_id_get_many(mc.to, mc.moved_swap); + css_put_many(&mc.to->css, mc.moved_swap); - /* we've already done css_get(mc.to) */ mc.moved_swap = 0; } memcg_oom_recover(from); @@ -5262,6 +5343,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .css_reset = mem_cgroup_css_reset, .can_attach = mem_cgroup_can_attach, .cancel_attach = mem_cgroup_cancel_attach, + .attach = mem_cgroup_move_task, .post_attach = mem_cgroup_move_task, .bind = mem_cgroup_bind, .dfl_cftypes = memory_files, @@ -5670,7 +5752,7 @@ subsys_initcall(mem_cgroup_init); */ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) { - struct mem_cgroup *memcg; + struct mem_cgroup *memcg, *swap_memcg; unsigned short oldid; VM_BUG_ON_PAGE(PageLRU(page), page); @@ -5685,15 +5767,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) if (!memcg) return; - oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); + /* + * In case the memcg owning these pages has been offlined and doesn't + * have an ID allocated to it anymore, charge the closest online + * ancestor for the swap instead and transfer the memory+swap charge. + */ + swap_memcg = mem_cgroup_id_get_online(memcg); + oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg)); VM_BUG_ON_PAGE(oldid, page); - mem_cgroup_swap_statistics(memcg, true); + mem_cgroup_swap_statistics(swap_memcg, true); page->mem_cgroup = NULL; if (!mem_cgroup_is_root(memcg)) page_counter_uncharge(&memcg->memory, 1); + if (memcg != swap_memcg) { + if (!mem_cgroup_is_root(swap_memcg)) + page_counter_charge(&swap_memcg->memsw, 1); + page_counter_uncharge(&memcg->memsw, 1); + } + /* * Interrupts should be disabled here because the caller holds the * mapping->tree_lock lock which is taken with interrupts-off. It is @@ -5703,6 +5797,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) VM_BUG_ON(!irqs_disabled()); mem_cgroup_charge_statistics(memcg, page, -1); memcg_check_events(memcg, page); + + if (!mem_cgroup_is_root(memcg)) + css_put(&memcg->css); } /** @@ -5726,7 +5823,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry) if (!mem_cgroup_is_root(memcg)) page_counter_uncharge(&memcg->memsw, 1); mem_cgroup_swap_statistics(memcg, false); - css_put(&memcg->css); + mem_cgroup_id_put(memcg); } rcu_read_unlock(); }