X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=mm%2Fmemory-failure.c;h=603f1fa1b7a3aaffc6b1f198ef62e8a4f1391e73;hb=3659bb266d2b84f868e31fd6f94ee338322aa5ff;hp=ceb0c7f1932f2e97c18cb1971f5b0593d57172ab;hpb=942d33da999b86821c9aee9615fcb81207ee04c7;p=firefly-linux-kernel-4.4.55.git diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ceb0c7f1932f..603f1fa1b7a3 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -208,9 +208,9 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, #endif si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; - if ((flags & MF_ACTION_REQUIRED) && t == current) { + if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) { si.si_code = BUS_MCEERR_AR; - ret = force_sig_info(SIGBUS, &si, t); + ret = force_sig_info(SIGBUS, &si, current); } else { /* * Don't use force here, it's convenient if the signal @@ -382,10 +382,12 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, } } -static int task_early_kill(struct task_struct *tsk) +static int task_early_kill(struct task_struct *tsk, int force_early) { if (!tsk->mm) return 0; + if (force_early) + return 1; if (tsk->flags & PF_MCE_PROCESS) return !!(tsk->flags & PF_MCE_EARLY); return sysctl_memory_failure_early_kill; @@ -395,7 +397,7 @@ static int task_early_kill(struct task_struct *tsk) * Collect processes when the error hit an anonymous page. */ static void collect_procs_anon(struct page *page, struct list_head *to_kill, - struct to_kill **tkc) + struct to_kill **tkc, int force_early) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -411,7 +413,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, for_each_process (tsk) { struct anon_vma_chain *vmac; - if (!task_early_kill(tsk)) + if (!task_early_kill(tsk, force_early)) continue; anon_vma_interval_tree_foreach(vmac, &av->rb_root, pgoff, pgoff) { @@ -430,7 +432,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, * Collect processes when the error hit a file mapped page. */ static void collect_procs_file(struct page *page, struct list_head *to_kill, - struct to_kill **tkc) + struct to_kill **tkc, int force_early) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -441,7 +443,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, for_each_process(tsk) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - if (!task_early_kill(tsk)) + if (!task_early_kill(tsk, force_early)) continue; vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, @@ -467,7 +469,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, * First preallocate one tokill structure outside the spin locks, * so that we can kill at least one process reasonably reliable. */ -static void collect_procs(struct page *page, struct list_head *tokill) +static void collect_procs(struct page *page, struct list_head *tokill, + int force_early) { struct to_kill *tk; @@ -478,9 +481,9 @@ static void collect_procs(struct page *page, struct list_head *tokill) if (!tk) return; if (PageAnon(page)) - collect_procs_anon(page, tokill, &tk); + collect_procs_anon(page, tokill, &tk, force_early); else - collect_procs_file(page, tokill, &tk); + collect_procs_file(page, tokill, &tk, force_early); kfree(tk); } @@ -854,14 +857,14 @@ static int page_action(struct page_state *ps, struct page *p, * the pages and send SIGBUS to the processes if the data was dirty. */ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, - int trapno, int flags) + int trapno, int flags, struct page **hpagep) { enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; struct address_space *mapping; LIST_HEAD(tokill); int ret; int kill = 1, forcekill; - struct page *hpage = compound_head(p); + struct page *hpage = *hpagep; struct page *ppage; if (PageReserved(p) || PageSlab(p)) @@ -936,6 +939,21 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, BUG_ON(!PageHWPoison(p)); return SWAP_FAIL; } + /* + * We pinned the head page for hwpoison handling, + * now we split the thp and we are interested in + * the hwpoisoned raw page, so move the refcount + * to it. Similarly, page lock is shifted. + */ + if (hpage != p) { + if (!(flags & MF_COUNT_INCREASED)) { + put_page(hpage); + get_page(p); + } + lock_page(p); + unlock_page(hpage); + *hpagep = p; + } /* THP is split, so ppage should be the real poisoned page. */ ppage = p; } @@ -950,19 +968,13 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, * there's nothing that can be done. */ if (kill) - collect_procs(ppage, &tokill); - - if (hpage != ppage) - lock_page(ppage); + collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED); ret = try_to_unmap(ppage, ttu); if (ret != SWAP_SUCCESS) printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", pfn, page_mapcount(ppage)); - if (hpage != ppage) - unlock_page(ppage); - /* * Now that the dirty bit has been propagated to the * struct page and all unmaps done we can decide if @@ -1074,15 +1086,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags) return 0; } else if (PageHuge(hpage)) { /* - * Check "just unpoisoned", "filter hit", and - * "race with other subpage." + * Check "filter hit" and "race with other subpage." */ lock_page(hpage); - if (!PageHWPoison(hpage) - || (hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != hpage && TestSetPageHWPoison(hpage))) { - atomic_long_sub(nr_pages, &num_poisoned_pages); - return 0; + if (PageHWPoison(hpage)) { + if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) + || (p != hpage && TestSetPageHWPoison(hpage))) { + atomic_long_sub(nr_pages, &num_poisoned_pages); + unlock_page(hpage); + return 0; + } } set_page_hwpoison_huge_page(hpage); res = dequeue_hwpoisoned_huge_page(hpage); @@ -1143,6 +1156,8 @@ int memory_failure(unsigned long pfn, int trapno, int flags) */ if (!PageHWPoison(p)) { printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn); + atomic_long_sub(nr_pages, &num_poisoned_pages); + put_page(hpage); res = 0; goto out; } @@ -1179,8 +1194,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags) /* * Now take care of user space mappings. * Abort on fail: __delete_from_page_cache() assumes unmapped page. + * + * When the raw error page is thp tail page, hpage points to the raw + * page after thp split. */ - if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) { + if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage) + != SWAP_SUCCESS) { printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); res = -EBUSY; goto out; @@ -1410,7 +1429,8 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) /* * Isolate the page, so that it doesn't get reallocated if it - * was free. + * was free. This flag should be kept set until the source page + * is freed and PG_hwpoison on it is set. */ set_migratetype_isolate(p, true); /* @@ -1433,7 +1453,6 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) /* Not a free page */ ret = 1; } - unset_migratetype_isolate(p, MIGRATE_MOVABLE); unlock_memory_hotplug(); return ret; } @@ -1489,12 +1508,17 @@ static int soft_offline_huge_page(struct page *page, int flags) pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); } else { - set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); - atomic_long_add(1 << compound_trans_order(hpage), - &num_poisoned_pages); + /* overcommit hugetlb page will be freed to buddy */ + if (PageHuge(page)) { + set_page_hwpoison_huge_page(hpage); + dequeue_hwpoisoned_huge_page(hpage); + atomic_long_add(1 << compound_order(hpage), + &num_poisoned_pages); + } else { + SetPageHWPoison(page); + atomic_long_inc(&num_poisoned_pages); + } } - /* keep elevated page count for bad page */ return ret; } @@ -1526,7 +1550,7 @@ int soft_offline_page(struct page *page, int flags) { int ret; unsigned long pfn = page_to_pfn(page); - struct page *hpage = compound_trans_head(page); + struct page *hpage = compound_head(page); if (PageHWPoison(page)) { pr_info("soft offline: %#lx page already poisoned\n", pfn); @@ -1559,7 +1583,7 @@ int soft_offline_page(struct page *page, int flags) atomic_long_inc(&num_poisoned_pages); } } - /* keep elevated page count for bad page */ + unset_migratetype_isolate(page, MIGRATE_MOVABLE); return ret; } @@ -1625,7 +1649,22 @@ static int __soft_offline_page(struct page *page, int flags) if (ret > 0) ret = -EIO; } else { + /* + * After page migration succeeds, the source page can + * be trapped in pagevec and actual freeing is delayed. + * Freeing code works differently based on PG_hwpoison, + * so there's a race. We need to make sure that the + * source page should be freed back to buddy before + * setting PG_hwpoison. + */ + if (!is_free_buddy_page(page)) + lru_add_drain_all(); + if (!is_free_buddy_page(page)) + drain_all_pages(); SetPageHWPoison(page); + if (!is_free_buddy_page(page)) + pr_info("soft offline: %#lx: page leaked\n", + pfn); atomic_long_inc(&num_poisoned_pages); } } else {