Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus

[firefly-linux-kernel-4.4.55.git] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 899f6a81e77aff6df3bb48f7410a9e4f0899074d..ef6963b577fd2920c1a4857f7b74b50e91c98b7a 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -372,8 +372,10 @@ retry_locked:
                 spin_unlock(&resv->lock);
  
                 trg = kmalloc(sizeof(*trg), GFP_KERNEL);
-               if (!trg)
+               if (!trg) {
+                       kfree(nrg);
                         return -ENOMEM;
+               }
  
                 spin_lock(&resv->lock);
                 list_add(&trg->link, &resv->region_cache);
@@ -483,8 +485,16 @@ static long region_del(struct resv_map *resv, long f, long t)
  retry:
         spin_lock(&resv->lock);
         list_for_each_entry_safe(rg, trg, head, link) {
-               if (rg->to <= f)
+               /*
+                * Skip regions before the range to be deleted.  file_region
+                * ranges are normally of the form [from, to).  However, there
+                * may be a "placeholder" entry in the map which is of the form
+                * (from, to) with from == to.  Do not skip such a placeholder
+                * when it sits exactly at the start (f) of the deleted range.
+                */
+               if (rg->to <= f && (rg->to != rg->from || rg->to != f))
                         continue;
+
                 if (rg->from >= t)
                         break;
  
@@ -994,23 +1004,22 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
  
  #if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
  static void destroy_compound_gigantic_page(struct page *page,
-                                       unsigned long order)
+                                       unsigned int order)
  {
         int i;
         int nr_pages = 1 << order;
         struct page *p = page + 1;
  
         for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
-               __ClearPageTail(p);
+               clear_compound_head(p);
                 set_page_refcounted(p);
-               p->first_page = NULL;
         }
  
         set_compound_order(page, 0);
         __ClearPageHead(page);
  }
  
-static void free_gigantic_page(struct page *page, unsigned order)
+static void free_gigantic_page(struct page *page, unsigned int order)
  {
         free_contig_range(page_to_pfn(page), 1 << order);
  }
@@ -1054,7 +1063,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
         return zone_spans_pfn(zone, last_pfn);
  }
  
-static struct page *alloc_gigantic_page(int nid, unsigned order)
+static struct page *alloc_gigantic_page(int nid, unsigned int order)
  {
         unsigned long nr_pages = 1 << order;
         unsigned long ret, pfn, flags;
@@ -1090,7 +1099,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order)
  }
  
  static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
-static void prep_compound_gigantic_page(struct page *page, unsigned long order);
+static void prep_compound_gigantic_page(struct page *page, unsigned int order);
  
  static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
  {
@@ -1123,9 +1132,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h,
  static inline bool gigantic_page_supported(void) { return true; }
  #else
  static inline bool gigantic_page_supported(void) { return false; }
-static inline void free_gigantic_page(struct page *page, unsigned order) { }
+static inline void free_gigantic_page(struct page *page, unsigned int order) { }
  static inline void destroy_compound_gigantic_page(struct page *page,
-                                               unsigned long order) { }
+                                               unsigned int order) { }
  static inline int alloc_fresh_gigantic_page(struct hstate *h,
                                         nodemask_t *nodes_allowed) { return 0; }
  #endif
@@ -1146,7 +1155,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
                                 1 << PG_writeback);
         }
         VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
-       set_compound_page_dtor(page, NULL);
+       set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
         set_page_refcounted(page);
         if (hstate_is_gigantic(h)) {
                 destroy_compound_gigantic_page(page, huge_page_order(h));
@@ -1242,7 +1251,7 @@ void free_huge_page(struct page *page)
  static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
  {
         INIT_LIST_HEAD(&page->lru);
-       set_compound_page_dtor(page, free_huge_page);
+       set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
         spin_lock(&hugetlb_lock);
         set_hugetlb_cgroup(page, NULL);
         h->nr_huge_pages++;
@@ -1251,7 +1260,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
         put_page(page); /* free it into the hugepage allocator */
  }
  
-static void prep_compound_gigantic_page(struct page *page, unsigned long order)
+static void prep_compound_gigantic_page(struct page *page, unsigned int order)
  {
         int i;
         int nr_pages = 1 << order;
@@ -1276,10 +1285,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
                  */
                 __ClearPageReserved(p);
                 set_page_count(p, 0);
-               p->first_page = page;
-               /* Make sure p->first_page is always valid for PageTail() */
-               smp_wmb();
-               __SetPageTail(p);
+               set_compound_head(p, page);
         }
  }
  
@@ -1294,7 +1300,7 @@ int PageHuge(struct page *page)
                 return 0;
  
         page = compound_head(page);
-       return get_compound_page_dtor(page) == free_huge_page;
+       return page[1].compound_dtor == HUGETLB_PAGE_DTOR;
  }
  EXPORT_SYMBOL_GPL(PageHuge);
  
@@ -1455,9 +1461,14 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
  
         /*
          * We need a VMA to get a memory policy.  If we do not
-        * have one, we use the 'nid' argument
+        * have one, we use the 'nid' argument.
+        *
+        * The mempolicy code below has some non-inlined parts
+        * and calls ->vm_ops, which makes it hard to optimize away
+        * at compile time even when NUMA is off and it does
+        * nothing.  The IS_ENABLED() check lets the compiler drop it.
          */
-       if (!vma) {
+       if (!IS_ENABLED(CONFIG_NUMA) || !vma) {
                 /*
                  * If a specific node is requested, make sure to
                  * get memory from there, but only when a node
@@ -1474,7 +1485,8 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
  
         /*
          * OK, so we have a VMA.  Fetch the mempolicy and try to
-        * allocate a huge page with it.
+        * allocate a huge page with it.  We will only reach this
+        * when CONFIG_NUMA=y.
          */
         do {
                 struct page *page;
@@ -1520,8 +1532,8 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
          * we can call this function, not both.
          */
         if (vma || (addr != -1)) {
-               WARN_ON_ONCE(addr == -1);
-               WARN_ON_ONCE(nid != NUMA_NO_NODE);
+               VM_WARN_ON_ONCE(addr == -1);
+               VM_WARN_ON_ONCE(nid != NUMA_NO_NODE);
         }
         /*
          * Assume we will successfully allocate the surplus page to
@@ -1562,7 +1574,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
         if (page) {
                 INIT_LIST_HEAD(&page->lru);
                 r_nid = page_to_nid(page);
-               set_compound_page_dtor(page, free_huge_page);
+               set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
                 set_hugetlb_cgroup(page, NULL);
                 /*
                  * We incremented the global counters already
@@ -1585,6 +1597,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
   * NUMA_NO_NODE, which means that it may be allocated
   * anywhere.
   */
+static
  struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
  {
         unsigned long addr = -1;
@@ -1595,6 +1608,7 @@ struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
  /*
   * Use the VMA's mpolicy to allocate a huge page from the buddy.
   */
+static
  struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
                 struct vm_area_struct *vma, unsigned long addr)
  {
@@ -1882,7 +1896,10 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                 page = __alloc_buddy_huge_page_with_mpol(h, vma, addr);
                 if (!page)
                         goto out_uncharge_cgroup;
-
+               if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
+                       SetPagePrivate(page);
+                       h->resv_huge_pages--;
+               }
                 spin_lock(&hugetlb_lock);
                 list_move(&page->lru, &h->hugepage_activelist);
                 /* Fall through */
@@ -1964,7 +1981,8 @@ found:
         return 1;
  }
  
-static void __init prep_compound_huge_page(struct page *page, int order)
+static void __init prep_compound_huge_page(struct page *page,
+               unsigned int order)
  {
         if (unlikely(order > (MAX_ORDER - 1)))
                 prep_compound_gigantic_page(page, order);
@@ -2133,7 +2151,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
          * First take pages out of surplus state.  Then make up the
          * remaining difference by allocating fresh huge pages.
          *
-        * We might race with alloc_buddy_huge_page() here and be unable
+        * We might race with __alloc_buddy_huge_page() here and be unable
          * to convert a surplus huge page to a normal huge page. That is
          * not critical, though, it just means the overall size of the
          * pool might be one hugepage larger than it needs to be, but
@@ -2175,7 +2193,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
          * By placing pages into the surplus state independent of the
          * overcommit value, we are allowing the surplus pool size to
          * exceed overcommit. There are few sane options here. Since
-        * alloc_buddy_huge_page() is checking the global counter,
+        * __alloc_buddy_huge_page() is checking the global counter,
          * though, we'll note that we're not allowed to exceed surplus
          * and won't grow the pool anywhere else. Not until one of the
          * sysctls are changed, or the surplus pages go out of use.
@@ -2675,7 +2693,7 @@ static int __init hugetlb_init(void)
  module_init(hugetlb_init);
  
  /* Should be called on processing a hugepagesz=... option */
-void __init hugetlb_add_hstate(unsigned order)
+void __init hugetlb_add_hstate(unsigned int order)
  {
         struct hstate *h;
         unsigned long i;
@@ -3688,12 +3706,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
                         return VM_FAULT_HWPOISON_LARGE |
                                 VM_FAULT_SET_HINDEX(hstate_index(h));
+       } else {
+               ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+               if (!ptep)
+                       return VM_FAULT_OOM;
         }
  
-       ptep = huge_pte_alloc(mm, address, huge_page_size(h));
-       if (!ptep)
-               return VM_FAULT_OOM;
-
         mapping = vma->vm_file->f_mapping;
         idx = vma_hugecache_offset(h, vma, address);
  
@@ -4129,8 +4147,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
         unsigned long s_end = sbase + PUD_SIZE;
  
         /* Allow segments to share if only one is marked locked */
-       unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
-       unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+       unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
+       unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK;
  
         /*
          * match the virtual addresses, permission and the alignment of the