index f681e1842fadc1ccd8d7188a7ca1688c73548111..2b11430087937f2ff8bdf21b6c5622531c9d09ab 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -6,10 +6,13 @@
  * Address space accounting code       <alan@lxorguk.ukuu.org.uk>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
@@ -28,6 +31,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
+#include <linux/mmdebug.h>
 #include <linux/perf_event.h>
 #include <linux/audit.h>
 #include <linux/khugepaged.h>
@@ -36,6 +40,8 @@
 #include <linux/sched/sysctl.h>
 #include <linux/notifier.h>
 #include <linux/memory.h>
+#include <linux/printk.h>
+#include <linux/userfaultfd_k.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #define arch_rebalance_pgtables(addr, len)             (addr)
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
+const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
+int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
+const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
+int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
+#endif
+
+
 static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
                unsigned long start, unsigned long end);
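
The mmap_rnd_bits knobs added above bound how many address bits an architecture may randomize for mmap ASLR, with the tunable value clamped between the min/max pair. A minimal userspace sketch of how such a bit count is typically turned into a page-aligned randomization offset (PAGE_SHIFT and mmap_rnd() below are illustrative stand-ins, not the kernel's arch_mmap_rnd()):

/* Illustrative sketch only -- not kernel code. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12			/* assumed 4 KiB pages */

/* Keep 'bits' low-order random bits and shift them to a page
 * boundary -- roughly how an arch consumes mmap_rnd_bits. */
static uint64_t mmap_rnd(unsigned int bits)
{
	uint64_t rnd = (uint64_t)random() & ((1ULL << bits) - 1);

	return rnd << PAGE_SHIFT;
}

int main(void)
{
	srandom(42);
	printf("ASLR offset with 18 bits: %#llx\n",
	       (unsigned long long)mmap_rnd(18));
	printf("ASLR offset with 24 bits: %#llx\n",
	       (unsigned long long)mmap_rnd(24));
	return 0;
}
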
@@ -65,7 +83,7 @@ static void unmap_region(struct mm_struct *mm,
  * MAP_SHARED  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
  *             w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
  *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
- *             
+ *
  * MAP_PRIVATE r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
  *             w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
  *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
@@ -84,8 +102,28 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
+static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
+{
+       return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
+}
+
+/* Update vma->vm_page_prot to reflect vma->vm_flags. */
+void vma_set_page_prot(struct vm_area_struct *vma)
+{
+       unsigned long vm_flags = vma->vm_flags;
+
+       vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
+       if (vma_wants_writenotify(vma)) {
+               vm_flags &= ~VM_SHARED;
+               vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
+                                                    vm_flags);
+       }
+}
+
+
 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio __read_mostly = 50;        /* default is 50% */
+unsigned long sysctl_overcommit_kbytes __read_mostly;
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
 unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
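
The vma_set_page_prot() helper added above recomputes vma->vm_page_prot from the flags and, when vma_wants_writenotify() says the mapping needs write notifications, derives the protection as if VM_SHARED were clear so the first store still faults. A hedged userspace model of that decision (the flag values mirror the kernel's vm_flags bits; page_prot() and effective_prot() are illustrative stand-ins):

#include <stdio.h>

#define VM_WRITE	0x00000002u
#define VM_SHARED	0x00000008u

/* Stand-in for vm_get_page_prot(): only shared, writable mappings get
 * writable PTEs up front; private writable mappings stay read-only
 * until copy-on-write. */
static const char *page_prot(unsigned int vm_flags)
{
	return (vm_flags & VM_WRITE) && (vm_flags & VM_SHARED) ? "RW" : "RO";
}

/* Model of vma_set_page_prot(): a mapping that wants write
 * notifications is mapped as if it were not shared, so the first
 * write faults and the backing store sees page_mkwrite(). */
static const char *effective_prot(unsigned int vm_flags, int writenotify)
{
	if (writenotify)
		vm_flags &= ~VM_SHARED;
	return page_prot(vm_flags);
}

int main(void)
{
	printf("shared+writable, no notify:   %s\n",
	       effective_prot(VM_WRITE | VM_SHARED, 0));
	printf("shared+writable, writenotify: %s\n",
	       effective_prot(VM_WRITE | VM_SHARED, 1));
	return 0;
}
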
@@ -127,7 +165,11 @@ EXPORT_SYMBOL_GPL(vm_memory_committed);
  */
 int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
-       unsigned long free, allowed, reserve;
+       long free, allowed, reserve;
+
+       VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+                       -(s64)vm_committed_as_batch * num_online_cpus(),
+                       "memory commitment underflow");
 
        vm_acct_memory(pages);
 
@@ -179,21 +221,19 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                goto error;
        }
 
-       allowed = (totalram_pages - hugetlb_total_pages())
-               * sysctl_overcommit_ratio / 100;
+       allowed = vm_commit_limit();
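
vm_commit_limit(), defined in mm/util.c, replaces the open-coded formula removed here: an absolute sysctl_overcommit_kbytes setting wins when non-zero, otherwise sysctl_overcommit_ratio is applied to non-hugetlb RAM, and swap is always added on top. A hedged userspace sketch of that calculation (globals are passed as plain parameters here):

#include <stdio.h>

#define PAGE_SHIFT 12			/* assumed 4 KiB pages */

/* Commit limit in pages, shaped like vm_commit_limit(). */
static unsigned long commit_limit(unsigned long totalram_pages,
				  unsigned long hugetlb_pages,
				  unsigned long total_swap_pages,
				  unsigned long overcommit_kbytes,
				  int overcommit_ratio)
{
	unsigned long allowed;

	if (overcommit_kbytes)
		allowed = overcommit_kbytes >> (PAGE_SHIFT - 10);
	else
		allowed = (totalram_pages - hugetlb_pages)
			  * overcommit_ratio / 100;

	return allowed + total_swap_pages;
}

int main(void)
{
	/* 4 GiB of RAM, no hugetlb, 1 GiB of swap, default 50% ratio */
	printf("commit limit: %lu pages\n",
	       commit_limit(1UL << 20, 0, 1UL << 18, 0, 50));
	return 0;
}
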
        /*
         * Reserve some for root
         */
        if (!cap_sys_admin)
                allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
-       allowed += total_swap_pages;
 
        /*
         * Don't let a single process grow so big a user can't recover
         */
        if (mm) {
                reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
-               allowed -= min(mm->total_vm / 32, reserve);
+               allowed -= min_t(long, mm->total_vm / 32, reserve);
        }
 
        if (percpu_counter_read_positive(&vm_committed_as) < allowed)
@@ -205,7 +245,7 @@ error:
 }
 
 /*
- * Requires inode->i_mapping->i_mmap_mutex
+ * Requires inode->i_mapping->i_mmap_rwsem
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                struct file *file, struct address_space *mapping)
@@ -213,13 +253,10 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
        if (vma->vm_flags & VM_DENYWRITE)
                atomic_inc(&file_inode(file)->i_writecount);
        if (vma->vm_flags & VM_SHARED)
-               mapping->i_mmap_writable--;
+               mapping_unmap_writable(mapping);
 
        flush_dcache_mmap_lock(mapping);
-       if (unlikely(vma->vm_flags & VM_NONLINEAR))
-               list_del_init(&vma->shared.nonlinear);
-       else
-               vma_interval_tree_remove(vma, &mapping->i_mmap);
+       vma_interval_tree_remove(vma, &mapping->i_mmap);
        flush_dcache_mmap_unlock(mapping);
 }
 
@@ -233,9 +270,9 @@ void unlink_file_vma(struct vm_area_struct *vma)
 
        if (file) {
                struct address_space *mapping = file->f_mapping;
-               mutex_lock(&mapping->i_mmap_mutex);
+               i_mmap_lock_write(mapping);
                __remove_shared_vm_struct(vma, file, mapping);
-               mutex_unlock(&mapping->i_mmap_mutex);
+               i_mmap_unlock_write(mapping);
        }
 }
 
@@ -260,7 +297,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len);
 
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
-       unsigned long rlim, retval;
+       unsigned long retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;
        unsigned long min_brk;
@@ -290,9 +327,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
          * segment grow beyond its set limit in the case where the limit is
         * not page aligned -Ram Gupta
         */
-       rlim = rlimit(RLIMIT_DATA);
-       if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
-                       (mm->end_data - mm->start_data) > rlim)
+       if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
+                             mm->end_data, mm->start_data))
                goto out;
 
        newbrk = PAGE_ALIGN(brk);
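
check_data_rlimit() folds the removed RLIMIT_DATA arithmetic into one helper: unless the limit is RLIM_INFINITY, the requested heap growth plus the initialized data segment must stay within it. A minimal userspace sketch of the same test (data_rlimit_ok() is an illustrative name; all sizes are in bytes):

#include <stdio.h>
#include <sys/resource.h>		/* rlim_t, RLIM_INFINITY */

/* Mirrors the open-coded check this hunk removes from brk(). */
static int data_rlimit_ok(rlim_t rlim, unsigned long new_brk,
			  unsigned long start_brk, unsigned long end_data,
			  unsigned long start_data)
{
	if (rlim < RLIM_INFINITY &&
	    (new_brk - start_brk) + (end_data - start_data) > rlim)
		return 0;
	return 1;
}

int main(void)
{
	/* 1 MiB data segment, heap asked to grow to 8 MiB, 4 MiB limit */
	printf("%s\n", data_rlimit_ok(4UL << 20, 8UL << 20, 0,
				      1UL << 20, 0) ? "ok" : "over limit");
	return 0;
}
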
@@ -361,20 +397,22 @@ static int browse_rb(struct rb_root *root)
                struct vm_area_struct *vma;
                vma = rb_entry(nd, struct vm_area_struct, vm_rb);
                if (vma->vm_start < prev) {
-                       printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
+                       pr_emerg("vm_start %lx < prev %lx\n",
+                                 vma->vm_start, prev);
                        bug = 1;
                }
                if (vma->vm_start < pend) {
-                       printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
+                       pr_emerg("vm_start %lx < pend %lx\n",
+                                 vma->vm_start, pend);
                        bug = 1;
                }
                if (vma->vm_start > vma->vm_end) {
-                       printk("vm_end %lx < vm_start %lx\n",
-                               vma->vm_end, vma->vm_start);
+                       pr_emerg("vm_start %lx > vm_end %lx\n",
+                                 vma->vm_start, vma->vm_end);
                        bug = 1;
                }
                if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
-                       printk("free gap %lx, correct %lx\n",
+                       pr_emerg("free gap %lx, correct %lx\n",
                               vma->rb_subtree_gap,
                               vma_compute_subtree_gap(vma));
                        bug = 1;
@@ -388,7 +426,7 @@ static int browse_rb(struct rb_root *root)
        for (nd = pn; nd; nd = rb_prev(nd))
                j++;
        if (i != j) {
-               printk("backwards %d, forwards %d\n", j, i);
+               pr_emerg("backwards %d, forwards %d\n", j, i);
                bug = 1;
        }
        return bug ? -1 : i;
@@ -401,19 +439,22 @@ static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
        for (nd = rb_first(root); nd; nd = rb_next(nd)) {
                struct vm_area_struct *vma;
                vma = rb_entry(nd, struct vm_area_struct, vm_rb);
-               BUG_ON(vma != ignore &&
-                      vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
+               VM_BUG_ON_VMA(vma != ignore &&
+                       vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
+                       vma);
        }
 }
 
-void validate_mm(struct mm_struct *mm)
+static void validate_mm(struct mm_struct *mm)
 {
        int bug = 0;
        int i = 0;
        unsigned long highest_address = 0;
        struct vm_area_struct *vma = mm->mmap;
+
        while (vma) {
                struct anon_vma_chain *avc;
+
                vma_lock_anon_vma(vma);
                list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
                        anon_vma_interval_tree_verify(avc);
@@ -423,20 +464,21 @@ void validate_mm(struct mm_struct *mm)
                i++;
        }
        if (i != mm->map_count) {
-               printk("map_count %d vm_next %d\n", mm->map_count, i);
+               pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
                bug = 1;
        }
        if (highest_address != mm->highest_vm_end) {
-               printk("mm->highest_vm_end %lx, found %lx\n",
-                      mm->highest_vm_end, highest_address);
+               pr_emerg("mm->highest_vm_end %lx, found %lx\n",
+                         mm->highest_vm_end, highest_address);
                bug = 1;
        }
        i = browse_rb(&mm->mm_rb);
        if (i != mm->map_count) {
-               printk("map_count %d rb %d\n", mm->map_count, i);
+               if (i != -1)
+                       pr_emerg("map_count %d rb %d\n", mm->map_count, i);
                bug = 1;
        }
-       BUG_ON(bug);
+       VM_BUG_ON_MM(bug, mm);
 }
 #else
 #define validate_mm_rb(root, ignore) do { } while (0)
@@ -614,13 +656,10 @@ static void __vma_link_file(struct vm_area_struct *vma)
                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_dec(&file_inode(file)->i_writecount);
                if (vma->vm_flags & VM_SHARED)
-                       mapping->i_mmap_writable++;
+                       atomic_inc(&mapping->i_mmap_writable);
 
                flush_dcache_mmap_lock(mapping);
-               if (unlikely(vma->vm_flags & VM_NONLINEAR))
-                       vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
-               else
-                       vma_interval_tree_insert(vma, &mapping->i_mmap);
+               vma_interval_tree_insert(vma, &mapping->i_mmap);
                flush_dcache_mmap_unlock(mapping);
        }
 }
@@ -640,17 +679,16 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct address_space *mapping = NULL;
 
-       if (vma->vm_file)
+       if (vma->vm_file) {
                mapping = vma->vm_file->f_mapping;
-
-       if (mapping)
-               mutex_lock(&mapping->i_mmap_mutex);
+               i_mmap_lock_write(mapping);
+       }
 
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        __vma_link_file(vma);
 
        if (mapping)
-               mutex_unlock(&mapping->i_mmap_mutex);
+               i_mmap_unlock_write(mapping);
 
        mm->map_count++;
        validate_mm(mm);
@@ -682,8 +720,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
        prev->vm_next = next = vma->vm_next;
        if (next)
                next->vm_prev = prev;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = prev;
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
 }
 
 /*
@@ -733,7 +772,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
                         * split_vma inserting another: so it must be
                         * mprotect case 4 shifting the boundary down.
                         */
-                       adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
+                       adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
                        exporter = vma;
                        importer = next;
                }
@@ -744,24 +783,24 @@ again:                    remove_next = 1 + (end > next->vm_end);
                 * shrinking vma had, to cover any anon pages imported.
                 */
                if (exporter && exporter->anon_vma && !importer->anon_vma) {
-                       if (anon_vma_clone(importer, exporter))
-                               return -ENOMEM;
+                       int error;
+
                        importer->anon_vma = exporter->anon_vma;
+                       error = anon_vma_clone(importer, exporter);
+                       if (error)
+                               return error;
                }
        }
 
        if (file) {
                mapping = file->f_mapping;
-               if (!(vma->vm_flags & VM_NONLINEAR)) {
-                       root = &mapping->i_mmap;
-                       uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+               root = &mapping->i_mmap;
+               uprobe_munmap(vma, vma->vm_start, vma->vm_end);
 
-                       if (adjust_next)
-                               uprobe_munmap(next, next->vm_start,
-                                                       next->vm_end);
-               }
+               if (adjust_next)
+                       uprobe_munmap(next, next->vm_start, next->vm_end);
 
-               mutex_lock(&mapping->i_mmap_mutex);
+               i_mmap_lock_write(mapping);
                if (insert) {
                        /*
                         * Put into interval tree now, so instantiated pages
@@ -779,8 +818,8 @@ again:                      remove_next = 1 + (end > next->vm_end);
        if (!anon_vma && adjust_next)
                anon_vma = next->anon_vma;
        if (anon_vma) {
-               VM_BUG_ON(adjust_next && next->anon_vma &&
-                         anon_vma != next->anon_vma);
+               VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
+                         anon_vma != next->anon_vma, next);
                anon_vma_lock_write(anon_vma);
                anon_vma_interval_tree_pre_update_vma(vma);
                if (adjust_next)
@@ -848,7 +887,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
                anon_vma_unlock_write(anon_vma);
        }
        if (mapping)
-               mutex_unlock(&mapping->i_mmap_mutex);
+               i_mmap_unlock_write(mapping);
 
        if (root) {
                uprobe_mmap(vma);
@@ -865,7 +904,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
                if (next->anon_vma)
                        anon_vma_merge(vma, next);
                mm->map_count--;
-               vma_set_policy(vma, vma_policy(next));
+               mpol_put(vma_policy(next));
                kmem_cache_free(vm_area_cachep, next);
                /*
                 * In mprotect's case 6 (see comments on vma_merge),
@@ -893,14 +932,28 @@ again:                    remove_next = 1 + (end > next->vm_end);
  * per-vma resources, so we don't attempt to merge those.
  */
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
-                       struct file *file, unsigned long vm_flags)
+                               struct file *file, unsigned long vm_flags,
+                               struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+                               const char __user *anon_name)
 {
-       if (vma->vm_flags ^ vm_flags)
+       /*
+        * VM_SOFTDIRTY should not prevent VMA merging if the flags
+        * match in everything but the dirty bit -- the caller should
+        * mark the merged VMA as dirty. If the dirty bit were included
+        * in the comparison, we would increase pressure on the memory
+        * system by forcing the kernel to generate new VMAs when old
+        * ones could be extended instead.
+        */
+       if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
                return 0;
        if (vma->vm_file != file)
                return 0;
        if (vma->vm_ops && vma->vm_ops->close)
                return 0;
+       if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
+               return 0;
+       if (vma_get_anon_name(vma) != anon_name)
+               return 0;
        return 1;
 }
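
Masking out VM_SOFTDIRTY above means two VMAs that differ only in the soft-dirty bit still count as mergeable; the caller is expected to mark the merged VMA dirty. A small sketch of that comparison (bit values follow the kernel's vm_flags definitions; flags_mergeable() is an illustrative helper):

#include <stdio.h>

#define VM_READ		0x00000001u
#define VM_WRITE	0x00000002u
#define VM_SHARED	0x00000008u
#define VM_SOFTDIRTY	0x08000000u

/* Flags are compatible when they differ at most in the soft-dirty bit. */
static int flags_mergeable(unsigned int a, unsigned int b)
{
	return ((a ^ b) & ~VM_SOFTDIRTY) == 0;
}

int main(void)
{
	unsigned int clean = VM_READ | VM_WRITE;
	unsigned int dirty = clean | VM_SOFTDIRTY;

	printf("clean vs soft-dirty: %d\n", flags_mergeable(clean, dirty));
	printf("clean vs shared:     %d\n",
	       flags_mergeable(clean, clean | VM_SHARED));
	return 0;
}
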
 
@@ -931,9 +984,12 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
  */
 static int
 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+                    struct anon_vma *anon_vma, struct file *file,
+                    pgoff_t vm_pgoff,
+                    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+                    const char __user *anon_name)
 {
-       if (is_mergeable_vma(vma, file, vm_flags) &&
+       if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
            is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                if (vma->vm_pgoff == vm_pgoff)
                        return 1;
@@ -950,12 +1006,15 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
  */
 static int
 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+                   struct anon_vma *anon_vma, struct file *file,
+                   pgoff_t vm_pgoff,
+                   struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+                   const char __user *anon_name)
 {
-       if (is_mergeable_vma(vma, file, vm_flags) &&
+       if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
            is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                pgoff_t vm_pglen;
-               vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+               vm_pglen = vma_pages(vma);
                if (vma->vm_pgoff + vm_pglen == vm_pgoff)
                        return 1;
        }
@@ -963,9 +1022,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
 }
 
 /*
- * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
- * whether that can be merged with its predecessor or its successor.
- * Or both (it neatly fills a hole).
+ * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
+ * figure out whether that can be merged with its predecessor or its
+ * successor.  Or both (it neatly fills a hole).
  *
  * In most cases - when called for mmap, brk or mremap - [addr,end) is
  * certain not to be mapped by the time vma_merge is called; but when
@@ -994,8 +1053,10 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
 struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        struct vm_area_struct *prev, unsigned long addr,
                        unsigned long end, unsigned long vm_flags,
-                       struct anon_vma *anon_vma, struct file *file,
-                       pgoff_t pgoff, struct mempolicy *policy)
+                       struct anon_vma *anon_vma, struct file *file,
+                       pgoff_t pgoff, struct mempolicy *policy,
+                       struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+                       const char __user *anon_name)
 {
        pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
        struct vm_area_struct *area, *next;
@@ -1020,16 +1081,21 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         * Can it merge with the predecessor?
         */
        if (prev && prev->vm_end == addr &&
-                       mpol_equal(vma_policy(prev), policy) &&
+                       mpol_equal(vma_policy(prev), policy) &&
                        can_vma_merge_after(prev, vm_flags,
-                                               anon_vma, file, pgoff)) {
+                                           anon_vma, file, pgoff,
+                                           vm_userfaultfd_ctx,
+                                           anon_name)) {
                /*
                 * OK, it can.  Can we now merge in the successor as well?
                 */
                if (next && end == next->vm_start &&
                                mpol_equal(policy, vma_policy(next)) &&
                                can_vma_merge_before(next, vm_flags,
-                                       anon_vma, file, pgoff+pglen) &&
+                                                    anon_vma, file,
+                                                    pgoff+pglen,
+                                                    vm_userfaultfd_ctx,
+                                                    anon_name) &&
                                is_mergeable_anon_vma(prev->anon_vma,
                                                      next->anon_vma, NULL)) {
                                                        /* cases 1, 6 */
@@ -1040,7 +1106,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                end, prev->vm_pgoff, NULL);
                if (err)
                        return NULL;
-               khugepaged_enter_vma_merge(prev);
+               khugepaged_enter_vma_merge(prev, vm_flags);
                return prev;
        }
 
@@ -1048,9 +1114,11 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         * Can this new request be merged in front of next?
         */
        if (next && end == next->vm_start &&
-                       mpol_equal(policy, vma_policy(next)) &&
+                       mpol_equal(policy, vma_policy(next)) &&
                        can_vma_merge_before(next, vm_flags,
-                                       anon_vma, file, pgoff+pglen)) {
+                                            anon_vma, file, pgoff+pglen,
+                                            vm_userfaultfd_ctx,
+                                            anon_name)) {
                if (prev && addr < prev->vm_end)        /* case 4 */
                        err = vma_adjust(prev, prev->vm_start,
                                addr, prev->vm_pgoff, NULL);
@@ -1059,7 +1127,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                next->vm_pgoff - pglen, NULL);
                if (err)
                        return NULL;
-               khugepaged_enter_vma_merge(area);
+               khugepaged_enter_vma_merge(area, vm_flags);
                return area;
        }
 
@@ -1084,7 +1152,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
        return a->vm_end == b->vm_start &&
                mpol_equal(vma_policy(a), vma_policy(b)) &&
                a->vm_file == b->vm_file &&
-               !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+               !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
                b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
 }
 
@@ -1099,7 +1167,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
  * by another page fault trying to merge _that_. But that's ok: if it
  * is being set up, that automatically means that it will be a singleton
  * acceptable for merging, so we can do all of this optimistically. But
- * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ * we do that READ_ONCE() to make sure that we never re-load the pointer.
  *
  * IOW: that the "list_is_singular()" test on the anon_vma_chain only
  * matters for the 'stable anon_vma' case (ie the thing we want to avoid
@@ -1113,7 +1181,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
 static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
 {
        if (anon_vma_compatible(a, b)) {
-               struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+               struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
 
                if (anon_vma && list_is_singular(&old->anon_vma_chain))
                        return anon_vma;
@@ -1192,21 +1260,39 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
        return hint;
 }
 
+static inline int mlock_future_check(struct mm_struct *mm,
+                                    unsigned long flags,
+                                    unsigned long len)
+{
+       unsigned long locked, lock_limit;
+
+       /*  mlock MCL_FUTURE? */
+       if (flags & VM_LOCKED) {
+               locked = len >> PAGE_SHIFT;
+               locked += mm->locked_vm;
+               lock_limit = rlimit(RLIMIT_MEMLOCK);
+               lock_limit >>= PAGE_SHIFT;
+               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+                       return -EAGAIN;
+       }
+       return 0;
+}
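
mlock_future_check() above centralizes the RLIMIT_MEMLOCK test that was previously open-coded in do_mmap_pgoff(): pages already locked plus the new request must fit within the limit unless the caller has CAP_IPC_LOCK. A hedged userspace sketch of the arithmetic (mlock_would_exceed() and its parameters are illustrative):

#include <stdio.h>

#define PAGE_SHIFT 12			/* assumed 4 KiB pages */

/* Same shape as mlock_future_check(): compare locked pages so far
 * plus the new length against RLIMIT_MEMLOCK expressed in pages. */
static int mlock_would_exceed(unsigned long locked_vm_pages,
			      unsigned long len_bytes,
			      unsigned long memlock_limit_bytes,
			      int cap_ipc_lock)
{
	unsigned long locked = locked_vm_pages + (len_bytes >> PAGE_SHIFT);
	unsigned long limit = memlock_limit_bytes >> PAGE_SHIFT;

	return locked > limit && !cap_ipc_lock;
}

int main(void)
{
	/* 16 pages already locked, 128 KiB requested, 64 KiB limit */
	printf("would exceed: %d\n",
	       mlock_would_exceed(16, 128UL << 10, 64UL << 10, 0));
	return 0;
}
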
+
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
-
-unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+unsigned long do_mmap(struct file *file, unsigned long addr,
                        unsigned long len, unsigned long prot,
-                       unsigned long flags, unsigned long pgoff,
-                       unsigned long *populate)
+                       unsigned long flags, vm_flags_t vm_flags,
+                       unsigned long pgoff, unsigned long *populate)
 {
-       struct mm_struct * mm = current->mm;
-       struct inode *inode;
-       vm_flags_t vm_flags;
+       struct mm_struct *mm = current->mm;
 
        *populate = 0;
 
+       if (!len)
+               return -EINVAL;
+
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC?
         *
@@ -1214,12 +1300,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         *  mounted, in which case we dont add PROT_EXEC.)
         */
        if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
-               if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
+               if (!(file && path_noexec(&file->f_path)))
                        prot |= PROT_EXEC;
 
-       if (!len)
-               return -EINVAL;
-
        if (!(flags & MAP_FIXED))
                addr = round_hint_to_min(addr);
 
@@ -1230,7 +1313,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 
        /* offset overflow? */
        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
-               return -EOVERFLOW;
+               return -EOVERFLOW;
 
        /* Too many mappings? */
        if (mm->map_count > sysctl_max_map_count)
@@ -1240,34 +1323,26 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         * that it represents a valid section of the address space.
         */
        addr = get_unmapped_area(file, addr, len, pgoff, flags);
-       if (addr & ~PAGE_MASK)
+       if (offset_in_page(addr))
                return addr;
 
        /* Do simple checking here so the lower-level routines won't have
         * to. we assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
-       vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+       vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
                        mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
        if (flags & MAP_LOCKED)
                if (!can_do_mlock())
                        return -EPERM;
 
-       /* mlock MCL_FUTURE? */
-       if (vm_flags & VM_LOCKED) {
-               unsigned long locked, lock_limit;
-               locked = len >> PAGE_SHIFT;
-               locked += mm->locked_vm;
-               lock_limit = rlimit(RLIMIT_MEMLOCK);
-               lock_limit >>= PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       return -EAGAIN;
-       }
-
-       inode = file ? file_inode(file) : NULL;
+       if (mlock_future_check(mm, vm_flags, len))
+               return -EAGAIN;
 
        if (file) {
+               struct inode *inode = file_inode(file);
+
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
                        if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
@@ -1283,7 +1358,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                        /*
                         * Make sure there are no mandatory locks on the file.
                         */
-                       if (locks_verify_locked(inode))
+                       if (locks_verify_locked(file))
                                return -EAGAIN;
 
                        vm_flags |= VM_SHARED | VM_MAYSHARE;
@@ -1294,14 +1369,16 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                case MAP_PRIVATE:
                        if (!(file->f_mode & FMODE_READ))
                                return -EACCES;
-                       if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
+                       if (path_noexec(&file->f_path)) {
                                if (vm_flags & VM_EXEC)
                                        return -EPERM;
                                vm_flags &= ~VM_MAYEXEC;
                        }
 
-                       if (!file->f_op || !file->f_op->mmap)
+                       if (!file->f_op->mmap)
                                return -ENODEV;
+                       if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                               return -EINVAL;
                        break;
 
                default:
@@ -1310,6 +1387,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        } else {
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
+                       if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                               return -EINVAL;
                        /*
                         * Ignore pgoff.
                         */
@@ -1354,22 +1433,23 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
                unsigned long, fd, unsigned long, pgoff)
 {
        struct file *file = NULL;
-       unsigned long retval = -EBADF;
+       unsigned long retval;
 
        if (!(flags & MAP_ANONYMOUS)) {
                audit_mmap_fd(fd, flags);
-               if (unlikely(flags & MAP_HUGETLB))
-                       return -EINVAL;
                file = fget(fd);
                if (!file)
-                       goto out;
+                       return -EBADF;
                if (is_file_hugepages(file))
                        len = ALIGN(len, huge_page_size(hstate_file(file)));
+               retval = -EINVAL;
+               if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
+                       goto out_fput;
        } else if (flags & MAP_HUGETLB) {
                struct user_struct *user = NULL;
-               struct hstate *hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) &
-                                                  SHM_HUGE_MASK);
+               struct hstate *hs;
 
+               hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK);
                if (!hs)
                        return -EINVAL;
 
@@ -1391,9 +1471,9 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
        retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+out_fput:
        if (file)
                fput(file);
-out:
        return retval;
 }
 
@@ -1413,7 +1493,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 
        if (copy_from_user(&a, arg, sizeof(a)))
                return -EFAULT;
-       if (a.offset & ~PAGE_MASK)
+       if (offset_in_page(a.offset))
                return -EINVAL;
 
        return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
@@ -1430,20 +1510,26 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 int vma_wants_writenotify(struct vm_area_struct *vma)
 {
        vm_flags_t vm_flags = vma->vm_flags;
+       const struct vm_operations_struct *vm_ops = vma->vm_ops;
 
        /* If it was private or non-writable, the write bit is already clear */
        if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
                return 0;
 
        /* The backer wishes to know when pages are first written to? */
-       if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+       if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
                return 1;
 
-       /* The open routine did something to the protections already? */
+       /* The open routine did something to the protections that pgprot_modify
+        * won't preserve? */
        if (pgprot_val(vma->vm_page_prot) !=
-           pgprot_val(vm_get_page_prot(vm_flags)))
+           pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
                return 0;
 
+       /* Do we need to track softdirty? */
+       if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
+               return 1;
+
        /* Specialty mapping? */
        if (vm_flags & VM_PFNMAP)
                return 0;
@@ -1474,11 +1560,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
-       int correct_wcount = 0;
        int error;
        struct rb_node **rb_link, *rb_parent;
        unsigned long charged = 0;
-       struct inode *inode =  file ? file_inode(file) : NULL;
 
        /* Check against address space limit. */
        if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
@@ -1498,12 +1582,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        }
 
        /* Clear old maps */
-       error = -ENOMEM;
-munmap_back:
-       if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
+       while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
+                             &rb_parent)) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
-               goto munmap_back;
        }
 
        /*
@@ -1519,7 +1601,8 @@ munmap_back:
        /*
         * Can we just expand an old mapping?
         */
-       vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
+       vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+                       NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
        if (vma)
                goto out;
 
@@ -1542,17 +1625,23 @@ munmap_back:
        vma->vm_pgoff = pgoff;
        INIT_LIST_HEAD(&vma->anon_vma_chain);
 
-       error = -EINVAL;        /* when rejecting VM_GROWSDOWN|VM_GROWSUP */
-
        if (file) {
-               if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
-                       goto free_vma;
                if (vm_flags & VM_DENYWRITE) {
                        error = deny_write_access(file);
                        if (error)
                                goto free_vma;
-                       correct_wcount = 1;
                }
+               if (vm_flags & VM_SHARED) {
+                       error = mapping_map_writable(file->f_mapping);
+                       if (error)
+                               goto allow_write_and_free_vma;
+               }
+
+               /* ->mmap() can change vma->vm_file, but must guarantee that
+                * vma_link() below can deny write-access if VM_DENYWRITE is set
+                * and map writably if VM_SHARED is set. This usually means the
+                * new file must not have been exposed to user-space, yet.
+                */
                vma->vm_file = get_file(file);
                error = file->f_op->mmap(file, vma);
                if (error)
@@ -1568,37 +1657,22 @@ munmap_back:
                WARN_ON_ONCE(addr != vma->vm_start);
 
                addr = vma->vm_start;
-               pgoff = vma->vm_pgoff;
                vm_flags = vma->vm_flags;
        } else if (vm_flags & VM_SHARED) {
-               if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
-                       goto free_vma;
                error = shmem_zero_setup(vma);
                if (error)
                        goto free_vma;
        }
 
-       if (vma_wants_writenotify(vma)) {
-               pgprot_t pprot = vma->vm_page_prot;
-
-               /* Can vma->vm_page_prot have changed??
-                *
-                * Answer: Yes, drivers may have changed it in their
-                *         f_op->mmap method.
-                *
-                * Ensures that vmas marked as uncached stay that way.
-                */
-               vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
-               if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
-                       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-       }
-
        vma_link(mm, vma, prev, rb_link, rb_parent);
-       file = vma->vm_file;
-
        /* Once vma denies write, undo our temporary denial count */
-       if (correct_wcount)
-               atomic_inc(&inode->i_writecount);
+       if (file) {
+               if (vm_flags & VM_SHARED)
+                       mapping_unmap_writable(file->f_mapping);
+               if (vm_flags & VM_DENYWRITE)
+                       allow_write_access(file);
+       }
+       file = vma->vm_file;
 out:
        perf_event_mmap(vma);
 
@@ -1608,23 +1682,37 @@ out:
                                        vma == get_gate_vma(current->mm)))
                        mm->locked_vm += (len >> PAGE_SHIFT);
                else
-                       vma->vm_flags &= ~VM_LOCKED;
+                       vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
        }
 
        if (file)
                uprobe_mmap(vma);
 
+       /*
+        * A new (or expanded) vma always gets soft-dirty status.
+        * Otherwise the user-space soft-dirty page tracker could not
+        * distinguish the case where a vma area was unmapped and a
+        * new one was then mapped in place (which must be treated as
+        * a completely new data area).
+        */
+       vma->vm_flags |= VM_SOFTDIRTY;
+
+       vma_set_page_prot(vma);
+
        return addr;
 
 unmap_and_free_vma:
-       if (correct_wcount)
-               atomic_inc(&inode->i_writecount);
        vma->vm_file = NULL;
        fput(file);
 
        /* Undo any partial mapping done by a device driver. */
        unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
        charged = 0;
+       if (vm_flags & VM_SHARED)
+               mapping_unmap_writable(file->f_mapping);
+allow_write_and_free_vma:
+       if (vm_flags & VM_DENYWRITE)
+               allow_write_access(file);
 free_vma:
        kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
@@ -1853,7 +1941,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
        struct vm_area_struct *vma;
        struct vm_unmapped_area_info info;
 
-       if (len > TASK_SIZE)
+       if (len > TASK_SIZE - mmap_min_addr)
                return -ENOMEM;
 
        if (flags & MAP_FIXED)
@@ -1862,28 +1950,19 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
-               if (TASK_SIZE - len >= addr &&
+               if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
 
        info.flags = 0;
        info.length = len;
-       info.low_limit = TASK_UNMAPPED_BASE;
+       info.low_limit = mm->mmap_base;
        info.high_limit = TASK_SIZE;
        info.align_mask = 0;
        return vm_unmapped_area(&info);
 }
-#endif 
-
-void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
-{
-       /*
-        * Is this a new hole at the lowest possible address?
-        */
-       if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
-               mm->free_area_cache = addr;
-}
+#endif
 
 /*
  * This mmap-allocator allocates new areas top-down from below the
@@ -1901,7 +1980,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        struct vm_unmapped_area_info info;
 
        /* requested length too big for entire address space */
-       if (len > TASK_SIZE)
+       if (len > TASK_SIZE - mmap_min_addr)
                return -ENOMEM;
 
        if (flags & MAP_FIXED)
@@ -1911,14 +1990,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        if (addr) {
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
-               if (TASK_SIZE - len >= addr &&
+               if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
                                (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
 
        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
-       info.low_limit = PAGE_SIZE;
+       info.low_limit = max(PAGE_SIZE, mmap_min_addr);
        info.high_limit = mm->mmap_base;
        info.align_mask = 0;
        addr = vm_unmapped_area(&info);
@@ -1929,7 +2008,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
         * can happen with large stack limits and large mmap()
         * allocations.
         */
-       if (addr & ~PAGE_MASK) {
+       if (offset_in_page(addr)) {
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = TASK_UNMAPPED_BASE;
@@ -1941,19 +2020,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 }
 #endif
 
-void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
-{
-       /*
-        * Is this a new hole at the highest possible address?
-        */
-       if (addr > mm->free_area_cache)
-               mm->free_area_cache = addr;
-
-       /* dont allow allocations above current base */
-       if (mm->free_area_cache > mm->mmap_base)
-               mm->free_area_cache = mm->mmap_base;
-}
-
 unsigned long
 get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
@@ -1970,7 +2036,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                return -ENOMEM;
 
        get_area = current->mm->get_unmapped_area;
-       if (file && file->f_op && file->f_op->get_unmapped_area)
+       if (file && file->f_op->get_unmapped_area)
                get_area = file->f_op->get_unmapped_area;
        addr = get_area(file, addr, len, pgoff, flags);
        if (IS_ERR_VALUE(addr))
@@ -1978,7 +2044,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
        if (addr > TASK_SIZE - len)
                return -ENOMEM;
-       if (addr & ~PAGE_MASK)
+       if (offset_in_page(addr))
                return -EINVAL;
 
        addr = arch_rebalance_pgtables(addr, len);
@@ -1991,34 +2057,32 @@ EXPORT_SYMBOL(get_unmapped_area);
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
-       struct vm_area_struct *vma = NULL;
+       struct rb_node *rb_node;
+       struct vm_area_struct *vma;
 
        /* Check the cache first. */
-       /* (Cache hit rate is typically around 35%.) */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
-               struct rb_node *rb_node;
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
+               return vma;
 
-               rb_node = mm->mm_rb.rb_node;
-               vma = NULL;
+       rb_node = mm->mm_rb.rb_node;
 
-               while (rb_node) {
-                       struct vm_area_struct *vma_tmp;
-
-                       vma_tmp = rb_entry(rb_node,
-                                          struct vm_area_struct, vm_rb);
-
-                       if (vma_tmp->vm_end > addr) {
-                               vma = vma_tmp;
-                               if (vma_tmp->vm_start <= addr)
-                                       break;
-                               rb_node = rb_node->rb_left;
-                       } else
-                               rb_node = rb_node->rb_right;
-               }
-               if (vma)
-                       mm->mmap_cache = vma;
+       while (rb_node) {
+               struct vm_area_struct *tmp;
+
+               tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+               if (tmp->vm_end > addr) {
+                       vma = tmp;
+                       if (tmp->vm_start <= addr)
+                               break;
+                       rb_node = rb_node->rb_left;
+               } else
+                       rb_node = rb_node->rb_right;
        }
+
+       if (vma)
+               vmacache_update(addr, vma);
        return vma;
 }
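
find_vma() now tries the per-thread vmacache before walking the rb-tree; the cache is a handful of slots indexed by low-order bits of the address's page number, and it is invalidated as a whole (vmacache_invalidate()) whenever the VMA set changes. A simplified, hedged sketch of such a lookup cache (struct vma, cache_find() and cache_update() are stand-ins, not the kernel's vmacache API):

#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define CACHE_SLOTS	4		/* small, like the kernel's vmacache */

struct vma { unsigned long vm_start, vm_end; };

static struct vma *cache[CACHE_SLOTS];

static size_t slot(unsigned long addr)
{
	return (addr >> PAGE_SHIFT) & (CACHE_SLOTS - 1);
}

/* Return a cached VMA covering addr, or NULL on a miss; the real
 * vmacache additionally validates a per-mm sequence number before
 * trusting the cached pointers. */
static struct vma *cache_find(unsigned long addr)
{
	struct vma *v = cache[slot(addr)];

	return (v && v->vm_start <= addr && addr < v->vm_end) ? v : NULL;
}

static void cache_update(unsigned long addr, struct vma *v)
{
	cache[slot(addr)] = v;
}

int main(void)
{
	struct vma v = { 0x400000, 0x401000 };

	cache_update(0x400123, &v);
	printf("hit:  %p\n", (void *)cache_find(0x400123));
	printf("miss: %p\n", (void *)cache_find(0x800000));
	return 0;
}
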
 
@@ -2056,14 +2120,17 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 {
        struct mm_struct *mm = vma->vm_mm;
        struct rlimit *rlim = current->signal->rlim;
-       unsigned long new_start;
+       unsigned long new_start, actual_size;
 
        /* address space limit tests */
        if (!may_expand_vm(mm, grow))
                return -ENOMEM;
 
        /* Stack limit test */
-       if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+       actual_size = size;
+       if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
+               actual_size -= PAGE_SIZE;
+       if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
                return -ENOMEM;
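
The actual_size adjustment above stops the stack guard page from counting against RLIMIT_STACK: for a growing stack VMA, one page of the requested size is treated as the guard page and excluded from the comparison. A small sketch of the adjusted test (stack_within_limit() is an illustrative helper):

#include <stdio.h>

#define PAGE_SIZE 4096UL		/* assumed */

/* A growing stack VMA has one guard page that should not be charged
 * against RLIMIT_STACK. */
static int stack_within_limit(unsigned long size, int grows,
			      unsigned long rlim_stack)
{
	unsigned long actual = size;

	if (size && grows)
		actual -= PAGE_SIZE;
	return actual <= rlim_stack;
}

int main(void)
{
	unsigned long limit = 8UL << 20;	/* 8 MiB */

	/* exactly the limit plus the guard page still fits when growing */
	printf("%d\n", stack_within_limit(limit + PAGE_SIZE, 1, limit));
	printf("%d\n", stack_within_limit(limit + PAGE_SIZE, 0, limit));
	return 0;
}
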
 
        /* mlock limit tests */
@@ -2071,7 +2138,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
                unsigned long locked;
                unsigned long limit;
                locked = mm->locked_vm + grow;
-               limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+               limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
                limit >>= PAGE_SHIFT;
                if (locked > limit && !capable(CAP_IPC_LOCK))
                        return -ENOMEM;
@@ -2090,10 +2157,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
        if (security_vm_enough_memory_mm(mm, grow))
                return -ENOMEM;
 
-       /* Ok, everything looks good - let it rip */
-       if (vma->vm_flags & VM_LOCKED)
-               mm->locked_vm += grow;
-       vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
        return 0;
 }
 
@@ -2104,6 +2167,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
  */
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
+       struct mm_struct *mm = vma->vm_mm;
        int error;
 
        if (!(vma->vm_flags & VM_GROWSUP))
@@ -2153,23 +2217,27 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                 * So, we reuse mm->page_table_lock to guard
                                 * against concurrent vma expansions.
                                 */
-                               spin_lock(&vma->vm_mm->page_table_lock);
+                               spin_lock(&mm->page_table_lock);
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags,
+                                               vma->vm_file, grow);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_end = address;
                                anon_vma_interval_tree_post_update_vma(vma);
                                if (vma->vm_next)
                                        vma_gap_update(vma->vm_next);
                                else
-                                       vma->vm_mm->highest_vm_end = address;
-                               spin_unlock(&vma->vm_mm->page_table_lock);
+                                       mm->highest_vm_end = address;
+                               spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
                        }
                }
        }
        vma_unlock_anon_vma(vma);
-       khugepaged_enter_vma_merge(vma);
-       validate_mm(vma->vm_mm);
+       khugepaged_enter_vma_merge(vma, vma->vm_flags);
+       validate_mm(mm);
        return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2180,6 +2248,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 int expand_downwards(struct vm_area_struct *vma,
                                   unsigned long address)
 {
+       struct mm_struct *mm = vma->vm_mm;
        int error;
 
        /*
@@ -2224,21 +2293,25 @@ int expand_downwards(struct vm_area_struct *vma,
                                 * So, we reuse mm->page_table_lock to guard
                                 * against concurrent vma expansions.
                                 */
-                               spin_lock(&vma->vm_mm->page_table_lock);
+                               spin_lock(&mm->page_table_lock);
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags,
+                                               vma->vm_file, grow);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_start = address;
                                vma->vm_pgoff -= grow;
                                anon_vma_interval_tree_post_update_vma(vma);
                                vma_gap_update(vma);
-                               spin_unlock(&vma->vm_mm->page_table_lock);
+                               spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
                        }
                }
        }
        vma_unlock_anon_vma(vma);
-       khugepaged_enter_vma_merge(vma);
-       validate_mm(vma->vm_mm);
+       khugepaged_enter_vma_merge(vma, vma->vm_flags);
+       validate_mm(mm);
        return error;
 }
 
@@ -2279,7 +2352,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
        if (!prev || expand_stack(prev, addr))
                return NULL;
        if (prev->vm_flags & VM_LOCKED)
-               __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
+               populate_vma_page_range(prev, addr, prev->vm_end, NULL);
        return prev;
 }
 #else
@@ -2297,13 +2370,13 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
 }
 
 struct vm_area_struct *
-find_extend_vma(struct mm_struct * mm, unsigned long addr)
+find_extend_vma(struct mm_struct *mm, unsigned long addr)
 {
-       struct vm_area_struct * vma;
+       struct vm_area_struct *vma;
        unsigned long start;
 
        addr &= PAGE_MASK;
-       vma = find_vma(mm,addr);
+       vma = find_vma(mm, addr);
        if (!vma)
                return NULL;
        if (vma->vm_start <= addr)
@@ -2314,11 +2387,13 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
        if (expand_stack(vma, addr))
                return NULL;
        if (vma->vm_flags & VM_LOCKED)
-               __mlock_vma_pages_range(vma, addr, start, NULL);
+               populate_vma_page_range(vma, addr, start, NULL);
        return vma;
 }
 #endif
 
+EXPORT_SYMBOL_GPL(find_extend_vma);
+
 /*
  * Ok - we have the memory areas we should free on the vma list,
  * so release them, and do the vma updates.
@@ -2352,11 +2427,11 @@ static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
                unsigned long start, unsigned long end)
 {
-       struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
+       struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
        struct mmu_gather tlb;
 
        lru_add_drain();
-       tlb_gather_mmu(&tlb, mm, 0);
+       tlb_gather_mmu(&tlb, mm, start, end);
        update_hiwater_rss(mm);
        unmap_vmas(&tlb, vma, start, end);
        free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
@@ -2374,7 +2449,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct vm_area_struct **insertion_point;
        struct vm_area_struct *tail_vma = NULL;
-       unsigned long addr;
 
        insertion_point = (prev ? &prev->vm_next : &mm->mmap);
        vma->vm_prev = NULL;
@@ -2391,24 +2465,20 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
        } else
                mm->highest_vm_end = prev ? prev->vm_end : 0;
        tail_vma->vm_next = NULL;
-       if (mm->unmap_area == arch_unmap_area)
-               addr = prev ? prev->vm_end : mm->mmap_base;
-       else
-               addr = vma ?  vma->vm_start : mm->mmap_base;
-       mm->unmap_area(mm, addr);
-       mm->mmap_cache = NULL;          /* Kill the cache. */
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
 }
 
 /*
  * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
  * munmap path where it doesn't make sense to fail.
  */
-static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
              unsigned long addr, int new_below)
 {
-       struct mempolicy *pol;
        struct vm_area_struct *new;
-       int err = -ENOMEM;
+       int err;
 
        if (is_vm_hugetlb_page(vma) && (addr &
                                        ~(huge_page_mask(hstate_vma(vma)))))
@@ -2416,7 +2486,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
        new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
        if (!new)
-               goto out_err;
+               return -ENOMEM;
 
        /* most fields are the same, copy all, and then fixup */
        *new = *vma;
@@ -2430,14 +2500,12 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
        }
 
-       pol = mpol_dup(vma_policy(vma));
-       if (IS_ERR(pol)) {
-               err = PTR_ERR(pol);
+       err = vma_dup_policy(vma, new);
+       if (err)
                goto out_free_vma;
-       }
-       vma_set_policy(new, pol);
 
-       if (anon_vma_clone(new, vma))
+       err = anon_vma_clone(new, vma);
+       if (err)
                goto out_free_mpol;
 
        if (new->vm_file)
@@ -2463,10 +2531,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                fput(new->vm_file);
        unlink_anon_vmas(new);
  out_free_mpol:
-       mpol_put(pol);
+       mpol_put(vma_policy(new));
  out_free_vma:
        kmem_cache_free(vm_area_cachep, new);
- out_err:
        return err;
 }
 
@@ -2493,10 +2560,11 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        unsigned long end;
        struct vm_area_struct *vma, *prev, *last;
 
-       if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+       if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
                return -EINVAL;
 
-       if ((len = PAGE_ALIGN(len)) == 0)
+       len = PAGE_ALIGN(len);
+       if (len == 0)
                return -EINVAL;
 
        /* Find the first overlapping VMA */
@@ -2542,7 +2610,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
                if (error)
                        return error;
        }
-       vma = prev? prev->vm_next: mm->mmap;
+       vma = prev ? prev->vm_next : mm->mmap;
 
        /*
         * unlock any mlock()ed ranges before detaching vmas
@@ -2564,6 +2632,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        detach_vmas_to_be_unmapped(mm, vma, prev, end);
        unmap_region(mm, vma, prev, start, end);
 
+       arch_unmap(mm, vma, start, end);
+
        /* Fix up all other VM information */
        remove_vma_list(mm, vma);
 
@@ -2588,6 +2658,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
        return vm_munmap(addr, len);
 }
 
+
+/*
+ * Emulation of deprecated remap_file_pages() syscall.
+ */
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+               unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
+{
+
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long populate = 0;
+       unsigned long ret = -EINVAL;
+       struct file *file;
+
+       pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
+                       "See Documentation/vm/remap_file_pages.txt.\n",
+                       current->comm, current->pid);
+
+       if (prot)
+               return ret;
+       start = start & PAGE_MASK;
+       size = size & PAGE_MASK;
+
+       if (start + size <= start)
+               return ret;
+
+       /* Does pgoff wrap? */
+       if (pgoff + (size >> PAGE_SHIFT) < pgoff)
+               return ret;
+
+       down_write(&mm->mmap_sem);
+       vma = find_vma(mm, start);
+
+       if (!vma || !(vma->vm_flags & VM_SHARED))
+               goto out;
+
+       if (start < vma->vm_start || start + size > vma->vm_end)
+               goto out;
+
+       if (pgoff == linear_page_index(vma, start)) {
+               ret = 0;
+               goto out;
+       }
+
+       prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+       prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+       prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+       flags &= MAP_NONBLOCK;
+       flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+       if (vma->vm_flags & VM_LOCKED) {
+               flags |= MAP_LOCKED;
+               /* drop PG_mlocked flag for over-mapped range */
+               munlock_vma_pages_range(vma, start, start + size);
+       }
+
+       file = get_file(vma->vm_file);
+       ret = do_mmap_pgoff(vma->vm_file, start, size,
+                       prot, flags, pgoff, &populate);
+       fput(file);
+out:
+       up_write(&mm->mmap_sem);
+       if (populate)
+               mm_populate(ret, populate);
+       if (!IS_ERR_VALUE(ret))
+               ret = 0;
+       return ret;
+}
+
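The SYSCALL_DEFINE5() block added above keeps existing binaries working by
rewriting the deprecated call as a MAP_SHARED | MAP_FIXED | MAP_POPULATE
mmap over the affected sub-range. A minimal userspace sketch of the call
being emulated; the file name and page numbers are placeholders, and the
prototype follows the remap_file_pages(2) man page rather than anything in
this diff:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	int fd = open("data.bin", O_RDONLY);	/* hypothetical file */
	char *map;

	if (fd < 0)
		return 1;
	map = mmap(NULL, 4 * pgsz, PROT_READ, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;
	/*
	 * Rewire the first page of the mapping to file page 3.  prot must
	 * be 0; the emulation above turns this into do_mmap_pgoff() on the
	 * sub-range and prints the pr_warn_once() deprecation notice.
	 */
	if (remap_file_pages(map, pgsz, 0, 3, 0))
		perror("remap_file_pages");
	return 0;
}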
 static inline void verify_mm_writelocked(struct mm_struct *mm)
 {
 #ifdef CONFIG_DEBUG_VM
@@ -2605,10 +2744,10 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
  */
 static unsigned long do_brk(unsigned long addr, unsigned long len)
 {
-       struct mm_struct * mm = current->mm;
-       struct vm_area_struct * vma, * prev;
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma, *prev;
        unsigned long flags;
-       struct rb_node ** rb_link, * rb_parent;
+       struct rb_node **rb_link, *rb_parent;
        pgoff_t pgoff = addr >> PAGE_SHIFT;
        int error;
 
@@ -2619,21 +2758,12 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
        flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
        error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
-       if (error & ~PAGE_MASK)
+       if (offset_in_page(error))
                return error;
 
-       /*
-        * mlock MCL_FUTURE?
-        */
-       if (mm->def_flags & VM_LOCKED) {
-               unsigned long locked, lock_limit;
-               locked = len >> PAGE_SHIFT;
-               locked += mm->locked_vm;
-               lock_limit = rlimit(RLIMIT_MEMLOCK);
-               lock_limit >>= PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       return -EAGAIN;
-       }
+       error = mlock_future_check(mm, mm->def_flags, len);
+       if (error)
+               return error;
 
        /*
         * mm->mmap_sem is required to protect against another thread
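The RLIMIT_MEMLOCK block deleted just above is replaced by a call to
mlock_future_check(), a helper introduced elsewhere in this patch so that
do_brk() and the mmap path share one copy. A sketch of what that helper
amounts to, reconstructed from the removed lines (the in-tree version may
differ in detail):

/*
 * Hedged sketch: refuse (-EAGAIN) if locking len more bytes would push an
 * unprivileged caller past RLIMIT_MEMLOCK, as the inline code in do_brk()
 * used to do.  Reads as mm/mmap.c internal code.
 */
static int mlock_future_check(struct mm_struct *mm, unsigned long flags,
			      unsigned long len)
{
	unsigned long locked, lock_limit;

	/* mlock MCL_FUTURE? */
	if (flags & VM_LOCKED) {
		locked = (len >> PAGE_SHIFT) + mm->locked_vm;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}
	return 0;
}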
@@ -2644,11 +2774,10 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
        /*
         * Clear old maps.  This also does some error checking for us
         */
- munmap_back:
-       if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
+       while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
+                             &rb_parent)) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
-               goto munmap_back;
        }
 
        /* Check against address space limits *after* clearing old maps... */
@@ -2663,7 +2792,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 
        /* Can we just expand an old private anonymous mapping? */
        vma = vma_merge(mm, prev, addr, addr + len, flags,
-                                       NULL, NULL, pgoff, NULL);
+                       NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
        if (vma)
                goto out;
 
@@ -2689,6 +2818,7 @@ out:
        mm->total_vm += len >> PAGE_SHIFT;
        if (flags & VM_LOCKED)
                mm->locked_vm += (len >> PAGE_SHIFT);
+       vma->vm_flags |= VM_SOFTDIRTY;
        return addr;
 }
 
@@ -2735,7 +2865,7 @@ void exit_mmap(struct mm_struct *mm)
 
        lru_add_drain();
        flush_cache_mm(mm);
-       tlb_gather_mmu(&tlb, mm, 1);
+       tlb_gather_mmu(&tlb, mm, 0, -1);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
        unmap_vmas(&tlb, vma, 0, -1);
@@ -2753,19 +2883,24 @@ void exit_mmap(struct mm_struct *mm)
                vma = remove_vma(vma);
        }
        vm_unacct_memory(nr_accounted);
-
-       WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
 }
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_mutex is taken here.
+ * then i_mmap_rwsem is taken here.
  */
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
        struct vm_area_struct *prev;
        struct rb_node **rb_link, *rb_parent;
 
+       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
+                          &prev, &rb_link, &rb_parent))
+               return -ENOMEM;
+       if ((vma->vm_flags & VM_ACCOUNT) &&
+            security_vm_enough_memory_mm(mm, vma_pages(vma)))
+               return -ENOMEM;
+
        /*
         * The vm_pgoff of a purely anonymous vma should be irrelevant
         * until its first write fault, when page's anon_vma and index
@@ -2778,16 +2913,10 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
         * using the existing file pgoff checks and manipulations.
         * Similarly in do_mmap_pgoff and in do_brk.
         */
-       if (!vma->vm_file) {
+       if (vma_is_anonymous(vma)) {
                BUG_ON(vma->anon_vma);
                vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
        }
-       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
-                          &prev, &rb_link, &rb_parent))
-               return -ENOMEM;
-       if ((vma->vm_flags & VM_ACCOUNT) &&
-            security_vm_enough_memory_mm(mm, vma_pages(vma)))
-               return -ENOMEM;
 
        vma_link(mm, vma, prev, rb_link, rb_parent);
        return 0;
@@ -2806,14 +2935,13 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        struct mm_struct *mm = vma->vm_mm;
        struct vm_area_struct *new_vma, *prev;
        struct rb_node **rb_link, *rb_parent;
-       struct mempolicy *pol;
        bool faulted_in_anon_vma = true;
 
        /*
         * If anonymous vma has not yet been faulted, update new pgoff
         * to match new location, to increase its chance of merging.
         */
-       if (unlikely(!vma->vm_file && !vma->anon_vma)) {
+       if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
                pgoff = addr >> PAGE_SHIFT;
                faulted_in_anon_vma = false;
        }
@@ -2821,7 +2949,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
                return NULL;    /* should never get here */
        new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
-                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+                           vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+                           vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
        if (new_vma) {
                /*
                 * Source vma may have been merged into new_vma
@@ -2840,38 +2969,37 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                         * safe. It is only safe to keep the vm_pgoff
                         * linear if there are no pages mapped yet.
                         */
-                       VM_BUG_ON(faulted_in_anon_vma);
+                       VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
                        *vmap = vma = new_vma;
                }
                *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
        } else {
                new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-               if (new_vma) {
-                       *new_vma = *vma;
-                       new_vma->vm_start = addr;
-                       new_vma->vm_end = addr + len;
-                       new_vma->vm_pgoff = pgoff;
-                       pol = mpol_dup(vma_policy(vma));
-                       if (IS_ERR(pol))
-                               goto out_free_vma;
-                       vma_set_policy(new_vma, pol);
-                       INIT_LIST_HEAD(&new_vma->anon_vma_chain);
-                       if (anon_vma_clone(new_vma, vma))
-                               goto out_free_mempol;
-                       if (new_vma->vm_file)
-                               get_file(new_vma->vm_file);
-                       if (new_vma->vm_ops && new_vma->vm_ops->open)
-                               new_vma->vm_ops->open(new_vma);
-                       vma_link(mm, new_vma, prev, rb_link, rb_parent);
-                       *need_rmap_locks = false;
-               }
+               if (!new_vma)
+                       goto out;
+               *new_vma = *vma;
+               new_vma->vm_start = addr;
+               new_vma->vm_end = addr + len;
+               new_vma->vm_pgoff = pgoff;
+               if (vma_dup_policy(vma, new_vma))
+                       goto out_free_vma;
+               INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+               if (anon_vma_clone(new_vma, vma))
+                       goto out_free_mempol;
+               if (new_vma->vm_file)
+                       get_file(new_vma->vm_file);
+               if (new_vma->vm_ops && new_vma->vm_ops->open)
+                       new_vma->vm_ops->open(new_vma);
+               vma_link(mm, new_vma, prev, rb_link, rb_parent);
+               *need_rmap_locks = false;
        }
        return new_vma;
 
- out_free_mempol:
-       mpol_put(pol);
- out_free_vma:
+out_free_mempol:
+       mpol_put(vma_policy(new_vma));
+out_free_vma:
        kmem_cache_free(vm_area_cachep, new_vma);
+out:
        return NULL;
 }
 
@@ -2891,6 +3019,31 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
        return 1;
 }
 
+static int special_mapping_fault(struct vm_area_struct *vma,
+                                struct vm_fault *vmf);
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static const char *special_mapping_name(struct vm_area_struct *vma)
+{
+       return ((struct vm_special_mapping *)vma->vm_private_data)->name;
+}
+
+static const struct vm_operations_struct special_mapping_vmops = {
+       .close = special_mapping_close,
+       .fault = special_mapping_fault,
+       .name = special_mapping_name,
+};
+
+static const struct vm_operations_struct legacy_special_mapping_vmops = {
+       .close = special_mapping_close,
+       .fault = special_mapping_fault,
+};
 
 static int special_mapping_fault(struct vm_area_struct *vma,
                                struct vm_fault *vmf)
@@ -2898,15 +3051,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
        pgoff_t pgoff;
        struct page **pages;
 
-       /*
-        * special mappings have no vm_file, and in that case, the mm
-        * uses vm_pgoff internally. So we have to subtract it from here.
-        * We are allowed to do this because we are the mm; do not copy
-        * this code into drivers!
-        */
-       pgoff = vmf->pgoff - vma->vm_pgoff;
+       if (vma->vm_ops == &legacy_special_mapping_vmops)
+               pages = vma->vm_private_data;
+       else
+               pages = ((struct vm_special_mapping *)vma->vm_private_data)->
+                       pages;
 
-       for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
+       for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
                pgoff--;
 
        if (*pages) {
@@ -2919,48 +3070,29 @@ static int special_mapping_fault(struct vm_area_struct *vma,
        return VM_FAULT_SIGBUS;
 }
 
-/*
- * Having a close hook prevents vma merging regardless of flags.
- */
-static void special_mapping_close(struct vm_area_struct *vma)
-{
-}
-
-static const struct vm_operations_struct special_mapping_vmops = {
-       .close = special_mapping_close,
-       .fault = special_mapping_fault,
-};
-
-/*
- * Called with mm->mmap_sem held for writing.
- * Insert a new vma covering the given region, with the given flags.
- * Its pages are supplied by the given array of struct page *.
- * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
- * The region past the last page supplied will always produce SIGBUS.
- * The array pointer and the pages it points to are assumed to stay alive
- * for as long as this mapping might exist.
- */
-int install_special_mapping(struct mm_struct *mm,
-                           unsigned long addr, unsigned long len,
-                           unsigned long vm_flags, struct page **pages)
+static struct vm_area_struct *__install_special_mapping(
+       struct mm_struct *mm,
+       unsigned long addr, unsigned long len,
+       unsigned long vm_flags, void *priv,
+       const struct vm_operations_struct *ops)
 {
        int ret;
        struct vm_area_struct *vma;
 
        vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
        if (unlikely(vma == NULL))
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&vma->anon_vma_chain);
        vma->vm_mm = mm;
        vma->vm_start = addr;
        vma->vm_end = addr + len;
 
-       vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
+       vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
-       vma->vm_ops = &special_mapping_vmops;
-       vma->vm_private_data = pages;
+       vma->vm_ops = ops;
+       vma->vm_private_data = priv;
 
        ret = insert_vm_struct(mm, vma);
        if (ret)
@@ -2970,11 +3102,40 @@ int install_special_mapping(struct mm_struct *mm,
 
        perf_event_mmap(vma);
 
-       return 0;
+       return vma;
 
 out:
        kmem_cache_free(vm_area_cachep, vma);
-       return ret;
+       return ERR_PTR(ret);
+}
+
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+struct vm_area_struct *_install_special_mapping(
+       struct mm_struct *mm,
+       unsigned long addr, unsigned long len,
+       unsigned long vm_flags, const struct vm_special_mapping *spec)
+{
+       return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
+                                       &special_mapping_vmops);
+}
+
+int install_special_mapping(struct mm_struct *mm,
+                           unsigned long addr, unsigned long len,
+                           unsigned long vm_flags, struct page **pages)
+{
+       struct vm_area_struct *vma = __install_special_mapping(
+               mm, addr, len, vm_flags, (void *)pages,
+               &legacy_special_mapping_vmops);
+
+       return PTR_ERR_OR_ZERO(vma);
 }
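The refactoring above splits the old install_special_mapping() into a
common __install_special_mapping() core: the new _install_special_mapping()
takes a struct vm_special_mapping (whose .name shows up via
special_mapping_name() and whose .pages feeds special_mapping_fault()),
while the legacy pages-array entry point survives as a thin wrapper. A
hedged sketch of how a caller, for example an architecture's vdso setup,
might use the new form; map_vdso, vdso_pages and the flag choice are
illustrative, not taken from this diff:

/* Illustrative only: the page array must be filled in before the first
 * fault, and addr/len come from the caller's layout decisions. */
static struct page *vdso_pages[2];

static const struct vm_special_mapping vdso_spec = {
	.name	= "[vdso]",
	.pages	= vdso_pages,
};

static int map_vdso(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;

	vma = _install_special_mapping(mm, addr, 2 * PAGE_SIZE,
				       VM_READ | VM_EXEC |
				       VM_MAYREAD | VM_MAYEXEC,
				       &vdso_spec);
	return PTR_ERR_OR_ZERO(vma);
}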
 
 static DEFINE_MUTEX(mm_all_locks_mutex);
@@ -3016,7 +3177,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                 */
                if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                        BUG();
-               mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
+               down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
        }
 }
 
@@ -3031,8 +3192,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  *
  * mmap_sem in write mode is required in order to block all operations
  * that could modify pagetables and free pages without need of
- * altering the vma layout (for example populate_range() with
- * nonlinear vmas). It's also needed in write mode to avoid new
+ * altering the vma layout. It's also needed in write mode to avoid new
  * anon_vmas to be associated with existing vmas.
  *
  * A single task can't take more than one mm_take_all_locks() in a row
@@ -3043,7 +3203,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
+ * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
@@ -3112,7 +3272,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
                 * AS_MM_ALL_LOCKS can't change to 0 from under us
                 * because we hold the mm_all_locks_mutex.
                 */
-               mutex_unlock(&mapping->i_mmap_mutex);
+               i_mmap_unlock_write(mapping);
                if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
                                        &mapping->flags))
                        BUG();
@@ -3149,7 +3309,7 @@ void __init mmap_init(void)
 {
        int ret;
 
-       ret = percpu_counter_init(&vm_committed_as, 0);
+       ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
        VM_BUG_ON(ret);
 }
 
@@ -3172,7 +3332,7 @@ static int init_user_reserve(void)
        sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
        return 0;
 }
-module_init(init_user_reserve)
+subsys_initcall(init_user_reserve);
 
 /*
  * Initialise sysctl_admin_reserve_kbytes.
@@ -3193,7 +3353,7 @@ static int init_admin_reserve(void)
        sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
        return 0;
 }
-module_init(init_admin_reserve)
+subsys_initcall(init_admin_reserve);
 
 /*
  * Reinitialise user and admin reserves if memory is added or removed.
@@ -3259,8 +3419,8 @@ static struct notifier_block reserve_mem_nb = {
 static int __meminit init_reserve_notifier(void)
 {
        if (register_hotmemory_notifier(&reserve_mem_nb))
-               printk("Failed registering memory add/remove notifier for admin reserve");
+               pr_err("Failed registering memory add/remove notifier for admin reserve\n");
 
        return 0;
 }
-module_init(init_reserve_notifier)
+subsys_initcall(init_reserve_notifier);