diff --git a/mm/mmap.c b/mm/mmap.c
index 84a1c44242a8a8fd8979a4ada23b145abbf1c1e4..6c561acdca92730aaabd50926fd01b7fd9345fb3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -6,10 +6,13 @@
  * Address space accounting code       <alan@lxorguk.ukuu.org.uk>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
@@ -28,6 +31,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
+#include <linux/mmdebug.h>
 #include <linux/perf_event.h>
 #include <linux/audit.h>
 #include <linux/khugepaged.h>
@@ -36,6 +40,8 @@
 #include <linux/sched/sysctl.h>
 #include <linux/notifier.h>
 #include <linux/memory.h>
+#include <linux/printk.h>
+#include <linux/userfaultfd_k.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #define arch_rebalance_pgtables(addr, len)             (addr)
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
+const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
+int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
+const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
+int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
+#endif
+
 static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
                unsigned long start, unsigned long end);
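
The mmap_rnd_bits/mmap_rnd_compat_bits knobs above only export the tunable
range; the per-architecture arch_mmap_rnd() is what consumes them when
randomizing mm->mmap_base. A minimal illustrative sketch of that consumer
(get_random_long() stands in for whatever entropy helper the architecture
actually uses; the real code lives under arch/, not in this file):

/* Illustrative sketch only, not the arch code in this tree. */
static unsigned long example_arch_mmap_rnd(void)
{
        unsigned long rnd;

        /* take mmap_rnd_bits bits of entropy, then shift to page granularity */
        rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);

        return rnd << PAGE_SHIFT;
}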
@@ -65,7 +83,7 @@ static void unmap_region(struct mm_struct *mm,
  * MAP_SHARED  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
  *             w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
  *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
- *             
+ *
  * MAP_PRIVATE r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
  *             w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
  *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
@@ -84,8 +102,28 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
+static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
+{
+       return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
+}
+
+/* Update vma->vm_page_prot to reflect vma->vm_flags. */
+void vma_set_page_prot(struct vm_area_struct *vma)
+{
+       unsigned long vm_flags = vma->vm_flags;
+
+       vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
+       if (vma_wants_writenotify(vma)) {
+               vm_flags &= ~VM_SHARED;
+               vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
+                                                    vm_flags);
+       }
+}
+
 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio __read_mostly = 50;        /* default is 50% */
+unsigned long sysctl_overcommit_kbytes __read_mostly;
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
 unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
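
vm_pgprot_modify() leans on the architecture's pgprot_modify() to preserve any
special attribute bits a driver's ->mmap() may have set (uncached mappings, for
instance) while refreshing only the protection bits; that is what lets the
hand-rolled pgprot_noncached() fixup be dropped from mmap_region() further
down. Roughly what the x86 variant does (pgprotval_t and _PAGE_CHG_MASK are
x86 names; other architectures differ or simply return newprot):

static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
        /* keep the cacheability/attribute bits, replace the protection bits */
        pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
        pgprotval_t addbits = pgprot_val(newprot);

        return __pgprot(preservebits | addbits);
}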
@@ -129,6 +167,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
        long free, allowed, reserve;
 
+       VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+                       -(s64)vm_committed_as_batch * num_online_cpus(),
+                       "memory commitment underflow");
+
        vm_acct_memory(pages);
 
        /*
@@ -179,14 +221,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                goto error;
        }
 
-       allowed = (totalram_pages - hugetlb_total_pages())
-               * sysctl_overcommit_ratio / 100;
+       allowed = vm_commit_limit();
        /*
         * Reserve some for root
         */
        if (!cap_sys_admin)
                allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
-       allowed += total_swap_pages;
 
        /*
         * Don't let a single process grow so big a user can't recover
@@ -205,7 +245,7 @@ error:
 }
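
The old overcommit formula is folded into vm_commit_limit(), which also
honours the new sysctl_overcommit_kbytes declared above (an absolute limit
that, when non-zero, takes precedence over sysctl_overcommit_ratio) and which
already adds total_swap_pages -- hence the explicit addition disappearing from
__vm_enough_memory(). Approximately, per mm/util.c:

unsigned long vm_commit_limit(void)
{
        unsigned long allowed;

        if (sysctl_overcommit_kbytes)
                allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
        else
                allowed = ((totalram_pages - hugetlb_total_pages())
                           * sysctl_overcommit_ratio / 100);
        allowed += total_swap_pages;

        return allowed;
}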
 
 /*
- * Requires inode->i_mapping->i_mmap_mutex
+ * Requires inode->i_mapping->i_mmap_rwsem
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                struct file *file, struct address_space *mapping)
@@ -213,13 +253,10 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
        if (vma->vm_flags & VM_DENYWRITE)
                atomic_inc(&file_inode(file)->i_writecount);
        if (vma->vm_flags & VM_SHARED)
-               mapping->i_mmap_writable--;
+               mapping_unmap_writable(mapping);
 
        flush_dcache_mmap_lock(mapping);
-       if (unlikely(vma->vm_flags & VM_NONLINEAR))
-               list_del_init(&vma->shared.nonlinear);
-       else
-               vma_interval_tree_remove(vma, &mapping->i_mmap);
+       vma_interval_tree_remove(vma, &mapping->i_mmap);
        flush_dcache_mmap_unlock(mapping);
 }
 
@@ -233,9 +270,9 @@ void unlink_file_vma(struct vm_area_struct *vma)
 
        if (file) {
                struct address_space *mapping = file->f_mapping;
-               mutex_lock(&mapping->i_mmap_mutex);
+               i_mmap_lock_write(mapping);
                __remove_shared_vm_struct(vma, file, mapping);
-               mutex_unlock(&mapping->i_mmap_mutex);
+               i_mmap_unlock_write(mapping);
        }
 }
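
i_mmap_lock_write()/i_mmap_unlock_write() and the mapping_map_writable()/
mapping_unmap_writable() pair used in this diff are thin wrappers from
include/linux/fs.h around the new i_mmap_rwsem and the now-atomic
i_mmap_writable counter; roughly:

static inline void i_mmap_lock_write(struct address_space *mapping)
{
        down_write(&mapping->i_mmap_rwsem);
}

static inline void i_mmap_unlock_write(struct address_space *mapping)
{
        up_write(&mapping->i_mmap_rwsem);
}

/* i_mmap_writable is held negative while writable mappings are blocked */
static inline int mapping_map_writable(struct address_space *mapping)
{
        return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
                0 : -EPERM;
}

static inline void mapping_unmap_writable(struct address_space *mapping)
{
        atomic_dec(&mapping->i_mmap_writable);
}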
 
@@ -260,7 +297,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len);
 
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
-       unsigned long rlim, retval;
+       unsigned long retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;
        unsigned long min_brk;
@@ -290,9 +327,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
          * segment grow beyond its set limit in the case where the limit is
         * not page aligned -Ram Gupta
         */
-       rlim = rlimit(RLIMIT_DATA);
-       if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
-                       (mm->end_data - mm->start_data) > rlim)
+       if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
+                             mm->end_data, mm->start_data))
                goto out;
 
        newbrk = PAGE_ALIGN(brk);
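
check_data_rlimit() replaces the open-coded rlim arithmetic with a reusable
helper (also shared with the prctl(PR_SET_MM) paths). Roughly, per
include/linux/mm.h:

static inline int check_data_rlimit(unsigned long rlim,
                                    unsigned long new,
                                    unsigned long start,
                                    unsigned long end_data,
                                    unsigned long start_data)
{
        /* same test as before: brk growth plus data segment vs RLIMIT_DATA */
        if (rlim < RLIM_INFINITY) {
                if (((new - start) + (end_data - start_data)) > rlim)
                        return -ENOSPC;
        }

        return 0;
}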
@@ -361,20 +397,22 @@ static int browse_rb(struct rb_root *root)
                struct vm_area_struct *vma;
                vma = rb_entry(nd, struct vm_area_struct, vm_rb);
                if (vma->vm_start < prev) {
-                       printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
+                       pr_emerg("vm_start %lx < prev %lx\n",
+                                 vma->vm_start, prev);
                        bug = 1;
                }
                if (vma->vm_start < pend) {
-                       printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
+                       pr_emerg("vm_start %lx < pend %lx\n",
+                                 vma->vm_start, pend);
                        bug = 1;
                }
                if (vma->vm_start > vma->vm_end) {
-                       printk("vm_end %lx < vm_start %lx\n",
-                               vma->vm_end, vma->vm_start);
+                       pr_emerg("vm_start %lx > vm_end %lx\n",
+                                 vma->vm_start, vma->vm_end);
                        bug = 1;
                }
                if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
-                       printk("free gap %lx, correct %lx\n",
+                       pr_emerg("free gap %lx, correct %lx\n",
                               vma->rb_subtree_gap,
                               vma_compute_subtree_gap(vma));
                        bug = 1;
@@ -388,7 +426,7 @@ static int browse_rb(struct rb_root *root)
        for (nd = pn; nd; nd = rb_prev(nd))
                j++;
        if (i != j) {
-               printk("backwards %d, forwards %d\n", j, i);
+               pr_emerg("backwards %d, forwards %d\n", j, i);
                bug = 1;
        }
        return bug ? -1 : i;
@@ -401,42 +439,50 @@ static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
        for (nd = rb_first(root); nd; nd = rb_next(nd)) {
                struct vm_area_struct *vma;
                vma = rb_entry(nd, struct vm_area_struct, vm_rb);
-               BUG_ON(vma != ignore &&
-                      vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
+               VM_BUG_ON_VMA(vma != ignore &&
+                       vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
+                       vma);
        }
 }
 
-void validate_mm(struct mm_struct *mm)
+static void validate_mm(struct mm_struct *mm)
 {
        int bug = 0;
        int i = 0;
        unsigned long highest_address = 0;
        struct vm_area_struct *vma = mm->mmap;
+
        while (vma) {
+               struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
-               vma_lock_anon_vma(vma);
-               list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                       anon_vma_interval_tree_verify(avc);
-               vma_unlock_anon_vma(vma);
+
+               if (anon_vma) {
+                       anon_vma_lock_read(anon_vma);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
+                       anon_vma_unlock_read(anon_vma);
+               }
+
                highest_address = vma->vm_end;
                vma = vma->vm_next;
                i++;
        }
        if (i != mm->map_count) {
-               printk("map_count %d vm_next %d\n", mm->map_count, i);
+               pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
                bug = 1;
        }
        if (highest_address != mm->highest_vm_end) {
-               printk("mm->highest_vm_end %lx, found %lx\n",
-                      mm->highest_vm_end, highest_address);
+               pr_emerg("mm->highest_vm_end %lx, found %lx\n",
+                         mm->highest_vm_end, highest_address);
                bug = 1;
        }
        i = browse_rb(&mm->mm_rb);
        if (i != mm->map_count) {
-               printk("map_count %d rb %d\n", mm->map_count, i);
+               if (i != -1)
+                       pr_emerg("map_count %d rb %d\n", mm->map_count, i);
                bug = 1;
        }
-       BUG_ON(bug);
+       VM_BUG_ON_MM(bug, mm);
 }
 #else
 #define validate_mm_rb(root, ignore) do { } while (0)
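
VM_BUG_ON_VMA() and VM_BUG_ON_MM() come from the newly included
<linux/mmdebug.h>: with CONFIG_DEBUG_VM they dump the offending vma/mm before
BUG()ing, and otherwise they compile away like VM_BUG_ON(). A sketch of the
debug flavour:

#define VM_BUG_ON_VMA(cond, vma)                                \
        do {                                                    \
                if (unlikely(cond)) {                           \
                        dump_vma(vma);                          \
                        BUG();                                  \
                }                                               \
        } while (0)

#define VM_BUG_ON_MM(cond, mm)                                  \
        do {                                                    \
                if (unlikely(cond)) {                           \
                        dump_mm(mm);                            \
                        BUG();                                  \
                }                                               \
        } while (0)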
@@ -614,13 +660,10 @@ static void __vma_link_file(struct vm_area_struct *vma)
                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_dec(&file_inode(file)->i_writecount);
                if (vma->vm_flags & VM_SHARED)
-                       mapping->i_mmap_writable++;
+                       atomic_inc(&mapping->i_mmap_writable);
 
                flush_dcache_mmap_lock(mapping);
-               if (unlikely(vma->vm_flags & VM_NONLINEAR))
-                       vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
-               else
-                       vma_interval_tree_insert(vma, &mapping->i_mmap);
+               vma_interval_tree_insert(vma, &mapping->i_mmap);
                flush_dcache_mmap_unlock(mapping);
        }
 }
@@ -640,17 +683,16 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct address_space *mapping = NULL;
 
-       if (vma->vm_file)
+       if (vma->vm_file) {
                mapping = vma->vm_file->f_mapping;
-
-       if (mapping)
-               mutex_lock(&mapping->i_mmap_mutex);
+               i_mmap_lock_write(mapping);
+       }
 
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        __vma_link_file(vma);
 
        if (mapping)
-               mutex_unlock(&mapping->i_mmap_mutex);
+               i_mmap_unlock_write(mapping);
 
        mm->map_count++;
        validate_mm(mm);
@@ -682,8 +724,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
        prev->vm_next = next = vma->vm_next;
        if (next)
                next->vm_prev = prev;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = prev;
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
 }
 
 /*
@@ -733,7 +776,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
                         * split_vma inserting another: so it must be
                         * mprotect case 4 shifting the boundary down.
                         */
-                       adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
+                       adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
                        exporter = vma;
                        importer = next;
                }
@@ -744,24 +787,24 @@ again:                    remove_next = 1 + (end > next->vm_end);
                 * shrinking vma had, to cover any anon pages imported.
                 */
                if (exporter && exporter->anon_vma && !importer->anon_vma) {
-                       if (anon_vma_clone(importer, exporter))
-                               return -ENOMEM;
+                       int error;
+
                        importer->anon_vma = exporter->anon_vma;
+                       error = anon_vma_clone(importer, exporter);
+                       if (error)
+                               return error;
                }
        }
 
        if (file) {
                mapping = file->f_mapping;
-               if (!(vma->vm_flags & VM_NONLINEAR)) {
-                       root = &mapping->i_mmap;
-                       uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+               root = &mapping->i_mmap;
+               uprobe_munmap(vma, vma->vm_start, vma->vm_end);
 
-                       if (adjust_next)
-                               uprobe_munmap(next, next->vm_start,
-                                                       next->vm_end);
-               }
+               if (adjust_next)
+                       uprobe_munmap(next, next->vm_start, next->vm_end);
 
-               mutex_lock(&mapping->i_mmap_mutex);
+               i_mmap_lock_write(mapping);
                if (insert) {
                        /*
                         * Put into interval tree now, so instantiated pages
@@ -779,8 +822,8 @@ again:                      remove_next = 1 + (end > next->vm_end);
        if (!anon_vma && adjust_next)
                anon_vma = next->anon_vma;
        if (anon_vma) {
-               VM_BUG_ON(adjust_next && next->anon_vma &&
-                         anon_vma != next->anon_vma);
+               VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
+                         anon_vma != next->anon_vma, next);
                anon_vma_lock_write(anon_vma);
                anon_vma_interval_tree_pre_update_vma(vma);
                if (adjust_next)
@@ -848,7 +891,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
                anon_vma_unlock_write(anon_vma);
        }
        if (mapping)
-               mutex_unlock(&mapping->i_mmap_mutex);
+               i_mmap_unlock_write(mapping);
 
        if (root) {
                uprobe_mmap(vma);
@@ -893,15 +936,26 @@ again:                    remove_next = 1 + (end > next->vm_end);
  * per-vma resources, so we don't attempt to merge those.
  */
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
-                       struct file *file, unsigned long vm_flags,
-                       const char __user *anon_name)
+                               struct file *file, unsigned long vm_flags,
+                               struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+                               const char __user *anon_name)
 {
-       if (vma->vm_flags ^ vm_flags)
+       /*
+        * VM_SOFTDIRTY should not prevent VMA merging when the flags
+        * match in everything except the dirty bit -- the caller should
+        * mark the merged VMA as dirty. If the dirty bit were included
+        * in the comparison, we would increase pressure on the memory
+        * system by forcing the kernel to generate new VMAs where an old
+        * one could have been extended instead.
+        */
+       if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
                return 0;
        if (vma->vm_file != file)
                return 0;
        if (vma->vm_ops && vma->vm_ops->close)
                return 0;
+       if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
+               return 0;
        if (vma_get_anon_name(vma) != anon_name)
                return 0;
        return 1;
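
The new vm_userfaultfd_ctx parameter keeps VMAs with different userfaultfd
contexts from being merged; callers with no context to match pass
NULL_VM_UFFD_CTX (an empty struct). With CONFIG_USERFAULTFD the helper is
roughly the following, per include/linux/userfaultfd_k.h (without the config
option it simply returns true):

static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
                                        struct vm_userfaultfd_ctx vm_ctx)
{
        return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
}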
@@ -934,10 +988,12 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
  */
 static int
 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
-       const char __user *anon_name)
+                    struct anon_vma *anon_vma, struct file *file,
+                    pgoff_t vm_pgoff,
+                    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+                    const char __user *anon_name)
 {
-       if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
+       if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
            is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                if (vma->vm_pgoff == vm_pgoff)
                        return 1;
@@ -954,13 +1010,15 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
  */
 static int
 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
-       const char __user *anon_name)
+                   struct anon_vma *anon_vma, struct file *file,
+                   pgoff_t vm_pgoff,
+                   struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+                   const char __user *anon_name)
 {
-       if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
+       if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
            is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                pgoff_t vm_pglen;
-               vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+               vm_pglen = vma_pages(vma);
                if (vma->vm_pgoff + vm_pglen == vm_pgoff)
                        return 1;
        }
@@ -999,8 +1057,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
 struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        struct vm_area_struct *prev, unsigned long addr,
                        unsigned long end, unsigned long vm_flags,
-                       struct anon_vma *anon_vma, struct file *file,
+                       struct anon_vma *anon_vma, struct file *file,
                        pgoff_t pgoff, struct mempolicy *policy,
+                       struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
                        const char __user *anon_name)
 {
        pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
@@ -1026,16 +1085,21 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         * Can it merge with the predecessor?
         */
        if (prev && prev->vm_end == addr &&
-                       mpol_equal(vma_policy(prev), policy) &&
-                       can_vma_merge_after(prev, vm_flags, anon_vma,
-                                               file, pgoff, anon_name)) {
+                       mpol_equal(vma_policy(prev), policy) &&
+                       can_vma_merge_after(prev, vm_flags,
+                                           anon_vma, file, pgoff,
+                                           vm_userfaultfd_ctx,
+                                           anon_name)) {
                /*
                 * OK, it can.  Can we now merge in the successor as well?
                 */
                if (next && end == next->vm_start &&
                                mpol_equal(policy, vma_policy(next)) &&
-                               can_vma_merge_before(next, vm_flags, anon_vma,
-                                               file, pgoff+pglen, anon_name) &&
+                               can_vma_merge_before(next, vm_flags,
+                                                    anon_vma, file,
+                                                    pgoff+pglen,
+                                                    vm_userfaultfd_ctx,
+                                                    anon_name) &&
                                is_mergeable_anon_vma(prev->anon_vma,
                                                      next->anon_vma, NULL)) {
                                                        /* cases 1, 6 */
@@ -1046,7 +1110,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                end, prev->vm_pgoff, NULL);
                if (err)
                        return NULL;
-               khugepaged_enter_vma_merge(prev);
+               khugepaged_enter_vma_merge(prev, vm_flags);
                return prev;
        }
 
@@ -1054,9 +1118,11 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         * Can this new request be merged in front of next?
         */
        if (next && end == next->vm_start &&
-                       mpol_equal(policy, vma_policy(next)) &&
-                       can_vma_merge_before(next, vm_flags, anon_vma,
-                                       file, pgoff+pglen, anon_name)) {
+                       mpol_equal(policy, vma_policy(next)) &&
+                       can_vma_merge_before(next, vm_flags,
+                                            anon_vma, file, pgoff+pglen,
+                                            vm_userfaultfd_ctx,
+                                            anon_name)) {
                if (prev && addr < prev->vm_end)        /* case 4 */
                        err = vma_adjust(prev, prev->vm_start,
                                addr, prev->vm_pgoff, NULL);
@@ -1065,7 +1131,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                next->vm_pgoff - pglen, NULL);
                if (err)
                        return NULL;
-               khugepaged_enter_vma_merge(area);
+               khugepaged_enter_vma_merge(area, vm_flags);
                return area;
        }
 
@@ -1090,7 +1156,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
        return a->vm_end == b->vm_start &&
                mpol_equal(vma_policy(a), vma_policy(b)) &&
                a->vm_file == b->vm_file &&
-               !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+               !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
                b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
 }
 
@@ -1105,7 +1171,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
  * by another page fault trying to merge _that_. But that's ok: if it
  * is being set up, that automatically means that it will be a singleton
  * acceptable for merging, so we can do all of this optimistically. But
- * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ * we do that READ_ONCE() to make sure that we never re-load the pointer.
  *
  * IOW: that the "list_is_singular()" test on the anon_vma_chain only
  * matters for the 'stable anon_vma' case (ie the thing we want to avoid
@@ -1119,7 +1185,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
 static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
 {
        if (anon_vma_compatible(a, b)) {
-               struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+               struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
 
                if (anon_vma && list_is_singular(&old->anon_vma_chain))
                        return anon_vma;
@@ -1198,21 +1264,39 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
        return hint;
 }
 
+static inline int mlock_future_check(struct mm_struct *mm,
+                                    unsigned long flags,
+                                    unsigned long len)
+{
+       unsigned long locked, lock_limit;
+
+       /*  mlock MCL_FUTURE? */
+       if (flags & VM_LOCKED) {
+               locked = len >> PAGE_SHIFT;
+               locked += mm->locked_vm;
+               lock_limit = rlimit(RLIMIT_MEMLOCK);
+               lock_limit >>= PAGE_SHIFT;
+               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+                       return -EAGAIN;
+       }
+       return 0;
+}
+
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
-
-unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+unsigned long do_mmap(struct file *file, unsigned long addr,
                        unsigned long len, unsigned long prot,
-                       unsigned long flags, unsigned long pgoff,
-                       unsigned long *populate)
+                       unsigned long flags, vm_flags_t vm_flags,
+                       unsigned long pgoff, unsigned long *populate)
 {
-       struct mm_struct * mm = current->mm;
-       struct inode *inode;
-       vm_flags_t vm_flags;
+       struct mm_struct *mm = current->mm;
 
        *populate = 0;
 
+       if (!len)
+               return -EINVAL;
+
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC?
         *
@@ -1220,12 +1304,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         *  mounted, in which case we don't add PROT_EXEC.)
         */
        if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
-               if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
+               if (!(file && path_noexec(&file->f_path)))
                        prot |= PROT_EXEC;
 
-       if (!len)
-               return -EINVAL;
-
        if (!(flags & MAP_FIXED))
                addr = round_hint_to_min(addr);
 
@@ -1236,7 +1317,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 
        /* offset overflow? */
        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
-               return -EOVERFLOW;
+               return -EOVERFLOW;
 
        /* Too many mappings? */
        if (mm->map_count > sysctl_max_map_count)
@@ -1246,34 +1327,26 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         * that it represents a valid section of the address space.
         */
        addr = get_unmapped_area(file, addr, len, pgoff, flags);
-       if (addr & ~PAGE_MASK)
+       if (offset_in_page(addr))
                return addr;
 
        /* Do simple checking here so the lower-level routines won't have
         * to. we assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
-       vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+       vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
                        mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 
        if (flags & MAP_LOCKED)
                if (!can_do_mlock())
                        return -EPERM;
 
-       /* mlock MCL_FUTURE? */
-       if (vm_flags & VM_LOCKED) {
-               unsigned long locked, lock_limit;
-               locked = len >> PAGE_SHIFT;
-               locked += mm->locked_vm;
-               lock_limit = rlimit(RLIMIT_MEMLOCK);
-               lock_limit >>= PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       return -EAGAIN;
-       }
-
-       inode = file ? file_inode(file) : NULL;
+       if (mlock_future_check(mm, vm_flags, len))
+               return -EAGAIN;
 
        if (file) {
+               struct inode *inode = file_inode(file);
+
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
                        if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
@@ -1289,7 +1362,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                        /*
                         * Make sure there are no mandatory locks on the file.
                         */
-                       if (locks_verify_locked(inode))
+                       if (locks_verify_locked(file))
                                return -EAGAIN;
 
                        vm_flags |= VM_SHARED | VM_MAYSHARE;
@@ -1300,14 +1373,16 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                case MAP_PRIVATE:
                        if (!(file->f_mode & FMODE_READ))
                                return -EACCES;
-                       if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
+                       if (path_noexec(&file->f_path)) {
                                if (vm_flags & VM_EXEC)
                                        return -EPERM;
                                vm_flags &= ~VM_MAYEXEC;
                        }
 
-                       if (!file->f_op || !file->f_op->mmap)
+                       if (!file->f_op->mmap)
                                return -ENODEV;
+                       if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                               return -EINVAL;
                        break;
 
                default:
@@ -1316,6 +1391,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        } else {
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
+                       if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                               return -EINVAL;
                        /*
                         * Ignore pgoff.
                         */
@@ -1360,22 +1437,23 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
                unsigned long, fd, unsigned long, pgoff)
 {
        struct file *file = NULL;
-       unsigned long retval = -EBADF;
+       unsigned long retval;
 
        if (!(flags & MAP_ANONYMOUS)) {
                audit_mmap_fd(fd, flags);
-               if (unlikely(flags & MAP_HUGETLB))
-                       return -EINVAL;
                file = fget(fd);
                if (!file)
-                       goto out;
+                       return -EBADF;
                if (is_file_hugepages(file))
                        len = ALIGN(len, huge_page_size(hstate_file(file)));
+               retval = -EINVAL;
+               if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
+                       goto out_fput;
        } else if (flags & MAP_HUGETLB) {
                struct user_struct *user = NULL;
-               struct hstate *hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) &
-                                                  SHM_HUGE_MASK);
+               struct hstate *hs;
 
+               hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK);
                if (!hs)
                        return -EINVAL;
 
@@ -1397,9 +1475,9 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
        retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+out_fput:
        if (file)
                fput(file);
-out:
        return retval;
 }
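
With the extra vm_flags argument, do_mmap_pgoff() survives only as a static
inline wrapper in include/linux/mm.h that existing callers (vm_mmap_pgoff(),
and hence this syscall) keep using; roughly:

static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot, unsigned long flags,
        unsigned long pgoff, unsigned long *populate)
{
        /* no caller-supplied vm_flags on this path */
        return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
}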
 
@@ -1419,7 +1497,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 
        if (copy_from_user(&a, arg, sizeof(a)))
                return -EFAULT;
-       if (a.offset & ~PAGE_MASK)
+       if (offset_in_page(a.offset))
                return -EINVAL;
 
        return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
@@ -1436,20 +1514,26 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 int vma_wants_writenotify(struct vm_area_struct *vma)
 {
        vm_flags_t vm_flags = vma->vm_flags;
+       const struct vm_operations_struct *vm_ops = vma->vm_ops;
 
        /* If it was private or non-writable, the write bit is already clear */
        if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
                return 0;
 
        /* The backer wishes to know when pages are first written to? */
-       if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+       if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
                return 1;
 
-       /* The open routine did something to the protections already? */
+       /* The open routine did something to the protections that pgprot_modify
+        * won't preserve? */
        if (pgprot_val(vma->vm_page_prot) !=
-           pgprot_val(vm_get_page_prot(vm_flags)))
+           pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
                return 0;
 
+       /* Do we need to track softdirty? */
+       if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
+               return 1;
+
        /* Specialty mapping? */
        if (vm_flags & VM_PFNMAP)
                return 0;
@@ -1480,11 +1564,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
-       int correct_wcount = 0;
        int error;
        struct rb_node **rb_link, *rb_parent;
        unsigned long charged = 0;
-       struct inode *inode =  file ? file_inode(file) : NULL;
 
        /* Check against address space limit. */
        if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
@@ -1504,12 +1586,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        }
 
        /* Clear old maps */
-       error = -ENOMEM;
-munmap_back:
-       if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
+       while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
+                             &rb_parent)) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
-               goto munmap_back;
        }
 
        /*
@@ -1525,8 +1605,8 @@ munmap_back:
        /*
         * Can we just expand an old mapping?
         */
-       vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
-                       NULL, NULL);
+       vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+                       NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
        if (vma)
                goto out;
 
@@ -1549,17 +1629,23 @@ munmap_back:
        vma->vm_pgoff = pgoff;
        INIT_LIST_HEAD(&vma->anon_vma_chain);
 
-       error = -EINVAL;        /* when rejecting VM_GROWSDOWN|VM_GROWSUP */
-
        if (file) {
-               if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
-                       goto free_vma;
                if (vm_flags & VM_DENYWRITE) {
                        error = deny_write_access(file);
                        if (error)
                                goto free_vma;
-                       correct_wcount = 1;
                }
+               if (vm_flags & VM_SHARED) {
+                       error = mapping_map_writable(file->f_mapping);
+                       if (error)
+                               goto allow_write_and_free_vma;
+               }
+
+               /* ->mmap() can change vma->vm_file, but must guarantee that
+                * vma_link() below can deny write-access if VM_DENYWRITE is set
+                * and map writably if VM_SHARED is set. This usually means the
+                * new file must not have been exposed to user-space, yet.
+                */
                vma->vm_file = get_file(file);
                error = file->f_op->mmap(file, vma);
                if (error)
@@ -1575,37 +1661,22 @@ munmap_back:
                WARN_ON_ONCE(addr != vma->vm_start);
 
                addr = vma->vm_start;
-               pgoff = vma->vm_pgoff;
                vm_flags = vma->vm_flags;
        } else if (vm_flags & VM_SHARED) {
-               if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
-                       goto free_vma;
                error = shmem_zero_setup(vma);
                if (error)
                        goto free_vma;
        }
 
-       if (vma_wants_writenotify(vma)) {
-               pgprot_t pprot = vma->vm_page_prot;
-
-               /* Can vma->vm_page_prot have changed??
-                *
-                * Answer: Yes, drivers may have changed it in their
-                *         f_op->mmap method.
-                *
-                * Ensures that vmas marked as uncached stay that way.
-                */
-               vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
-               if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
-                       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-       }
-
        vma_link(mm, vma, prev, rb_link, rb_parent);
-       file = vma->vm_file;
-
        /* Once vma denies write, undo our temporary denial count */
-       if (correct_wcount)
-               atomic_inc(&inode->i_writecount);
+       if (file) {
+               if (vm_flags & VM_SHARED)
+                       mapping_unmap_writable(file->f_mapping);
+               if (vm_flags & VM_DENYWRITE)
+                       allow_write_access(file);
+       }
+       file = vma->vm_file;
 out:
        perf_event_mmap(vma);
 
@@ -1615,23 +1686,37 @@ out:
                                        vma == get_gate_vma(current->mm)))
                        mm->locked_vm += (len >> PAGE_SHIFT);
                else
-                       vma->vm_flags &= ~VM_LOCKED;
+                       vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
        }
 
        if (file)
                uprobe_mmap(vma);
 
+       /*
+        * A new (or expanded) vma always gets soft-dirty status.
+        * Otherwise the user-space soft-dirty page tracker could not
+        * distinguish the case where a vma area is unmapped and then a
+        * new one is mapped in its place (which must be treated as a
+        * completely new data area).
+        */
+       vma->vm_flags |= VM_SOFTDIRTY;
+
+       vma_set_page_prot(vma);
+
        return addr;
 
 unmap_and_free_vma:
-       if (correct_wcount)
-               atomic_inc(&inode->i_writecount);
        vma->vm_file = NULL;
        fput(file);
 
        /* Undo any partial mapping done by a device driver. */
        unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
        charged = 0;
+       if (vm_flags & VM_SHARED)
+               mapping_unmap_writable(file->f_mapping);
+allow_write_and_free_vma:
+       if (vm_flags & VM_DENYWRITE)
+               allow_write_access(file);
 free_vma:
        kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
@@ -1876,21 +1961,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
        info.flags = 0;
        info.length = len;
-       info.low_limit = TASK_UNMAPPED_BASE;
+       info.low_limit = mm->mmap_base;
        info.high_limit = TASK_SIZE;
        info.align_mask = 0;
        return vm_unmapped_area(&info);
 }
-#endif 
-
-void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
-{
-       /*
-        * Is this a new hole at the lowest possible address?
-        */
-       if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
-               mm->free_area_cache = addr;
-}
+#endif
 
 /*
  * This mmap-allocator allocates new areas top-down from below the
@@ -1936,7 +2012,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
         * can happen with large stack limits and large mmap()
         * allocations.
         */
-       if (addr & ~PAGE_MASK) {
+       if (offset_in_page(addr)) {
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = TASK_UNMAPPED_BASE;
@@ -1948,19 +2024,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 }
 #endif
 
-void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
-{
-       /*
-        * Is this a new hole at the highest possible address?
-        */
-       if (addr > mm->free_area_cache)
-               mm->free_area_cache = addr;
-
-       /* dont allow allocations above current base */
-       if (mm->free_area_cache > mm->mmap_base)
-               mm->free_area_cache = mm->mmap_base;
-}
-
 unsigned long
 get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
@@ -1977,7 +2040,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                return -ENOMEM;
 
        get_area = current->mm->get_unmapped_area;
-       if (file && file->f_op && file->f_op->get_unmapped_area)
+       if (file && file->f_op->get_unmapped_area)
                get_area = file->f_op->get_unmapped_area;
        addr = get_area(file, addr, len, pgoff, flags);
        if (IS_ERR_VALUE(addr))
@@ -1985,7 +2048,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
        if (addr > TASK_SIZE - len)
                return -ENOMEM;
-       if (addr & ~PAGE_MASK)
+       if (offset_in_page(addr))
                return -EINVAL;
 
        addr = arch_rebalance_pgtables(addr, len);
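
offset_in_page() is simply a named form of the old "addr & ~PAGE_MASK" test
(include/linux/mm.h); a non-zero result means the address is not page aligned:

#define offset_in_page(p)       ((unsigned long)(p) & ~PAGE_MASK)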
@@ -1998,34 +2061,32 @@ EXPORT_SYMBOL(get_unmapped_area);
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
-       struct vm_area_struct *vma = NULL;
+       struct rb_node *rb_node;
+       struct vm_area_struct *vma;
 
        /* Check the cache first. */
-       /* (Cache hit rate is typically around 35%.) */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
-               struct rb_node *rb_node;
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
+               return vma;
 
-               rb_node = mm->mm_rb.rb_node;
-               vma = NULL;
+       rb_node = mm->mm_rb.rb_node;
 
-               while (rb_node) {
-                       struct vm_area_struct *vma_tmp;
-
-                       vma_tmp = rb_entry(rb_node,
-                                          struct vm_area_struct, vm_rb);
-
-                       if (vma_tmp->vm_end > addr) {
-                               vma = vma_tmp;
-                               if (vma_tmp->vm_start <= addr)
-                                       break;
-                               rb_node = rb_node->rb_left;
-                       } else
-                               rb_node = rb_node->rb_right;
-               }
-               if (vma)
-                       mm->mmap_cache = vma;
+       while (rb_node) {
+               struct vm_area_struct *tmp;
+
+               tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+               if (tmp->vm_end > addr) {
+                       vma = tmp;
+                       if (tmp->vm_start <= addr)
+                               break;
+                       rb_node = rb_node->rb_left;
+               } else
+                       rb_node = rb_node->rb_right;
        }
+
+       if (vma)
+               vmacache_update(addr, vma);
        return vma;
 }
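
The single-entry mm->mmap_cache is gone in favour of the per-task vmacache
(<linux/vmacache.h>): a small array of VMACACHE_SIZE slots in task_struct,
refreshed by vmacache_update() on lookups and invalidated wholesale by bumping
mm->vmacache_seqnum, as vmacache_invalidate() does in __vma_unlink() and
detach_vmas_to_be_unmapped() in this diff. A simplified sketch of the lookup
side, per mm/vmacache.c:

struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
{
        int i;

        /* a seqnum mismatch means the task's cache is stale for this mm */
        if (!vmacache_valid(mm))
                return NULL;

        for (i = 0; i < VMACACHE_SIZE; i++) {
                struct vm_area_struct *vma = current->vmacache[i];

                if (vma && vma->vm_start <= addr && vma->vm_end > addr)
                        return vma;
        }

        return NULL;
}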
 
@@ -2073,7 +2134,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
        actual_size = size;
        if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
                actual_size -= PAGE_SIZE;
-       if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+       if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
                return -ENOMEM;
 
        /* mlock limit tests */
@@ -2081,7 +2142,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
                unsigned long locked;
                unsigned long limit;
                locked = mm->locked_vm + grow;
-               limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+               limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
                limit >>= PAGE_SHIFT;
                if (locked > limit && !capable(CAP_IPC_LOCK))
                        return -ENOMEM;
@@ -2100,10 +2161,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
        if (security_vm_enough_memory_mm(mm, grow))
                return -ENOMEM;
 
-       /* Ok, everything looks good - let it rip */
-       if (vma->vm_flags & VM_LOCKED)
-               mm->locked_vm += grow;
-       vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
        return 0;
 }
 
@@ -2114,32 +2171,28 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
  */
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
-       int error;
+       struct mm_struct *mm = vma->vm_mm;
+       int error = 0;
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
+       /* Guard against wrapping around to address 0. */
+       if (address < PAGE_ALIGN(address+4))
+               address = PAGE_ALIGN(address+4);
+       else
+               return -ENOMEM;
+
+       /* We must make sure the anon_vma is allocated. */
        if (unlikely(anon_vma_prepare(vma)))
                return -ENOMEM;
-       vma_lock_anon_vma(vma);
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
-        * Also guard against wrapping around to address 0.
         */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else {
-               vma_unlock_anon_vma(vma);
-               return -ENOMEM;
-       }
-       error = 0;
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address > vma->vm_end) {
@@ -2157,29 +2210,33 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
                                 * against concurrent vma expansions.
                                 */
-                               spin_lock(&vma->vm_mm->page_table_lock);
+                               spin_lock(&mm->page_table_lock);
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags,
+                                               vma->vm_file, grow);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_end = address;
                                anon_vma_interval_tree_post_update_vma(vma);
                                if (vma->vm_next)
                                        vma_gap_update(vma->vm_next);
                                else
-                                       vma->vm_mm->highest_vm_end = address;
-                               spin_unlock(&vma->vm_mm->page_table_lock);
+                                       mm->highest_vm_end = address;
+                               spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
-       khugepaged_enter_vma_merge(vma);
-       validate_mm(vma->vm_mm);
+       anon_vma_unlock_write(vma->anon_vma);
+       khugepaged_enter_vma_merge(vma, vma->vm_flags);
+       validate_mm(mm);
        return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2190,27 +2247,24 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 int expand_downwards(struct vm_area_struct *vma,
                                   unsigned long address)
 {
+       struct mm_struct *mm = vma->vm_mm;
        int error;
 
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
-       if (unlikely(anon_vma_prepare(vma)))
-               return -ENOMEM;
-
        address &= PAGE_MASK;
        error = security_mmap_addr(address);
        if (error)
                return error;
 
-       vma_lock_anon_vma(vma);
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
 
        /*
         * vma->vm_start/vm_end cannot change under us because the caller
         * is required to hold the mmap_sem in read mode.  We need the
         * anon_vma lock to serialize against concurrent expand_stacks.
         */
+       anon_vma_lock_write(vma->anon_vma);
 
        /* Somebody else might have raced and expanded it already */
        if (address < vma->vm_start) {
@@ -2228,27 +2282,31 @@ int expand_downwards(struct vm_area_struct *vma,
                                 * updates, but we only hold a shared mmap_sem
                                 * lock here, so we need to protect against
                                 * concurrent vma expansions.
-                                * vma_lock_anon_vma() doesn't help here, as
+                                * anon_vma_lock_write() doesn't help here, as
                                 * we don't guarantee that all growable vmas
                                 * in a mm share the same root anon vma.
                                 * So, we reuse mm->page_table_lock to guard
                                 * against concurrent vma expansions.
                                 */
-                               spin_lock(&vma->vm_mm->page_table_lock);
+                               spin_lock(&mm->page_table_lock);
+                               if (vma->vm_flags & VM_LOCKED)
+                                       mm->locked_vm += grow;
+                               vm_stat_account(mm, vma->vm_flags,
+                                               vma->vm_file, grow);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_start = address;
                                vma->vm_pgoff -= grow;
                                anon_vma_interval_tree_post_update_vma(vma);
                                vma_gap_update(vma);
-                               spin_unlock(&vma->vm_mm->page_table_lock);
+                               spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
                        }
                }
        }
-       vma_unlock_anon_vma(vma);
-       khugepaged_enter_vma_merge(vma);
-       validate_mm(vma->vm_mm);
+       anon_vma_unlock_write(vma->anon_vma);
+       khugepaged_enter_vma_merge(vma, vma->vm_flags);
+       validate_mm(mm);
        return error;
 }
 
@@ -2289,7 +2347,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
        if (!prev || expand_stack(prev, addr))
                return NULL;
        if (prev->vm_flags & VM_LOCKED)
-               __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
+               populate_vma_page_range(prev, addr, prev->vm_end, NULL);
        return prev;
 }
 #else
@@ -2307,13 +2365,13 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
 }
 
 struct vm_area_struct *
-find_extend_vma(struct mm_struct * mm, unsigned long addr)
+find_extend_vma(struct mm_struct *mm, unsigned long addr)
 {
-       struct vm_area_struct * vma;
+       struct vm_area_struct *vma;
        unsigned long start;
 
        addr &= PAGE_MASK;
-       vma = find_vma(mm,addr);
+       vma = find_vma(mm, addr);
        if (!vma)
                return NULL;
        if (vma->vm_start <= addr)
@@ -2324,11 +2382,13 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
        if (expand_stack(vma, addr))
                return NULL;
        if (vma->vm_flags & VM_LOCKED)
-               __mlock_vma_pages_range(vma, addr, start, NULL);
+               populate_vma_page_range(vma, addr, start, NULL);
        return vma;
 }
 #endif
 
+EXPORT_SYMBOL_GPL(find_extend_vma);
+
 /*
  * Ok - we have the memory areas we should free on the vma list,
  * so release them, and do the vma updates.
@@ -2362,7 +2422,7 @@ static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
                unsigned long start, unsigned long end)
 {
-       struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
+       struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
        struct mmu_gather tlb;
 
        lru_add_drain();
@@ -2384,7 +2444,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct vm_area_struct **insertion_point;
        struct vm_area_struct *tail_vma = NULL;
-       unsigned long addr;
 
        insertion_point = (prev ? &prev->vm_next : &mm->mmap);
        vma->vm_prev = NULL;
@@ -2401,24 +2460,20 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
        } else
                mm->highest_vm_end = prev ? prev->vm_end : 0;
        tail_vma->vm_next = NULL;
-       if (mm->unmap_area == arch_unmap_area)
-               addr = prev ? prev->vm_end : mm->mmap_base;
-       else
-               addr = vma ?  vma->vm_start : mm->mmap_base;
-       mm->unmap_area(mm, addr);
-       mm->mmap_cache = NULL;          /* Kill the cache. */
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
 }
 
 /*
  * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
  * munmap path where it doesn't make sense to fail.
  */
-static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
              unsigned long addr, int new_below)
 {
-       struct mempolicy *pol;
        struct vm_area_struct *new;
-       int err = -ENOMEM;
+       int err;
 
        if (is_vm_hugetlb_page(vma) && (addr &
                                        ~(huge_page_mask(hstate_vma(vma)))))
@@ -2426,7 +2481,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
        new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
        if (!new)
-               goto out_err;
+               return -ENOMEM;
 
        /* most fields are the same, copy all, and then fixup */
        *new = *vma;
@@ -2440,14 +2495,12 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
        }
 
-       pol = mpol_dup(vma_policy(vma));
-       if (IS_ERR(pol)) {
-               err = PTR_ERR(pol);
+       err = vma_dup_policy(vma, new);
+       if (err)
                goto out_free_vma;
-       }
-       vma_set_policy(new, pol);
 
-       if (anon_vma_clone(new, vma))
+       err = anon_vma_clone(new, vma);
+       if (err)
                goto out_free_mpol;
 
        if (new->vm_file)
@@ -2473,10 +2526,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                fput(new->vm_file);
        unlink_anon_vmas(new);
  out_free_mpol:
-       mpol_put(pol);
+       mpol_put(vma_policy(new));
  out_free_vma:
        kmem_cache_free(vm_area_cachep, new);
- out_err:
        return err;
 }
 
@@ -2503,10 +2555,11 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        unsigned long end;
        struct vm_area_struct *vma, *prev, *last;
 
-       if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+       if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
                return -EINVAL;
 
-       if ((len = PAGE_ALIGN(len)) == 0)
+       len = PAGE_ALIGN(len);
+       if (len == 0)
                return -EINVAL;
 
        /* Find the first overlapping VMA */
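
do_munmap() now rejects unaligned addresses with offset_in_page() instead of the open-coded start & ~PAGE_MASK test shown on the removed line; the two are equivalent. A minimal sketch of the macro (cf. include/linux/mm.h):

/* offset_in_page(): byte offset of an address within its page. */
#define offset_in_page(p)       ((unsigned long)(p) & ~PAGE_MASK)
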
@@ -2552,7 +2605,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
                if (error)
                        return error;
        }
-       vma = prev? prev->vm_next: mm->mmap;
+       vma = prev ? prev->vm_next : mm->mmap;
 
        /*
         * unlock any mlock()ed ranges before detaching vmas
@@ -2574,6 +2627,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        detach_vmas_to_be_unmapped(mm, vma, prev, end);
        unmap_region(mm, vma, prev, start, end);
 
+       arch_unmap(mm, vma, start, end);
+
        /* Fix up all other VM information */
        remove_vma_list(mm, vma);
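
The new arch_unmap() call gives architectures a hook on the munmap path (x86, for example, uses it to release MPX bound tables). Architectures with nothing to do provide an empty stub, along the lines of the generic hook in include/asm-generic/mm_hooks.h:

/* Sketch of the default no-op unmap hook; interested architectures override it. */
static inline void arch_unmap(struct mm_struct *mm,
                              struct vm_area_struct *vma,
                              unsigned long start, unsigned long end)
{
}
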
 
@@ -2598,6 +2653,99 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
        return vm_munmap(addr, len);
 }
 
+
+/*
+ * Emulation of deprecated remap_file_pages() syscall.
+ */
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+               unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
+{
+
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long populate = 0;
+       unsigned long ret = -EINVAL;
+       struct file *file;
+
+       pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
+                       "See Documentation/vm/remap_file_pages.txt.\n",
+                       current->comm, current->pid);
+
+       if (prot)
+               return ret;
+       start = start & PAGE_MASK;
+       size = size & PAGE_MASK;
+
+       if (start + size <= start)
+               return ret;
+
+       /* Does pgoff wrap? */
+       if (pgoff + (size >> PAGE_SHIFT) < pgoff)
+               return ret;
+
+       down_write(&mm->mmap_sem);
+       vma = find_vma(mm, start);
+
+       if (!vma || !(vma->vm_flags & VM_SHARED))
+               goto out;
+
+       if (start < vma->vm_start)
+               goto out;
+
+       if (start + size > vma->vm_end) {
+               struct vm_area_struct *next;
+
+               for (next = vma->vm_next; next; next = next->vm_next) {
+                       /* hole between vmas ? */
+                       if (next->vm_start != next->vm_prev->vm_end)
+                               goto out;
+
+                       if (next->vm_file != vma->vm_file)
+                               goto out;
+
+                       if (next->vm_flags != vma->vm_flags)
+                               goto out;
+
+                       if (start + size <= next->vm_end)
+                               break;
+               }
+
+               if (!next)
+                       goto out;
+       }
+
+       prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+       prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+       prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+       flags &= MAP_NONBLOCK;
+       flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+       if (vma->vm_flags & VM_LOCKED) {
+               struct vm_area_struct *tmp;
+               flags |= MAP_LOCKED;
+
+               /* drop PG_Mlocked flag for over-mapped range */
+               for (tmp = vma; tmp && tmp->vm_start < start + size;
+                               tmp = tmp->vm_next) {
+                       munlock_vma_pages_range(tmp,
+                                       max(tmp->vm_start, start),
+                                       min(tmp->vm_end, start + size));
+               }
+       }
+
+       file = get_file(vma->vm_file);
+       ret = do_mmap_pgoff(vma->vm_file, start, size,
+                       prot, flags, pgoff, &populate);
+       fput(file);
+out:
+       up_write(&mm->mmap_sem);
+       if (populate)
+               mm_populate(ret, populate);
+       if (!IS_ERR_VALUE(ret))
+               ret = 0;
+       return ret;
+}
+
 static inline void verify_mm_writelocked(struct mm_struct *mm)
 {
 #ifdef CONFIG_DEBUG_VM
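
The emulation above turns a remap_file_pages() request into an ordinary linear mmap of the same file with MAP_SHARED | MAP_FIXED | MAP_POPULATE over the affected range, so legacy callers keep working now that nonlinear VMAs are gone. A minimal user-space illustration of the deprecated call (the file name is made up and error handling is trimmed):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        long psz = sysconf(_SC_PAGESIZE);
        int fd = open("/tmp/example.dat", O_RDWR);      /* hypothetical file */
        char *map = mmap(NULL, 4 * psz, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);

        /*
         * Ask for file page 3 to appear at the start of the mapping.  With
         * the emulation this becomes a fresh MAP_FIXED mmap of the same
         * file at pgoff 3 rather than a nonlinear VMA.
         */
        if (remap_file_pages(map, psz, 0, 3, 0))
                perror("remap_file_pages");

        munmap(map, 4 * psz);
        close(fd);
        return 0;
}
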
@@ -2615,10 +2763,10 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
  */
 static unsigned long do_brk(unsigned long addr, unsigned long len)
 {
-       struct mm_struct * mm = current->mm;
-       struct vm_area_struct * vma, * prev;
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma, *prev;
        unsigned long flags;
-       struct rb_node ** rb_link, * rb_parent;
+       struct rb_node **rb_link, *rb_parent;
        pgoff_t pgoff = addr >> PAGE_SHIFT;
        int error;
 
@@ -2629,21 +2777,12 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
        flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
        error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
-       if (error & ~PAGE_MASK)
+       if (offset_in_page(error))
                return error;
 
-       /*
-        * mlock MCL_FUTURE?
-        */
-       if (mm->def_flags & VM_LOCKED) {
-               unsigned long locked, lock_limit;
-               locked = len >> PAGE_SHIFT;
-               locked += mm->locked_vm;
-               lock_limit = rlimit(RLIMIT_MEMLOCK);
-               lock_limit >>= PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       return -EAGAIN;
-       }
+       error = mlock_future_check(mm, mm->def_flags, len);
+       if (error)
+               return error;
 
        /*
         * mm->mmap_sem is required to protect against another thread
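
do_brk() now delegates the MCL_FUTURE / RLIMIT_MEMLOCK check to mlock_future_check(), consolidating the block deleted above (the same check is also needed on the mmap path). Reconstructed from those removed lines, the helper is roughly:

/* Sketch reconstructed from the RLIMIT_MEMLOCK check removed above. */
static inline int mlock_future_check(struct mm_struct *mm,
                                     unsigned long flags,
                                     unsigned long len)
{
        unsigned long locked, lock_limit;

        /* mlock MCL_FUTURE? */
        if (flags & VM_LOCKED) {
                locked = len >> PAGE_SHIFT;
                locked += mm->locked_vm;
                lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
                if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                        return -EAGAIN;
        }
        return 0;
}
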
@@ -2654,11 +2793,10 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
        /*
         * Clear old maps.  this also does some error checking for us
         */
- munmap_back:
-       if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
+       while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
+                             &rb_parent)) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
-               goto munmap_back;
        }
 
        /* Check against address space limits *after* clearing old maps... */
@@ -2673,7 +2811,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 
        /* Can we just expand an old private anonymous mapping? */
        vma = vma_merge(mm, prev, addr, addr + len, flags,
-                                       NULL, NULL, pgoff, NULL, NULL);
+                       NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
        if (vma)
                goto out;
 
@@ -2699,6 +2837,7 @@ out:
        mm->total_vm += len >> PAGE_SHIFT;
        if (flags & VM_LOCKED)
                mm->locked_vm += (len >> PAGE_SHIFT);
+       vma->vm_flags |= VM_SOFTDIRTY;
        return addr;
 }
 
@@ -2763,19 +2902,24 @@ void exit_mmap(struct mm_struct *mm)
                vma = remove_vma(vma);
        }
        vm_unacct_memory(nr_accounted);
-
-       WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
 }
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_mutex is taken here.
+ * then i_mmap_rwsem is taken here.
  */
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
        struct vm_area_struct *prev;
        struct rb_node **rb_link, *rb_parent;
 
+       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
+                          &prev, &rb_link, &rb_parent))
+               return -ENOMEM;
+       if ((vma->vm_flags & VM_ACCOUNT) &&
+            security_vm_enough_memory_mm(mm, vma_pages(vma)))
+               return -ENOMEM;
+
        /*
         * The vm_pgoff of a purely anonymous vma should be irrelevant
         * until its first write fault, when page's anon_vma and index
@@ -2788,16 +2932,10 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
         * using the existing file pgoff checks and manipulations.
         * Similarly in do_mmap_pgoff and in do_brk.
         */
-       if (!vma->vm_file) {
+       if (vma_is_anonymous(vma)) {
                BUG_ON(vma->anon_vma);
                vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
        }
-       if (find_vma_links(mm, vma->vm_start, vma->vm_end,
-                          &prev, &rb_link, &rb_parent))
-               return -ENOMEM;
-       if ((vma->vm_flags & VM_ACCOUNT) &&
-            security_vm_enough_memory_mm(mm, vma_pages(vma)))
-               return -ENOMEM;
 
        vma_link(mm, vma, prev, rb_link, rb_parent);
        return 0;
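
insert_vm_struct() above, and copy_vma() below, now test vma_is_anonymous() instead of looking at vm_file directly. The helper lives in include/linux/mm.h and keys off the absence of vm_ops; roughly:

/* Sketch: an anonymous vma is one with no vm_operations. */
static inline bool vma_is_anonymous(struct vm_area_struct *vma)
{
        return !vma->vm_ops;
}
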
@@ -2816,14 +2954,13 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        struct mm_struct *mm = vma->vm_mm;
        struct vm_area_struct *new_vma, *prev;
        struct rb_node **rb_link, *rb_parent;
-       struct mempolicy *pol;
        bool faulted_in_anon_vma = true;
 
        /*
         * If anonymous vma has not yet been faulted, update new pgoff
         * to match new location, to increase its chance of merging.
         */
-       if (unlikely(!vma->vm_file && !vma->anon_vma)) {
+       if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
                pgoff = addr >> PAGE_SHIFT;
                faulted_in_anon_vma = false;
        }
@@ -2831,8 +2968,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
                return NULL;    /* should never get here */
        new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
-                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-                       vma_get_anon_name(vma));
+                           vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+                           vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
        if (new_vma) {
                /*
                 * Source vma may have been merged into new_vma
@@ -2851,38 +2988,37 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                         * safe. It is only safe to keep the vm_pgoff
                         * linear if there are no pages mapped yet.
                         */
-                       VM_BUG_ON(faulted_in_anon_vma);
+                       VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
                        *vmap = vma = new_vma;
                }
                *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
        } else {
                new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-               if (new_vma) {
-                       *new_vma = *vma;
-                       new_vma->vm_start = addr;
-                       new_vma->vm_end = addr + len;
-                       new_vma->vm_pgoff = pgoff;
-                       pol = mpol_dup(vma_policy(vma));
-                       if (IS_ERR(pol))
-                               goto out_free_vma;
-                       vma_set_policy(new_vma, pol);
-                       INIT_LIST_HEAD(&new_vma->anon_vma_chain);
-                       if (anon_vma_clone(new_vma, vma))
-                               goto out_free_mempol;
-                       if (new_vma->vm_file)
-                               get_file(new_vma->vm_file);
-                       if (new_vma->vm_ops && new_vma->vm_ops->open)
-                               new_vma->vm_ops->open(new_vma);
-                       vma_link(mm, new_vma, prev, rb_link, rb_parent);
-                       *need_rmap_locks = false;
-               }
+               if (!new_vma)
+                       goto out;
+               *new_vma = *vma;
+               new_vma->vm_start = addr;
+               new_vma->vm_end = addr + len;
+               new_vma->vm_pgoff = pgoff;
+               if (vma_dup_policy(vma, new_vma))
+                       goto out_free_vma;
+               INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+               if (anon_vma_clone(new_vma, vma))
+                       goto out_free_mempol;
+               if (new_vma->vm_file)
+                       get_file(new_vma->vm_file);
+               if (new_vma->vm_ops && new_vma->vm_ops->open)
+                       new_vma->vm_ops->open(new_vma);
+               vma_link(mm, new_vma, prev, rb_link, rb_parent);
+               *need_rmap_locks = false;
        }
        return new_vma;
 
- out_free_mempol:
-       mpol_put(pol);
- out_free_vma:
+out_free_mempol:
+       mpol_put(vma_policy(new_vma));
+out_free_vma:
        kmem_cache_free(vm_area_cachep, new_vma);
+out:
        return NULL;
 }
 
@@ -2902,6 +3038,31 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
        return 1;
 }
 
+static int special_mapping_fault(struct vm_area_struct *vma,
+                                struct vm_fault *vmf);
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static const char *special_mapping_name(struct vm_area_struct *vma)
+{
+       return ((struct vm_special_mapping *)vma->vm_private_data)->name;
+}
+
+static const struct vm_operations_struct special_mapping_vmops = {
+       .close = special_mapping_close,
+       .fault = special_mapping_fault,
+       .name = special_mapping_name,
+};
+
+static const struct vm_operations_struct legacy_special_mapping_vmops = {
+       .close = special_mapping_close,
+       .fault = special_mapping_fault,
+};
 
 static int special_mapping_fault(struct vm_area_struct *vma,
                                struct vm_fault *vmf)
@@ -2909,15 +3070,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
        pgoff_t pgoff;
        struct page **pages;
 
-       /*
-        * special mappings have no vm_file, and in that case, the mm
-        * uses vm_pgoff internally. So we have to subtract it from here.
-        * We are allowed to do this because we are the mm; do not copy
-        * this code into drivers!
-        */
-       pgoff = vmf->pgoff - vma->vm_pgoff;
+       if (vma->vm_ops == &legacy_special_mapping_vmops)
+               pages = vma->vm_private_data;
+       else
+               pages = ((struct vm_special_mapping *)vma->vm_private_data)->
+                       pages;
 
-       for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
+       for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
                pgoff--;
 
        if (*pages) {
@@ -2930,48 +3089,29 @@ static int special_mapping_fault(struct vm_area_struct *vma,
        return VM_FAULT_SIGBUS;
 }
 
-/*
- * Having a close hook prevents vma merging regardless of flags.
- */
-static void special_mapping_close(struct vm_area_struct *vma)
-{
-}
-
-static const struct vm_operations_struct special_mapping_vmops = {
-       .close = special_mapping_close,
-       .fault = special_mapping_fault,
-};
-
-/*
- * Called with mm->mmap_sem held for writing.
- * Insert a new vma covering the given region, with the given flags.
- * Its pages are supplied by the given array of struct page *.
- * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
- * The region past the last page supplied will always produce SIGBUS.
- * The array pointer and the pages it points to are assumed to stay alive
- * for as long as this mapping might exist.
- */
-int install_special_mapping(struct mm_struct *mm,
-                           unsigned long addr, unsigned long len,
-                           unsigned long vm_flags, struct page **pages)
+static struct vm_area_struct *__install_special_mapping(
+       struct mm_struct *mm,
+       unsigned long addr, unsigned long len,
+       unsigned long vm_flags, void *priv,
+       const struct vm_operations_struct *ops)
 {
        int ret;
        struct vm_area_struct *vma;
 
        vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
        if (unlikely(vma == NULL))
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&vma->anon_vma_chain);
        vma->vm_mm = mm;
        vma->vm_start = addr;
        vma->vm_end = addr + len;
 
-       vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
+       vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
-       vma->vm_ops = &special_mapping_vmops;
-       vma->vm_private_data = pages;
+       vma->vm_ops = ops;
+       vma->vm_private_data = priv;
 
        ret = insert_vm_struct(mm, vma);
        if (ret)
@@ -2981,11 +3121,40 @@ int install_special_mapping(struct mm_struct *mm,
 
        perf_event_mmap(vma);
 
-       return 0;
+       return vma;
 
 out:
        kmem_cache_free(vm_area_cachep, vma);
-       return ret;
+       return ERR_PTR(ret);
+}
+
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+struct vm_area_struct *_install_special_mapping(
+       struct mm_struct *mm,
+       unsigned long addr, unsigned long len,
+       unsigned long vm_flags, const struct vm_special_mapping *spec)
+{
+       return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
+                                       &special_mapping_vmops);
+}
+
+int install_special_mapping(struct mm_struct *mm,
+                           unsigned long addr, unsigned long len,
+                           unsigned long vm_flags, struct page **pages)
+{
+       struct vm_area_struct *vma = __install_special_mapping(
+               mm, addr, len, vm_flags, (void *)pages,
+               &legacy_special_mapping_vmops);
+
+       return PTR_ERR_OR_ZERO(vma);
 }
 
 static DEFINE_MUTEX(mm_all_locks_mutex);
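
After the refactor above, install_special_mapping() keeps its historical page-array interface (via legacy_special_mapping_vmops), while _install_special_mapping() takes a struct vm_special_mapping, which also names the area for /proc/<pid>/maps. A hedged sketch of a caller in the style of the vdso setup code; the "[demo]" name, demo_pages array and map_demo_page() function are illustrative and not part of mm/mmap.c:

#include <linux/err.h>
#include <linux/mm.h>

/* demo_pages[0] must point at a real page before the area is faulted. */
static struct page *demo_pages[2];      /* NULL-terminated page array */

static const struct vm_special_mapping demo_mapping = {
        .name  = "[demo]",
        .pages = demo_pages,
};

static int map_demo_page(struct mm_struct *mm, unsigned long addr)
{
        struct vm_area_struct *vma;

        down_write(&mm->mmap_sem);
        vma = _install_special_mapping(mm, addr, PAGE_SIZE,
                                       VM_READ | VM_MAYREAD,
                                       &demo_mapping);
        up_write(&mm->mmap_sem);

        return PTR_ERR_OR_ZERO(vma);
}
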
@@ -3027,7 +3196,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                 */
                if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                        BUG();
-               mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
+               down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
        }
 }
 
@@ -3042,8 +3211,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  *
  * mmap_sem in write mode is required in order to block all operations
  * that could modify pagetables and free pages without need of
- * altering the vma layout (for example populate_range() with
- * nonlinear vmas). It's also needed in write mode to avoid new
+ * altering the vma layout. It's also needed in write mode to avoid new
  * anon_vmas to be associated with existing vmas.
  *
  * A single task can't take more than one mm_take_all_locks() in a row
@@ -3054,7 +3222,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
+ * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
@@ -3123,7 +3291,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
                 * AS_MM_ALL_LOCKS can't change to 0 from under us
                 * because we hold the mm_all_locks_mutex.
                 */
-               mutex_unlock(&mapping->i_mmap_mutex);
+               i_mmap_unlock_write(mapping);
                if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
                                        &mapping->flags))
                        BUG();
@@ -3160,7 +3328,7 @@ void __init mmap_init(void)
 {
        int ret;
 
-       ret = percpu_counter_init(&vm_committed_as, 0);
+       ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
        VM_BUG_ON(ret);
 }
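
percpu_counter_init() grew a gfp_t argument so the underlying per-CPU allocation can follow the caller's allocation context, hence the added GFP_KERNEL here. A minimal usage sketch with the updated signature (demo_counter and demo_counter_setup are illustrative names):

#include <linux/gfp.h>
#include <linux/percpu_counter.h>
#include <linux/printk.h>

static struct percpu_counter demo_counter;

static int demo_counter_setup(void)
{
        int ret = percpu_counter_init(&demo_counter, 0, GFP_KERNEL);

        if (ret)
                return ret;
        percpu_counter_add(&demo_counter, 42);
        pr_info("demo counter: %lld\n",
                (long long)percpu_counter_sum(&demo_counter));
        percpu_counter_destroy(&demo_counter);
        return 0;
}
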
 
@@ -3183,7 +3351,7 @@ static int init_user_reserve(void)
        sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
        return 0;
 }
-module_init(init_user_reserve)
+subsys_initcall(init_user_reserve);
 
 /*
  * Initialise sysctl_admin_reserve_kbytes.
@@ -3204,7 +3372,7 @@ static int init_admin_reserve(void)
        sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
        return 0;
 }
-module_init(init_admin_reserve)
+subsys_initcall(init_admin_reserve);
 
 /*
  * Reinitialise user and admin reserves if memory is added or removed.
@@ -3270,8 +3438,8 @@ static struct notifier_block reserve_mem_nb = {
 static int __meminit init_reserve_notifier(void)
 {
        if (register_hotmemory_notifier(&reserve_mem_nb))
-               printk("Failed registering memory add/remove notifier for admin reserve");
+               pr_err("Failed registering memory add/remove notifier for admin reserve\n");
 
        return 0;
 }
-module_init(init_reserve_notifier)
+subsys_initcall(init_reserve_notifier);