mm: add a field to store names for private anonymous memory
authorColin Cross <ccross@android.com>
Thu, 27 Jun 2013 00:26:01 +0000 (17:26 -0700)
committerColin Cross <ccross@android.com>
Thu, 19 Sep 2013 19:14:28 +0000 (14:14 -0500)
Userspace processes often have multiple allocators that each do
anonymous mmaps to get memory.  When examining memory usage of
individual processes or systems as a whole, it is useful to be
able to break down the various heaps that were allocated by
each layer and examine their size, RSS, and physical memory
usage.

This patch adds a user pointer to the shared union in
vm_area_struct that points to a null terminated string inside
the user process containing a name for the vma.  vmas that
point to the same address will be merged, but vmas that
point to equivalent strings at different addresses will
not be merged.

Userspace can set the name for a region of memory by calling
prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len, (unsigned long)name);
Setting the name to NULL clears it.

The names of named anonymous vmas are shown in /proc/pid/maps
as [anon:<name>] and in /proc/pid/smaps in a new "Name" field
that is only present for named vmas.  If the userspace pointer
is no longer valid all or part of the name will be replaced
with "<fault>".

The idea to store a userspace pointer to reduce the complexity
within mm (at the expense of the complexity of reading
/proc/pid/mem) came from Dave Hansen.  This results in no
runtime overhead in the mm subsystem other than comparing
the anon_name pointers when considering vma merging.  The pointer
is stored in a union with fieds that are only used on file-backed
mappings, so it does not increase memory usage.

Change-Id: Ie2ffc0967d4ffe7ee4c70781313c7b00cf7e3092
Signed-off-by: Colin Cross <ccross@android.com>
Documentation/filesystems/proc.txt
fs/proc/task_mmu.c
include/linux/mm.h
include/linux/mm_types.h
include/uapi/linux/prctl.h
kernel/sys.c
mm/madvise.c
mm/mempolicy.c
mm/mlock.c
mm/mmap.c
mm/mprotect.c

index fd8d0d594fc7c9fd6bd1a5baa107968a9c9fab01..e0eb9d287312edde869c2f16d21a436f3afd1ae7 100644 (file)
@@ -369,6 +369,8 @@ is not associated with a file:
  [stack:1001]             = the stack of the thread with tid 1001
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
+ [anon:<name>]            = an anonymous mapping that has been
+                            named by userspace
 
  or if empty, the mapping is anonymous.
 
@@ -419,6 +421,7 @@ KernelPageSize:        4 kB
 MMUPageSize:           4 kB
 Locked:              374 kB
 VmFlags: rd ex mr mw me de
+Name:           name from userspace
 
 the first of these lines shows the same information as is displayed for the
 mapping in /proc/PID/maps.  The remaining lines show the size of the mapping
@@ -469,6 +472,9 @@ Note that there is no guarantee that every flag and associated mnemonic will
 be present in all further kernel releases. Things get changed, the flags may
 be vanished or the reverse -- new added.
 
+The "Name" field will only be present on a mapping that has been named by
+userspace, and will show the name passed in by userspace.
+
 This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.
 
index 3e636d864d5666aa2b7a58a5f61af214e6d30832..e2c925fa782714943f1c83d937f2427d77443e82 100644 (file)
@@ -134,6 +134,56 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
 }
 #endif
 
+static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
+{
+       const char __user *name = vma_get_anon_name(vma);
+       struct mm_struct *mm = vma->vm_mm;
+
+       unsigned long page_start_vaddr;
+       unsigned long page_offset;
+       unsigned long num_pages;
+       unsigned long max_len = NAME_MAX;
+       int i;
+
+       page_start_vaddr = (unsigned long)name & PAGE_MASK;
+       page_offset = (unsigned long)name - page_start_vaddr;
+       num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);
+
+       seq_puts(m, "[anon:");
+
+       for (i = 0; i < num_pages; i++) {
+               int len;
+               int write_len;
+               const char *kaddr;
+               long pages_pinned;
+               struct page *page;
+
+               pages_pinned = get_user_pages(current, mm, page_start_vaddr,
+                               1, 0, 0, &page, NULL);
+               if (pages_pinned < 1) {
+                       seq_puts(m, "<fault>]");
+                       return;
+               }
+
+               kaddr = (const char *)kmap(page);
+               len = min(max_len, PAGE_SIZE - page_offset);
+               write_len = strnlen(kaddr + page_offset, len);
+               seq_write(m, kaddr + page_offset, write_len);
+               kunmap(page);
+               put_page(page);
+
+               /* if strnlen hit a null terminator then we're done */
+               if (write_len != len)
+                       break;
+
+               max_len -= len;
+               page_offset = 0;
+               page_start_vaddr += PAGE_SIZE;
+       }
+
+       seq_putc(m, ']');
+}
+
 static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
 {
        if (vma && vma != priv->tail_vma) {
@@ -335,6 +385,12 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
                                pad_len_spaces(m, len);
                                seq_printf(m, "[stack:%d]", tid);
                        }
+                       goto done;
+               }
+
+               if (vma_get_anon_name(vma)) {
+                       pad_len_spaces(m, len);
+                       seq_print_vma_name(m, vma);
                }
        }
 
@@ -634,6 +690,12 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 
        show_smap_vma_flags(m, vma);
 
+       if (vma_get_anon_name(vma)) {
+               seq_puts(m, "Name:           ");
+               seq_print_vma_name(m, vma);
+               seq_putc(m, '\n');
+       }
+
        if (m->count < m->size)  /* vma is copied successfully */
                m->version = (vma != get_gate_vma(task->mm))
                        ? vma->vm_start : 0;
index bd5679ddcd3fb1cf6e652ce8c45c32a9897ee2c0..01eb01df92251a6d882e5c7c222d17cd57558901 100644 (file)
@@ -1486,7 +1486,7 @@ extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
        struct vm_area_struct *prev, unsigned long addr, unsigned long end,
        unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-       struct mempolicy *);
+       struct mempolicy *, const char __user *);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int split_vma(struct mm_struct *,
        struct vm_area_struct *, unsigned long addr, int new_below);
index ace9a5f01c64fe1a8ae9d97fe45765ddc375df58..875ba48dd72b6e818accf82490106faaf5071665 100644 (file)
@@ -255,6 +255,10 @@ struct vm_area_struct {
         * For areas with an address space and backing store,
         * linkage into the address_space->i_mmap interval tree, or
         * linkage of vma in the address_space->i_mmap_nonlinear list.
+        *
+        * For private anonymous mappings, a pointer to a null terminated string
+        * in the user process containing the name given to the vma, or NULL
+        * if unnamed.
         */
        union {
                struct {
@@ -262,6 +266,7 @@ struct vm_area_struct {
                        unsigned long rb_subtree_last;
                } linear;
                struct list_head nonlinear;
+               const char __user *anon_name;
        } shared;
 
        /*
@@ -456,4 +461,14 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
        return mm->cpu_vm_mask_var;
 }
 
+
+/* Return the name for an anonymous mapping or NULL for a file-backed mapping */
+static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma)
+{
+       if (vma->vm_file)
+               return NULL;
+
+       return vma->shared.anon_name;
+}
+
 #endif /* _LINUX_MM_TYPES_H */
index 289760f424aaa3247d2e4f66841334c67295c9ef..253856a2a8ad07d720087e01a1d5e45c4420a1d9 100644 (file)
 
 #define PR_GET_TID_ADDRESS     40
 
+#define PR_SET_VMA             0x53564d41
+# define PR_SET_VMA_ANON_NAME          0
+
 #endif /* _LINUX_PRCTL_H */
index 2bbd9a73b54c27b0e75eb651d931f0987ff870ed..1c9090bc674607cd8a8276f996e241e4457757d1 100644 (file)
@@ -42,6 +42,8 @@
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
 #include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -2099,6 +2101,146 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
 }
 #endif
 
+
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+               struct vm_area_struct **prev,
+               unsigned long start, unsigned long end,
+               const char __user *name_addr)
+{
+       struct mm_struct * mm = vma->vm_mm;
+       int error = 0;
+       pgoff_t pgoff;
+
+       if (name_addr == vma_get_anon_name(vma)) {
+               *prev = vma;
+               goto out;
+       }
+
+       pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+       *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+                               vma->vm_file, pgoff, vma_policy(vma),
+                               name_addr);
+       if (*prev) {
+               vma = *prev;
+               goto success;
+       }
+
+       *prev = vma;
+
+       if (start != vma->vm_start) {
+               error = split_vma(mm, vma, start, 1);
+               if (error)
+                       goto out;
+       }
+
+       if (end != vma->vm_end) {
+               error = split_vma(mm, vma, end, 0);
+               if (error)
+                       goto out;
+       }
+
+success:
+       if (!vma->vm_file)
+               vma->shared.anon_name = name_addr;
+
+out:
+       if (error == -ENOMEM)
+               error = -EAGAIN;
+       return error;
+}
+
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+                       unsigned long arg)
+{
+       unsigned long tmp;
+       struct vm_area_struct * vma, *prev;
+       int unmapped_error = 0;
+       int error = -EINVAL;
+
+       /*
+        * If the interval [start,end) covers some unmapped address
+        * ranges, just ignore them, but return -ENOMEM at the end.
+        * - this matches the handling in madvise.
+        */
+       vma = find_vma_prev(current->mm, start, &prev);
+       if (vma && start > vma->vm_start)
+               prev = vma;
+
+       for (;;) {
+               /* Still start < end. */
+               error = -ENOMEM;
+               if (!vma)
+                       return error;
+
+               /* Here start < (end|vma->vm_end). */
+               if (start < vma->vm_start) {
+                       unmapped_error = -ENOMEM;
+                       start = vma->vm_start;
+                       if (start >= end)
+                               return error;
+               }
+
+               /* Here vma->vm_start <= start < (end|vma->vm_end) */
+               tmp = vma->vm_end;
+               if (end < tmp)
+                       tmp = end;
+
+               /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+               error = prctl_update_vma_anon_name(vma, &prev, start, end,
+                               (const char __user *)arg);
+               if (error)
+                       return error;
+               start = tmp;
+               if (prev && start < prev->vm_end)
+                       start = prev->vm_end;
+               error = unmapped_error;
+               if (start >= end)
+                       return error;
+               if (prev)
+                       vma = prev->vm_next;
+               else    /* madvise_remove dropped mmap_sem */
+                       vma = find_vma(current->mm, start);
+       }
+}
+
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+               unsigned long len_in, unsigned long arg)
+{
+       struct mm_struct *mm = current->mm;
+       int error;
+       unsigned long len;
+       unsigned long end;
+
+       if (start & ~PAGE_MASK)
+               return -EINVAL;
+       len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+       /* Check to see whether len was rounded up from small -ve to zero */
+       if (len_in && !len)
+               return -EINVAL;
+
+       end = start + len;
+       if (end < start)
+               return -EINVAL;
+
+       if (end == start)
+               return 0;
+
+       down_write(&mm->mmap_sem);
+
+       switch (opt) {
+       case PR_SET_VMA_ANON_NAME:
+               error = prctl_set_vma_anon_name(start, end, arg);
+               break;
+       default:
+               error = -EINVAL;
+       }
+
+       up_write(&mm->mmap_sem);
+
+       return error;
+}
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                unsigned long, arg4, unsigned long, arg5)
 {
@@ -2226,6 +2368,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                        else
                                return -EINVAL;
                        break;
+               case PR_SET_VMA:
+                       error = prctl_set_vma(arg2, arg3, arg4, arg5);
+                       break;
                default:
                        return -EINVAL;
                }
index 7055883e6e250f2914ed5c0a7814400e4945c9d1..85d1a5427c27faeff4f4d8d3b52cba058685022f 100644 (file)
@@ -102,7 +102,8 @@ static long madvise_behavior(struct vm_area_struct * vma,
 
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
-                               vma->vm_file, pgoff, vma_policy(vma));
+                               vma->vm_file, pgoff, vma_policy(vma),
+                               vma_get_anon_name(vma));
        if (*prev) {
                vma = *prev;
                goto success;
index 74310017296ee02317b79a2c28b25ef303fc7720..7b349aafe79d2f78c3c387c50a573625533502d7 100644 (file)
@@ -728,7 +728,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
                        ((vmstart - vma->vm_start) >> PAGE_SHIFT);
                prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
                                  vma->anon_vma, vma->vm_file, pgoff,
-                                 new_pol);
+                                 new_pol, vma_get_anon_name(name));
                if (prev) {
                        vma = prev;
                        next = vma->vm_next;
index 79b7cf7d1bca72cee9babfb60e21a38799c8eba1..33861c7800702df6887ff097982ccfb58837ad10 100644 (file)
@@ -287,7 +287,8 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
-                         vma->vm_file, pgoff, vma_policy(vma));
+                         vma->vm_file, pgoff, vma_policy(vma),
+                         vma_get_anon_name(vma));
        if (*prev) {
                vma = *prev;
                goto success;
index f681e1842fadc1ccd8d7188a7ca1688c73548111..25abb881c9e989e0deb7684a216cd1fcd8c87615 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -893,7 +893,8 @@ again:                      remove_next = 1 + (end > next->vm_end);
  * per-vma resources, so we don't attempt to merge those.
  */
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
-                       struct file *file, unsigned long vm_flags)
+                       struct file *file, unsigned long vm_flags,
+                       const char __user *anon_name)
 {
        if (vma->vm_flags ^ vm_flags)
                return 0;
@@ -901,6 +902,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
                return 0;
        if (vma->vm_ops && vma->vm_ops->close)
                return 0;
+       if (vma_get_anon_name(vma) != anon_name)
+               return 0;
        return 1;
 }
 
@@ -931,9 +934,10 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
  */
 static int
 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
+       const char __user *anon_name)
 {
-       if (is_mergeable_vma(vma, file, vm_flags) &&
+       if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
            is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                if (vma->vm_pgoff == vm_pgoff)
                        return 1;
@@ -950,9 +954,10 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
  */
 static int
 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
-       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+       struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
+       const char __user *anon_name)
 {
-       if (is_mergeable_vma(vma, file, vm_flags) &&
+       if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
            is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                pgoff_t vm_pglen;
                vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
@@ -963,9 +968,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
 }
 
 /*
- * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
- * whether that can be merged with its predecessor or its successor.
- * Or both (it neatly fills a hole).
+ * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
+ * figure out whether that can be merged with its predecessor or its
+ * successor.  Or both (it neatly fills a hole).
  *
  * In most cases - when called for mmap, brk or mremap - [addr,end) is
  * certain not to be mapped by the time vma_merge is called; but when
@@ -995,7 +1000,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        struct vm_area_struct *prev, unsigned long addr,
                        unsigned long end, unsigned long vm_flags,
                        struct anon_vma *anon_vma, struct file *file,
-                       pgoff_t pgoff, struct mempolicy *policy)
+                       pgoff_t pgoff, struct mempolicy *policy,
+                       const char __user *anon_name)
 {
        pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
        struct vm_area_struct *area, *next;
@@ -1021,15 +1027,15 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         */
        if (prev && prev->vm_end == addr &&
                        mpol_equal(vma_policy(prev), policy) &&
-                       can_vma_merge_after(prev, vm_flags,
-                                               anon_vma, file, pgoff)) {
+                       can_vma_merge_after(prev, vm_flags, anon_vma,
+                                               file, pgoff, anon_name)) {
                /*
                 * OK, it can.  Can we now merge in the successor as well?
                 */
                if (next && end == next->vm_start &&
                                mpol_equal(policy, vma_policy(next)) &&
-                               can_vma_merge_before(next, vm_flags,
-                                       anon_vma, file, pgoff+pglen) &&
+                               can_vma_merge_before(next, vm_flags, anon_vma,
+                                               file, pgoff+pglen, anon_name) &&
                                is_mergeable_anon_vma(prev->anon_vma,
                                                      next->anon_vma, NULL)) {
                                                        /* cases 1, 6 */
@@ -1049,8 +1055,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         */
        if (next && end == next->vm_start &&
                        mpol_equal(policy, vma_policy(next)) &&
-                       can_vma_merge_before(next, vm_flags,
-                                       anon_vma, file, pgoff+pglen)) {
+                       can_vma_merge_before(next, vm_flags, anon_vma,
+                                       file, pgoff+pglen, anon_name)) {
                if (prev && addr < prev->vm_end)        /* case 4 */
                        err = vma_adjust(prev, prev->vm_start,
                                addr, prev->vm_pgoff, NULL);
@@ -1519,7 +1525,8 @@ munmap_back:
        /*
         * Can we just expand an old mapping?
         */
-       vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
+       vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
+                       NULL, NULL);
        if (vma)
                goto out;
 
@@ -2663,7 +2670,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 
        /* Can we just expand an old private anonymous mapping? */
        vma = vma_merge(mm, prev, addr, addr + len, flags,
-                                       NULL, NULL, pgoff, NULL);
+                                       NULL, NULL, pgoff, NULL, NULL);
        if (vma)
                goto out;
 
@@ -2821,7 +2828,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
                return NULL;    /* should never get here */
        new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
-                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+                       vma_get_anon_name(vma));
        if (new_vma) {
                /*
                 * Source vma may have been merged into new_vma
index 94722a4d6b438311de1d1690d81a0d598a907339..94d50b7a6dad262fc7ffda63957d7bb304db811b 100644 (file)
@@ -271,7 +271,8 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
         */
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *pprev = vma_merge(mm, *pprev, start, end, newflags,
-                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+                       vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+                       vma_get_anon_name(vma));
        if (*pprev) {
                vma = *pprev;
                goto success;