tmpfs: support fallocate FALLOC_FL_PUNCH_HOLE
Author:     Hugh Dickins <hughd@google.com>
AuthorDate: Tue, 29 May 2012 22:06:40 +0000 (15:06 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Tue, 29 May 2012 23:22:22 +0000 (16:22 -0700)
tmpfs has supported hole-punching since 2.6.16, via
madvise(,,MADV_REMOVE).

But nowadays fallocate(,FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,,) is
the agreed way to punch holes.

So add shmem_fallocate() to support that, and tweak shmem_truncate_range()
to support partial pages at both the beginning and end of range (never
needed for madvise, which demands rounded addr and rounds up length).
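
For illustration, a minimal userspace sketch of the new interface (not
part of this patch; the /dev/shm path and 4096-byte buffer are
assumptions, any tmpfs mount and page size will do):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	struct stat st;
	int fd = open("/dev/shm/punch-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);

	if (fd < 0)
		return 1;
	memset(buf, 0xaa, sizeof(buf));
	for (int i = 0; i < 4; i++)		/* 16k of non-zero data */
		write(fd, buf, sizeof(buf));

	/* Punch out the middle two pages; KEEP_SIZE leaves i_size alone */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      4096, 2 * 4096) != 0)
		perror("fallocate");

	fstat(fd, &st);
	pread(fd, buf, sizeof(buf), 4096);
	printf("size %lld, punched byte reads 0x%02x\n",
	       (long long)st.st_size, (unsigned char)buf[0]);
	close(fd);
	unlink("/dev/shm/punch-demo");
	return 0;
}

Expected: size stays 16384, and the punched byte reads back 0x00.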

Based-on-patch-by: Cong Wang <amwang@redhat.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Cong Wang <amwang@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/shmem.c b/mm/shmem.c
index 45c26476f0fca9c3a94642d4c07e847379365765..7e54ff1c63e1172269fa1db2a4676c0f00903470 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -53,6 +53,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/blkdev.h>
 #include <linux/pagevec.h>
 #include <linux/percpu_counter.h>
+#include <linux/falloc.h>
 #include <linux/splice.h>
 #include <linux/security.h>
 #include <linux/swapops.h>
@@ -432,21 +433,23 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
        struct address_space *mapping = inode->i_mapping;
        struct shmem_inode_info *info = SHMEM_I(inode);
        pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-       unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
-       pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
+       pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
+       unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
+       unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
        struct pagevec pvec;
        pgoff_t indices[PAGEVEC_SIZE];
        long nr_swaps_freed = 0;
        pgoff_t index;
        int i;
 
-       BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
+       if (lend == -1)
+               end = -1;       /* unsigned, so actually very big */
 
        pagevec_init(&pvec, 0);
        index = start;
-       while (index <= end) {
+       while (index < end) {
                pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+                               min(end - index, (pgoff_t)PAGEVEC_SIZE),
                                                        pvec.pages, indices);
                if (!pvec.nr)
                        break;
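
To see what the reworked rounding computes, here is a hypothetical worked
example in userspace C (SHIFT/SIZE stand in for PAGE_CACHE_SHIFT and a
4096-byte PAGE_CACHE_SIZE); it covers both the straddling case and the
single-page start > end fold handled in the partial-page hunk below:

#include <stdio.h>

#define SHIFT	12			/* stand-in for PAGE_CACHE_SHIFT */
#define SIZE	(1UL << SHIFT)		/* stand-in for PAGE_CACHE_SIZE */

static void show(long long lstart, long long lend)
{
	unsigned long start = (lstart + SIZE - 1) >> SHIFT;
	unsigned long end = (lend + 1) >> SHIFT;
	unsigned int partial_start = lstart & (SIZE - 1);
	unsigned int partial_end = (lend + 1) & (SIZE - 1);

	printf("punch %lld..%lld: start=%lu end=%lu partial_start=%u partial_end=%u\n",
	       lstart, lend, start, end, partial_start, partial_end);
}

int main(void)
{
	show(1000, 9999);	/* start=1 end=2: page 1 dropped whole, bytes
				   1000..4095 of page 0 and 0..1807 of page 2
				   zeroed in place */
	show(1000, 1999);	/* start=1 end=0: both edges fall in page 0,
				   so one zeroing of bytes 1000..1999 suffices */
	return 0;
}
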
@@ -455,7 +458,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
                        struct page *page = pvec.pages[i];
 
                        index = indices[i];
-                       if (index > end)
+                       if (index >= end)
                                break;
 
                        if (radix_tree_exceptional_entry(page)) {
@@ -479,22 +482,39 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
                index++;
        }
 
-       if (partial) {
+       if (partial_start) {
                struct page *page = NULL;
                shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
                if (page) {
-                       zero_user_segment(page, partial, PAGE_CACHE_SIZE);
+                       unsigned int top = PAGE_CACHE_SIZE;
+                       if (start > end) {
+                               top = partial_end;
+                               partial_end = 0;
+                       }
+                       zero_user_segment(page, partial_start, top);
+                       set_page_dirty(page);
+                       unlock_page(page);
+                       page_cache_release(page);
+               }
+       }
+       if (partial_end) {
+               struct page *page = NULL;
+               shmem_getpage(inode, end, &page, SGP_READ, NULL);
+               if (page) {
+                       zero_user_segment(page, 0, partial_end);
                        set_page_dirty(page);
                        unlock_page(page);
                        page_cache_release(page);
                }
        }
+       if (start >= end)
+               return;
 
        index = start;
        for ( ; ; ) {
                cond_resched();
                pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+                               min(end - index, (pgoff_t)PAGEVEC_SIZE),
                                                        pvec.pages, indices);
                if (!pvec.nr) {
                        if (index == start)
@@ -502,7 +522,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
                        index = start;
                        continue;
                }
-               if (index == start && indices[0] > end) {
+               if (index == start && indices[0] >= end) {
                        shmem_deswap_pagevec(&pvec);
                        pagevec_release(&pvec);
                        break;
@@ -512,7 +532,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
                        struct page *page = pvec.pages[i];
 
                        index = indices[i];
-                       if (index > end)
+                       if (index >= end)
                                break;
 
                        if (radix_tree_exceptional_entry(page)) {
@@ -1578,6 +1598,31 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
        return error;
 }
 
+static long shmem_fallocate(struct file *file, int mode, loff_t offset,
+                                                        loff_t len)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       int error = -EOPNOTSUPP;
+
+       mutex_lock(&inode->i_mutex);
+
+       if (mode & FALLOC_FL_PUNCH_HOLE) {
+               struct address_space *mapping = file->f_mapping;
+               loff_t unmap_start = round_up(offset, PAGE_SIZE);
+               loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
+
+               if ((u64)unmap_end > (u64)unmap_start)
+                       unmap_mapping_range(mapping, unmap_start,
+                                           1 + unmap_end - unmap_start, 0);
+               shmem_truncate_range(inode, offset, offset + len - 1);
+               /* No need to unmap again: hole-punching leaves COWed pages */
+               error = 0;
+       }
+
+       mutex_unlock(&inode->i_mutex);
+       return error;
+}
+
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
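
The split of work in shmem_fallocate() above is easiest to see with
numbers: only pages lying wholly inside the hole are unmapped up front,
while the punched edges are left for shmem_truncate_range() to zero.  A
hypothetical sketch, with userspace stand-ins for the kernel's
round_up()/round_down() and a 4096-byte PAGE_SIZE assumed:

#include <stdio.h>

#define PAGE_SIZE	4096LL
#define round_up(x, y)	((((x) + (y) - 1) / (y)) * (y))
#define round_down(x, y) (((x) / (y)) * (y))

int main(void)
{
	long long offset = 1000, len = 9000;	/* hole covers bytes 1000..9999 */
	long long unmap_start = round_up(offset, PAGE_SIZE);
	long long unmap_end = round_down(offset + len, PAGE_SIZE) - 1;

	/* Prints "unmap 4096..8191": page 1 alone lies wholly inside the
	 * hole.  A hole spanning no whole page (say offset=100, len=5000)
	 * gives unmap_end < unmap_start, and the unmap is skipped. */
	printf("unmap %lld..%lld\n", unmap_start, unmap_end);
	return 0;
}
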
@@ -2490,6 +2535,7 @@ static const struct file_operations shmem_file_operations = {
        .fsync          = noop_fsync,
        .splice_read    = shmem_file_splice_read,
        .splice_write   = generic_file_splice_write,
+       .fallocate      = shmem_fallocate,
 #endif
 };