drm/rockchip: vop: fixup plane zpos
[firefly-linux-kernel-4.4.55.git] / fs / ext4 / file.c
index b1b4d51b5d86b4e54c179ddce5f5b574238b3629..0d24ebcd7c9e535dc3126d5c10b18c745aef8a2e 100644 (file)
 
 #include <linux/time.h>
 #include <linux/fs.h>
-#include <linux/jbd2.h>
 #include <linux/mount.h>
 #include <linux/path.h>
-#include <linux/aio.h>
+#include <linux/dax.h>
 #include <linux/quotaops.h>
 #include <linux/pagevec.h>
+#include <linux/uio.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -57,7 +57,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
        return 0;
 }
 
-void ext4_unwritten_wait(struct inode *inode)
+static void ext4_unwritten_wait(struct inode *inode)
 {
        wait_queue_head_t *wq = ext4_ioend_wq(inode);
 
@@ -74,144 +74,286 @@ void ext4_unwritten_wait(struct inode *inode)
  * or one thread will zero the other's data, causing corruption.
  */
 static int
-ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
-                  unsigned long nr_segs, loff_t pos)
+ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
 {
        struct super_block *sb = inode->i_sb;
        int blockmask = sb->s_blocksize - 1;
-       size_t count = iov_length(iov, nr_segs);
-       loff_t final_size = pos + count;
 
-       if (pos >= inode->i_size)
+       if (pos >= i_size_read(inode))
                return 0;
 
-       if ((pos & blockmask) || (final_size & blockmask))
+       if ((pos | iov_iter_alignment(from)) & blockmask)
                return 1;
 
        return 0;
 }
 
 static ssize_t
-ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
-                   unsigned long nr_segs, loff_t pos)
+ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
-       struct inode *inode = file->f_mapping->host;
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct mutex *aio_mutex = NULL;
        struct blk_plug plug;
-       int unaligned_aio = 0;
-       ssize_t ret;
+       int o_direct = iocb->ki_flags & IOCB_DIRECT;
        int overwrite = 0;
-       size_t length = iov_length(iov, nr_segs);
-
-       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
-           !is_sync_kiocb(iocb))
-               unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
+       ssize_t ret;
 
-       /* Unaligned direct AIO must be serialized; see comment above */
-       if (unaligned_aio) {
-               mutex_lock(ext4_aio_mutex(inode));
+       /*
+        * Unaligned direct AIO must be serialized; see comment above.
+        * In the case of O_APPEND, assume that we must always serialize.
+        */
+       if (o_direct &&
+           ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+           !is_sync_kiocb(iocb) &&
+           (iocb->ki_flags & IOCB_APPEND ||
+            ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
+               aio_mutex = ext4_aio_mutex(inode);
+               mutex_lock(aio_mutex);
                ext4_unwritten_wait(inode);
        }
 
-       BUG_ON(iocb->ki_pos != pos);
-
        mutex_lock(&inode->i_mutex);
-       blk_start_plug(&plug);
-
-       iocb->private = &overwrite;
+       ret = generic_write_checks(iocb, from);
+       if (ret <= 0)
+               goto out;
 
-       /* check whether we do a DIO overwrite or not */
-       if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
-           !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
-               struct ext4_map_blocks map;
-               unsigned int blkbits = inode->i_blkbits;
-               int err, len;
+       /*
+        * If we have encountered a bitmap-format file, the size limit
+        * is smaller than s_maxbytes, which is for extent-mapped files.
+        */
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+               struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 
-               map.m_lblk = pos >> blkbits;
-               map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
-                       - map.m_lblk;
-               len = map.m_len;
+               if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
+                       ret = -EFBIG;
+                       goto out;
+               }
+               iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
+       }
 
-               err = ext4_map_blocks(NULL, inode, &map, 0);
-               /*
-                * 'err==len' means that all of blocks has been preallocated no
-                * matter they are initialized or not.  For excluding
-                * uninitialized extents, we need to check m_flags.  There are
-                * two conditions that indicate for initialized extents.
-                * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned;
-                * 2) If we do a real lookup, non-flags are returned.
-                * So we should check these two conditions.
-                */
-               if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
-                       overwrite = 1;
+       iocb->private = &overwrite;
+       if (o_direct) {
+               size_t length = iov_iter_count(from);
+               loff_t pos = iocb->ki_pos;
+               blk_start_plug(&plug);
+
+               /* check whether we do a DIO overwrite or not */
+               if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
+                   !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
+                       struct ext4_map_blocks map;
+                       unsigned int blkbits = inode->i_blkbits;
+                       int err, len;
+
+                       map.m_lblk = pos >> blkbits;
+                       map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
+                               - map.m_lblk;
+                       len = map.m_len;
+
+                       err = ext4_map_blocks(NULL, inode, &map, 0);
+                       /*
+                        * 'err==len' means that all of the blocks have
+                        * been preallocated, whether or not they are
+                        * initialized.  To exclude unwritten extents,
+                        * we need to check m_flags.  There are two
+                        * conditions that indicate initialized
+                        * extents.  1) If we hit the extent cache, the
+                        * EXT4_MAP_MAPPED flag is returned; 2) If we
+                        * do a real lookup, non-flags are returned.
+                        * So we should check these two
+                        * conditions.
+                        */
+                       if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
+                               overwrite = 1;
+               }
        }
 
-       ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+       ret = __generic_file_write_iter(iocb, from);
        mutex_unlock(&inode->i_mutex);
 
-       if (ret > 0 || ret == -EIOCBQUEUED) {
+       if (ret > 0) {
                ssize_t err;
 
-               err = generic_write_sync(file, pos, ret);
-               if (err < 0 && ret > 0)
+               err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+               if (err < 0)
                        ret = err;
        }
-       blk_finish_plug(&plug);
+       if (o_direct)
+               blk_finish_plug(&plug);
 
-       if (unaligned_aio)
-               mutex_unlock(ext4_aio_mutex(inode));
+       if (aio_mutex)
+               mutex_unlock(aio_mutex);
+       return ret;
 
+out:
+       mutex_unlock(&inode->i_mutex);
+       if (aio_mutex)
+               mutex_unlock(aio_mutex);
        return ret;
 }
 
-static ssize_t
-ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos)
+#ifdef CONFIG_FS_DAX
+static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
 {
-       struct inode *inode = file_inode(iocb->ki_filp);
-       ssize_t ret;
-
-       /*
-        * If we have encountered a bitmap-format file, the size limit
-        * is smaller than s_maxbytes, which is for extent-mapped files.
-        */
+       struct inode *inode = bh->b_assoc_map->host;
+       /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
+       loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
+       int err;
+       if (!uptodate)
+               return;
+       WARN_ON(!buffer_unwritten(bh));
+       err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
+}
 
-       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
-               struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-               size_t length = iov_length(iov, nr_segs);
+static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       int result;
+       handle_t *handle = NULL;
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
+       bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+       if (write) {
+               sb_start_pagefault(sb);
+               file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
+               handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+                                               EXT4_DATA_TRANS_BLOCKS(sb));
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
+
+       if (IS_ERR(handle))
+               result = VM_FAULT_SIGBUS;
+       else
+               result = __dax_fault(vma, vmf, ext4_get_block_dax,
+                                               ext4_end_io_unwritten);
 
-               if ((pos > sbi->s_bitmap_maxbytes ||
-                   (pos == sbi->s_bitmap_maxbytes && length > 0)))
-                       return -EFBIG;
+       if (write) {
+               if (!IS_ERR(handle))
+                       ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
+               sb_end_pagefault(sb);
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
 
-               if (pos + length > sbi->s_bitmap_maxbytes) {
-                       nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
-                                             sbi->s_bitmap_maxbytes - pos);
-               }
-       }
+       return result;
+}
 
-       if (unlikely(iocb->ki_filp->f_flags & O_DIRECT))
-               ret = ext4_file_dio_write(iocb, iov, nr_segs, pos);
+static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+                                               pmd_t *pmd, unsigned int flags)
+{
+       int result;
+       handle_t *handle = NULL;
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
+       bool write = flags & FAULT_FLAG_WRITE;
+
+       if (write) {
+               sb_start_pagefault(sb);
+               file_update_time(vma->vm_file);
+               down_read(&EXT4_I(inode)->i_mmap_sem);
+               handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+                               ext4_chunk_trans_blocks(inode,
+                                                       PMD_SIZE / PAGE_SIZE));
+       } else
+               down_read(&EXT4_I(inode)->i_mmap_sem);
+
+       if (IS_ERR(handle))
+               result = VM_FAULT_SIGBUS;
        else
-               ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+               result = __dax_pmd_fault(vma, addr, pmd, flags,
+                               ext4_get_block_dax, ext4_end_io_unwritten);
+
+       if (write) {
+               if (!IS_ERR(handle))
+                       ext4_journal_stop(handle);
+               up_read(&EXT4_I(inode)->i_mmap_sem);
+               sb_end_pagefault(sb);
+       } else
+               up_read(&EXT4_I(inode)->i_mmap_sem);
+
+       return result;
+}
+
+static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       int err;
+       struct inode *inode = file_inode(vma->vm_file);
+
+       sb_start_pagefault(inode->i_sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       err = __dax_mkwrite(vma, vmf, ext4_get_block_dax,
+                           ext4_end_io_unwritten);
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(inode->i_sb);
+
+       return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to the
+ * ext4_dax_mkwrite() handler, we check for races against truncate. Note that
+ * since we cycle through i_mmap_sem, we are also sure that any hole punching
+ * that began before we were called is finished by now, and so if it included
+ * part of the file we are working on, our pte will get unmapped and the check
+ * for pte_same() in wp_pfn_shared() fails. Thus the fault gets retried and
+ * things work out as desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+                               struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       struct super_block *sb = inode->i_sb;
+       int ret = VM_FAULT_NOPAGE;
+       loff_t size;
+
+       sb_start_pagefault(sb);
+       file_update_time(vma->vm_file);
+       down_read(&EXT4_I(inode)->i_mmap_sem);
+       size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       if (vmf->pgoff >= size)
+               ret = VM_FAULT_SIGBUS;
+       up_read(&EXT4_I(inode)->i_mmap_sem);
+       sb_end_pagefault(sb);
 
        return ret;
 }
 
+static const struct vm_operations_struct ext4_dax_vm_ops = {
+       .fault          = ext4_dax_fault,
+       .pmd_fault      = ext4_dax_pmd_fault,
+       .page_mkwrite   = ext4_dax_mkwrite,
+       .pfn_mkwrite    = ext4_dax_pfn_mkwrite,
+};
+#else
+#define ext4_dax_vm_ops        ext4_file_vm_ops
+#endif
+
 static const struct vm_operations_struct ext4_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = ext4_filemap_fault,
+       .map_pages      = filemap_map_pages,
        .page_mkwrite   = ext4_page_mkwrite,
-       .remap_pages    = generic_file_remap_pages,
 };
 
 static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
-       struct address_space *mapping = file->f_mapping;
+       struct inode *inode = file->f_mapping->host;
 
-       if (!mapping->a_ops->readpage)
-               return -ENOEXEC;
+       if (ext4_encrypted_inode(inode)) {
+               int err = ext4_get_encryption_info(inode);
+               if (err)
+                       return 0;
+               if (ext4_encryption_info(inode) == NULL)
+                       return -ENOKEY;
+       }
        file_accessed(file);
-       vma->vm_ops = &ext4_file_vm_ops;
+       if (IS_DAX(file_inode(file))) {
+               vma->vm_ops = &ext4_dax_vm_ops;
+               vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+       } else {
+               vma->vm_ops = &ext4_file_vm_ops;
+       }
        return 0;
 }
 
@@ -219,10 +361,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 {
        struct super_block *sb = inode->i_sb;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       struct ext4_inode_info *ei = EXT4_I(inode);
        struct vfsmount *mnt = filp->f_path.mnt;
        struct path path;
        char buf[64], *cp;
+       int ret;
 
        if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
                     !(sb->s_flags & MS_RDONLY))) {
@@ -244,6 +386,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                        handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
                        if (IS_ERR(handle))
                                return PTR_ERR(handle);
+                       BUFFER_TRACE(sbi->s_sbh, "get_write_access");
                        err = ext4_journal_get_write_access(handle, sbi->s_sbh);
                        if (err) {
                                ext4_journal_stop(handle);
@@ -255,26 +398,21 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                        ext4_journal_stop(handle);
                }
        }
+       if (ext4_encrypted_inode(inode)) {
+               ret = ext4_get_encryption_info(inode);
+               if (ret)
+                       return -EACCES;
+               if (ext4_encryption_info(inode) == NULL)
+                       return -ENOKEY;
+       }
        /*
         * Set up the jbd2_inode if we are opening the inode for
         * writing and the journal is present
         */
-       if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
-               struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
-
-               spin_lock(&inode->i_lock);
-               if (!ei->jinode) {
-                       if (!jinode) {
-                               spin_unlock(&inode->i_lock);
-                               return -ENOMEM;
-                       }
-                       ei->jinode = jinode;
-                       jbd2_journal_init_jbd_inode(ei->jinode, inode);
-                       jinode = NULL;
-               }
-               spin_unlock(&inode->i_lock);
-               if (unlikely(jinode != NULL))
-                       jbd2_free_inode(jinode);
+       if (filp->f_mode & FMODE_WRITE) {
+               ret = ext4_inode_attach_jinode(inode);
+               if (ret < 0)
+                       return ret;
        }
        return dquot_file_open(inode, filp);
 }
@@ -312,7 +450,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
        blkbits = inode->i_sb->s_blocksize_bits;
        startoff = *offset;
        lastoff = startoff;
-       endoff = (map->m_lblk + map->m_len) << blkbits;
+       endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
 
        index = startoff >> PAGE_CACHE_SHIFT;
        end = endoff >> PAGE_CACHE_SHIFT;
@@ -457,7 +595,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
                ret = ext4_map_blocks(NULL, inode, &map, 0);
                if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
                        if (last != start)
-                               dataoff = last << blkbits;
+                               dataoff = (loff_t)last << blkbits;
                        break;
                }
 
@@ -468,7 +606,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
                ext4_es_find_delayed_extent_range(inode, last, last, &es);
                if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
                        if (last != start)
-                               dataoff = last << blkbits;
+                               dataoff = (loff_t)last << blkbits;
                        break;
                }
 
@@ -486,7 +624,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
                }
 
                last++;
-               dataoff = last << blkbits;
+               dataoff = (loff_t)last << blkbits;
        } while (last <= end);
 
        mutex_unlock(&inode->i_mutex);
@@ -494,17 +632,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
        if (dataoff > isize)
                return -ENXIO;
 
-       if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
-               return -EINVAL;
-       if (dataoff > maxsize)
-               return -EINVAL;
-
-       if (dataoff != file->f_pos) {
-               file->f_pos = dataoff;
-               file->f_version = 0;
-       }
-
-       return dataoff;
+       return vfs_setpos(file, dataoff, maxsize);
 }
 
 /*
@@ -540,7 +668,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
                ret = ext4_map_blocks(NULL, inode, &map, 0);
                if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
                        last += ret;
-                       holeoff = last << blkbits;
+                       holeoff = (loff_t)last << blkbits;
                        continue;
                }
 
@@ -551,7 +679,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
                ext4_es_find_delayed_extent_range(inode, last, last, &es);
                if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
                        last = es.es_lblk + es.es_len;
-                       holeoff = last << blkbits;
+                       holeoff = (loff_t)last << blkbits;
                        continue;
                }
 
@@ -566,7 +694,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
                                                              &map, &holeoff);
                        if (!unwritten) {
                                last += ret;
-                               holeoff = last << blkbits;
+                               holeoff = (loff_t)last << blkbits;
                                continue;
                        }
                }
@@ -580,17 +708,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
        if (holeoff > isize)
                holeoff = isize;
 
-       if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
-               return -EINVAL;
-       if (holeoff > maxsize)
-               return -EINVAL;
-
-       if (holeoff != file->f_pos) {
-               file->f_pos = holeoff;
-               file->f_version = 0;
-       }
-
-       return holeoff;
+       return vfs_setpos(file, holeoff, maxsize);
 }
 
 /*
@@ -625,10 +743,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 
 const struct file_operations ext4_file_operations = {
        .llseek         = ext4_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = ext4_file_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = ext4_file_write_iter,
        .unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ext4_compat_ioctl,
@@ -638,7 +754,7 @@ const struct file_operations ext4_file_operations = {
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .fallocate      = ext4_fallocate,
 };
 
@@ -650,6 +766,7 @@ const struct inode_operations ext4_file_inode_operations = {
        .listxattr      = ext4_listxattr,
        .removexattr    = generic_removexattr,
        .get_acl        = ext4_get_acl,
+       .set_acl        = ext4_set_acl,
        .fiemap         = ext4_fiemap,
 };