ext4: don't take the i_mutex lock when doing DIO overwrites
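
A direct-IO write that only overwrites blocks which are already allocated and
written out needs no block allocation and no unwritten-extent conversion at
I/O completion, so i_mutex can be dropped for the duration of the I/O;
inode->i_data_sem is held for reading instead so the block mapping stays
stable. The write path tells ext4_ext_direct_IO() about this case through
iocb->private, and the new EXT4_GET_BLOCKS_NO_LOCK flag together with
ext4_get_block_write_nolock() lets ext4_map_blocks() skip re-taking
i_data_sem, which the DIO path already holds.

The fragment below is only an illustrative sketch of the caller side of this
scheme (the fs/ext4/file.c half of the change, which is not part of this
diff); the names and checks approximate that code rather than quoting it, and
pos/length are assumed to come from the enclosing write path:

	int overwrite = 0;
	struct ext4_map_blocks map;
	unsigned int blkbits = inode->i_blkbits;
	int err, len;

	mutex_lock(&inode->i_mutex);
	/* let ext4_ext_direct_IO() know whether i_mutex may be dropped */
	iocb->private = &overwrite;

	/* pos/length describe the user's write (from the enclosing write path) */
	map.m_lblk = pos >> blkbits;
	map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
		    - map.m_lblk;
	len = map.m_len;

	/*
	 * Only a write that lands entirely on blocks that are already
	 * allocated and written (mapped, not unwritten) counts as an
	 * overwrite; anything else keeps the usual i_mutex protection.
	 */
	err = ext4_map_blocks(NULL, inode, &map, 0);
	if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
		overwrite = 1;

	/* ... the write is then issued and i_mutex released afterwards ... */
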
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c77b0bd2c7110975476b7f6312ddde7da515ca7a..bed574dd4c22c9998fb8644656e73a05bf451835 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
 
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
+static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
+                             struct ext4_inode_info *ei)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       __u16 csum_lo;
+       __u16 csum_hi = 0;
+       __u32 csum;
+
+       csum_lo = raw->i_checksum_lo;
+       raw->i_checksum_lo = 0;
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
+               csum_hi = raw->i_checksum_hi;
+               raw->i_checksum_hi = 0;
+       }
+
+       csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
+                          EXT4_INODE_SIZE(inode->i_sb));
+
+       raw->i_checksum_lo = csum_lo;
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
+               raw->i_checksum_hi = csum_hi;
+
+       return csum;
+}
+
+static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
+                                 struct ext4_inode_info *ei)
+{
+       __u32 provided, calculated;
+
+       if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+           cpu_to_le32(EXT4_OS_LINUX) ||
+           !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       provided = le16_to_cpu(raw->i_checksum_lo);
+       calculated = ext4_inode_csum(inode, raw, ei);
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
+               provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16;
+       else
+               calculated &= 0xFFFF;
+
+       return provided == calculated;
+}
+
+static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
+                               struct ext4_inode_info *ei)
+{
+       __u32 csum;
+
+       if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+           cpu_to_le32(EXT4_OS_LINUX) ||
+           !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       csum = ext4_inode_csum(inode, raw, ei);
+       raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF);
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
+               raw->i_checksum_hi = cpu_to_le16(csum >> 16);
+}
+
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
                                              loff_t new_size)
 {
@@ -477,7 +544,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
-       down_read((&EXT4_I(inode)->i_data_sem));
+       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+               down_read((&EXT4_I(inode)->i_data_sem));
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -485,7 +553,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                retval = ext4_ind_map_blocks(handle, inode, map, flags &
                                             EXT4_GET_BLOCKS_KEEP_SIZE);
        }
-       up_read((&EXT4_I(inode)->i_data_sem));
+       if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+               up_read((&EXT4_I(inode)->i_data_sem));
 
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                int ret = check_block_validity(inode, map);
@@ -2751,6 +2820,32 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
                               EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
+static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+                  struct buffer_head *bh_result, int flags)
+{
+       handle_t *handle = ext4_journal_current_handle();
+       struct ext4_map_blocks map;
+       int ret = 0;
+
+       ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n",
+                  inode->i_ino, flags);
+
+       flags = EXT4_GET_BLOCKS_NO_LOCK;
+
+       map.m_lblk = iblock;
+       map.m_len = bh_result->b_size >> inode->i_blkbits;
+
+       ret = ext4_map_blocks(handle, inode, &map, flags);
+       if (ret > 0) {
+               map_bh(bh_result, inode->i_sb, map.m_pblk);
+               bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+                                       map.m_flags;
+               bh_result->b_size = inode->i_sb->s_blocksize * map.m_len;
+               ret = 0;
+       }
+       return ret;
+}
+
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                            ssize_t size, void *private, int ret,
                            bool is_async)
@@ -2899,6 +2994,18 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 
        loff_t final_size = offset + count;
        if (rw == WRITE && final_size <= inode->i_size) {
+               int overwrite = 0;
+
+               BUG_ON(iocb->private == NULL);
+
+               /* If we do an overwrite dio, i_mutex locking can be released */
+               overwrite = *((int *)iocb->private);
+
+               if (overwrite) {
+                       down_read(&EXT4_I(inode)->i_data_sem);
+                       mutex_unlock(&inode->i_mutex);
+               }
+
                /*
                 * We could direct write to holes and fallocate.
                 *
@@ -2924,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                if (!is_sync_kiocb(iocb)) {
                        ext4_io_end_t *io_end =
                                ext4_init_io_end(inode, GFP_NOFS);
-                       if (!io_end)
-                               return -ENOMEM;
+                       if (!io_end) {
+                               ret = -ENOMEM;
+                               goto retake_lock;
+                       }
                        io_end->flag |= EXT4_IO_END_DIRECT;
                        iocb->private = io_end;
                        /*
@@ -2938,13 +3047,22 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                        EXT4_I(inode)->cur_aio_dio = iocb->private;
                }
 
-               ret = __blockdev_direct_IO(rw, iocb, inode,
-                                        inode->i_sb->s_bdev, iov,
-                                        offset, nr_segs,
-                                        ext4_get_block_write,
-                                        ext4_end_io_dio,
-                                        NULL,
-                                        DIO_LOCKING);
+               if (overwrite)
+                       ret = __blockdev_direct_IO(rw, iocb, inode,
+                                                inode->i_sb->s_bdev, iov,
+                                                offset, nr_segs,
+                                                ext4_get_block_write_nolock,
+                                                ext4_end_io_dio,
+                                                NULL,
+                                                0);
+               else
+                       ret = __blockdev_direct_IO(rw, iocb, inode,
+                                                inode->i_sb->s_bdev, iov,
+                                                offset, nr_segs,
+                                                ext4_get_block_write,
+                                                ext4_end_io_dio,
+                                                NULL,
+                                                DIO_LOCKING);
                if (iocb->private)
                        EXT4_I(inode)->cur_aio_dio = NULL;
                /*
@@ -2964,7 +3082,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
                        ext4_free_io_end(iocb->private);
                        iocb->private = NULL;
-               } else if (ret > 0 && ext4_test_inode_state(inode,
+               } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
                                                EXT4_STATE_DIO_UNWRITTEN)) {
                        int err;
                        /*
@@ -2977,6 +3095,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                                ret = err;
                        ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
                }
+
+       retake_lock:
+               /* take i_mutex locking again if we did an overwrite dio */
+               if (overwrite) {
+                       up_read(&EXT4_I(inode)->i_data_sem);
+                       mutex_lock(&inode->i_mutex);
+               }
+
                return ret;
        }
 
@@ -3517,8 +3643,7 @@ make_io:
                                b = table;
                        end = b + EXT4_SB(sb)->s_inode_readahead_blks;
                        num = EXT4_INODES_PER_GROUP(sb);
-                       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                      EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+                       if (ext4_has_group_desc_csum(sb))
                                num -= ext4_itable_unused_count(sb, gdp);
                        table += num / inodes_per_block;
                        if (end > table)
@@ -3630,6 +3755,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
        int block;
+       uid_t i_uid;
+       gid_t i_gid;
 
        inode = iget_locked(sb, ino);
        if (!inode)
@@ -3644,13 +3771,48 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        if (ret < 0)
                goto bad_inode;
        raw_inode = ext4_raw_inode(&iloc);
+
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+                   EXT4_INODE_SIZE(inode->i_sb)) {
+                       EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
+                               EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
+                               EXT4_INODE_SIZE(inode->i_sb));
+                       ret = -EIO;
+                       goto bad_inode;
+               }
+       } else
+               ei->i_extra_isize = 0;
+
+       /* Precompute checksum seed for inode metadata */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+               struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+               __u32 csum;
+               __le32 inum = cpu_to_le32(inode->i_ino);
+               __le32 gen = raw_inode->i_generation;
+               csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
+                                  sizeof(inum));
+               ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
+                                             sizeof(gen));
+       }
+
+       if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
+               EXT4_ERROR_INODE(inode, "checksum invalid");
+               ret = -EIO;
+               goto bad_inode;
+       }
+
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
-       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
-       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+       i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+       i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
        if (!(test_opt(inode->i_sb, NO_UID32))) {
-               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
-               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
+               i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
+               i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
+       i_uid_write(inode, i_uid);
+       i_gid_write(inode, i_gid);
        set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 
        ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
@@ -3721,12 +3883,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        }
 
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
-               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
-               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-                   EXT4_INODE_SIZE(inode->i_sb)) {
-                       ret = -EIO;
-                       goto bad_inode;
-               }
                if (ei->i_extra_isize == 0) {
                        /* The extra space is currently unused. Use it. */
                        ei->i_extra_isize = sizeof(struct ext4_inode) -
@@ -3738,8 +3894,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                        if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
                                ext4_set_inode_state(inode, EXT4_STATE_XATTR);
                }
-       } else
-               ei->i_extra_isize = 0;
+       }
 
        EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
        EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
@@ -3870,6 +4025,8 @@ static int ext4_do_update_inode(handle_t *handle,
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct buffer_head *bh = iloc->bh;
        int err = 0, rc, block;
+       uid_t i_uid;
+       gid_t i_gid;
 
        /* For fields not tracked in the in-memory inode,
         * initialise them to zero for new inodes. */
@@ -3878,27 +4035,27 @@ static int ext4_do_update_inode(handle_t *handle,
 
        ext4_get_inode_flags(ei);
        raw_inode->i_mode = cpu_to_le16(inode->i_mode);
+       i_uid = i_uid_read(inode);
+       i_gid = i_gid_read(inode);
        if (!(test_opt(inode->i_sb, NO_UID32))) {
-               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
-               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
+               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
+               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
 /*
  * Fix up interoperability with old kernels. Otherwise, old inodes get
  * re-used with the upper 16 bits of the uid/gid intact
  */
                if (!ei->i_dtime) {
                        raw_inode->i_uid_high =
-                               cpu_to_le16(high_16_bits(inode->i_uid));
+                               cpu_to_le16(high_16_bits(i_uid));
                        raw_inode->i_gid_high =
-                               cpu_to_le16(high_16_bits(inode->i_gid));
+                               cpu_to_le16(high_16_bits(i_gid));
                } else {
                        raw_inode->i_uid_high = 0;
                        raw_inode->i_gid_high = 0;
                }
        } else {
-               raw_inode->i_uid_low =
-                       cpu_to_le16(fs_high2lowuid(inode->i_uid));
-               raw_inode->i_gid_low =
-                       cpu_to_le16(fs_high2lowgid(inode->i_gid));
+               raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
+               raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid));
                raw_inode->i_uid_high = 0;
                raw_inode->i_gid_high = 0;
        }
@@ -3936,7 +4093,7 @@ static int ext4_do_update_inode(handle_t *handle,
                        EXT4_SET_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
                        ext4_handle_sync(handle);
-                       err = ext4_handle_dirty_super(handle, sb);
+                       err = ext4_handle_dirty_super_now(handle, sb);
                }
        }
        raw_inode->i_generation = cpu_to_le32(inode->i_generation);
@@ -3963,6 +4120,8 @@ static int ext4_do_update_inode(handle_t *handle,
                raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
        }
 
+       ext4_inode_csum_set(inode, raw_inode, ei);
+
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
        rc = ext4_handle_dirty_metadata(handle, NULL, bh);
        if (!err)
@@ -4084,8 +4243,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
        if (is_quota_modification(inode, attr))
                dquot_initialize(inode);
-       if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
-               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+       if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
+           (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
                handle_t *handle;
 
                /* (user+group)*(old+new) structure, inode write (sb,
@@ -4207,7 +4366,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
         * will return the blocks that include the delayed allocation
         * blocks for this file.
         */
-       delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+       delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
+                               EXT4_I(inode)->i_reserved_data_blocks);
 
        stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
        return 0;