Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 20 Oct 2014 16:50:11 +0000 (09:50 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 20 Oct 2014 16:50:11 +0000 (09:50 -0700)
Pull ext4 updates from Ted Ts'o:
 "A large number of cleanups and bug fixes, with some (minor) journal
  optimizations"

[ This got sent to me before -rc1, but was stuck in my spam folder.   - Linus ]

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (67 commits)
  ext4: check s_chksum_driver when looking for bg csum presence
  ext4: move error report out of atomic context in ext4_init_block_bitmap()
  ext4: Replace open coded mdata csum feature to helper function
  ext4: delete useless comments about ext4_move_extents
  ext4: fix reservation overflow in ext4_da_write_begin
  ext4: add ext4_iget_normal() which is to be used for dir tree lookups
  ext4: don't orphan or truncate the boot loader inode
  ext4: grab missed write_count for EXT4_IOC_SWAP_BOOT
  ext4: optimize block allocation on grow indepth
  ext4: get rid of code duplication
  ext4: fix over-defensive complaint after journal abort
  ext4: fix return value of ext4_do_update_inode
  ext4: fix mmap data corruption when blocksize < pagesize
  vfs: fix data corruption when blocksize < pagesize for mmaped data
  ext4: fold ext4_nojournal_sops into ext4_sops
  ext4: support freezing ext2 (nojournal) file systems
  ext4: fold ext4_sync_fs_nojournal() into ext4_sync_fs()
  ext4: don't check quota format when there are no quota files
  jbd2: simplify calling convention around __jbd2_journal_clean_checkpoint_list
  jbd2: avoid pointless scanning of checkpoint lists
  ...

33 files changed:
fs/buffer.c
fs/ext4/balloc.c
fs/ext4/bitmap.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ext4_extents.h
fs/ext4/ext4_jbd2.c
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/extents_status.c
fs/ext4/extents_status.h
fs/ext4/ialloc.c
fs/ext4/indirect.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/migrate.c
fs/ext4/mmp.c
fs/ext4/move_extent.c
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/jbd/journal.c
fs/jbd2/checkpoint.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
include/linux/buffer_head.h
include/linux/jbd2.h
include/linux/mm.h
include/trace/events/ext4.h
mm/truncate.c

diff --git a/fs/buffer.c b/fs/buffer.c
index 9614adc7e7544f3e253260e2d43064a6ad6bd96e..6c48f20eddd4b60256c0e825e53dbbf724d115cb 100644
@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
  */
 static int
 grow_dev_page(struct block_device *bdev, sector_t block,
-               pgoff_t index, int size, int sizebits)
+             pgoff_t index, int size, int sizebits, gfp_t gfp)
 {
        struct inode *inode = bdev->bd_inode;
        struct page *page;
@@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
        int ret = 0;            /* Will call free_more_memory() */
        gfp_t gfp_mask;
 
-       gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
-       gfp_mask |= __GFP_MOVABLE;
+       gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
+
        /*
         * XXX: __getblk_slow() can not really deal with failure and
         * will endlessly loop on improvised global reclaim.  Prefer
@@ -1060,7 +1060,7 @@ failed:
  * that page was dirty, the buffers are set dirty also.
  */
 static int
-grow_buffers(struct block_device *bdev, sector_t block, int size)
+grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
 {
        pgoff_t index;
        int sizebits;
@@ -1087,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
        }
 
        /* Create a page with the proper size buffers.. */
-       return grow_dev_page(bdev, block, index, size, sizebits);
+       return grow_dev_page(bdev, block, index, size, sizebits, gfp);
 }
 
-static struct buffer_head *
-__getblk_slow(struct block_device *bdev, sector_t block, int size)
+struct buffer_head *
+__getblk_slow(struct block_device *bdev, sector_t block,
+            unsigned size, gfp_t gfp)
 {
        /* Size must be multiple of hard sectorsize */
        if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1113,13 +1114,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
                if (bh)
                        return bh;
 
-               ret = grow_buffers(bdev, block, size);
+               ret = grow_buffers(bdev, block, size, gfp);
                if (ret < 0)
                        return NULL;
                if (ret == 0)
                        free_more_memory();
        }
 }
+EXPORT_SYMBOL(__getblk_slow);
 
 /*
  * The relationship between dirty buffers and dirty pages:
@@ -1373,24 +1375,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
 EXPORT_SYMBOL(__find_get_block);
 
 /*
- * __getblk will locate (and, if necessary, create) the buffer_head
+ * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
  * which corresponds to the passed block_device, block and size. The
  * returned buffer has its reference count incremented.
  *
- * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
- * attempt is failing.  FIXME, perhaps?
+ * __getblk_gfp() will lock up the machine if grow_dev_page's
+ * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
  */
 struct buffer_head *
-__getblk(struct block_device *bdev, sector_t block, unsigned size)
+__getblk_gfp(struct block_device *bdev, sector_t block,
+            unsigned size, gfp_t gfp)
 {
        struct buffer_head *bh = __find_get_block(bdev, block, size);
 
        might_sleep();
        if (bh == NULL)
-               bh = __getblk_slow(bdev, block, size);
+               bh = __getblk_slow(bdev, block, size, gfp);
        return bh;
 }
-EXPORT_SYMBOL(__getblk);
+EXPORT_SYMBOL(__getblk_gfp);
 
 /*
  * Do async read-ahead on a buffer..
@@ -1406,24 +1409,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
 EXPORT_SYMBOL(__breadahead);
 
 /**
- *  __bread() - reads a specified block and returns the bh
+ *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
  *  @block: number of block
  *  @size: size (in bytes) to read
- * 
+ *  @gfp: page allocation flag
+ *
  *  Reads a specified block, and returns buffer head that contains it.
+ *  When @gfp is zero the page cache page is allocated from the
+ *  non-movable area, so that a pinned buffer does not get in the
+ *  way of page migration.
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread(struct block_device *bdev, sector_t block, unsigned size)
+__bread_gfp(struct block_device *bdev, sector_t block,
+                  unsigned size, gfp_t gfp)
 {
-       struct buffer_head *bh = __getblk(bdev, block, size);
+       struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
 
        if (likely(bh) && !buffer_uptodate(bh))
                bh = __bread_slow(bh);
        return bh;
 }
-EXPORT_SYMBOL(__bread);
+EXPORT_SYMBOL(__bread_gfp);
 
 /*
  * invalidate_bh_lrus() is called rarely - but not only at unmount.
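The exported __getblk_gfp()/__bread_gfp() pair is only half of the
interface change; include/linux/buffer_head.h is in the file list above
but its hunks are not shown. A plausible sketch of that header's side,
assuming the old names become wrappers that keep the movable default
while a new helper requests a non-movable page for pinned metadata:

/* sketch of the buffer_head.h wrappers assumed by this series */
static inline struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, unsigned size)
{
        return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
}

static inline struct buffer_head *
__bread(struct block_device *bdev, sector_t block, unsigned size)
{
        return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
}

/* hypothetical non-movable variant for buffers jbd2 may hold long-term */
static inline struct buffer_head *
sb_bread_unmovable(struct super_block *sb, sector_t block)
{
        return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
}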
@@ -2082,6 +2089,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
                        struct page *page, void *fsdata)
 {
        struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
        int i_size_changed = 0;
 
        copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2101,6 +2109,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
        unlock_page(page);
        page_cache_release(page);
 
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
        /*
         * Don't mark the inode dirty under page lock. First, it unnecessarily
         * makes the holding time of page lock longer. Second, it forces lock
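The generic_write_end() hunk above is the buffer.c part of the
"blocksize < pagesize" mmap corruption fix; the helper it calls is
added by the include/linux/mm.h and mm/truncate.c changes listed above
(hunks not shown). Its contract, as relied on here:

/*
 * Assumed declaration from include/linux/mm.h in this series.  After
 * i_size grew from 'from' to 'to', fix up the page straddling 'from':
 * with blocksize < pagesize that page's tail can be mapped writable
 * while its blocks past the old size are still unallocated, letting a
 * later mmap store dirty data that writeback cannot map to disk.
 */
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);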
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 581ef40fbe90b8b45f3d3474d48b8c80f6639ba8..83a6f497c4e0e6803345d4b69bde90504ffd02ce 100644
@@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
 }
 
 /* Initializes an uninitialized block bitmap */
-static void ext4_init_block_bitmap(struct super_block *sb,
+static int ext4_init_block_bitmap(struct super_block *sb,
                                   struct buffer_head *bh,
                                   ext4_group_t block_group,
                                   struct ext4_group_desc *gdp)
@@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb,
        /* If checksum is bad mark all blocks used to prevent allocation
         * essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-               ext4_error(sb, "Checksum bad for group %u", block_group);
                grp = ext4_get_group_info(sb, block_group);
                if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
                        percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
                                           count);
                }
                set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-               return;
+               return -EIO;
        }
        memset(bh->b_data, 0, sb->s_blocksize);
 
@@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
                             sb->s_blocksize * 8, bh->b_data);
        ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
        ext4_group_desc_csum_set(sb, block_group, gdp);
+       return 0;
 }
 
 /* Return the number of free blocks in a block group.  It is used when
@@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
        }
        ext4_lock_group(sb, block_group);
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-               ext4_init_block_bitmap(sb, bh, block_group, desc);
+               int err;
+
+               err = ext4_init_block_bitmap(sb, bh, block_group, desc);
                set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
+               if (err)
+                       ext4_error(sb, "Checksum bad for grp %u", block_group);
                return bh;
        }
        ext4_unlock_group(sb, block_group);
@@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
         * Account for the allocated meta blocks.  We will never
         * fail EDQUOT for metadata, but we do account for it.
         */
-       if (!(*errp) &&
-           ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
+       if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
                spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
                dquot_alloc_block_nofail(inode,
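The reason ext4_init_block_bitmap() now returns an error instead of
calling ext4_error() itself is visible in the
ext4_read_block_bitmap_nowait() hunk above: the initialization runs
under ext4_lock_group(), a spinlock, while ext4_error() may sleep (it
can force a superblock commit). Reduced to its shape:

/* sketch of the pattern above: only atomic-safe work under the group
 * spinlock; remember the verdict and report after unlocking */
ext4_lock_group(sb, block_group);               /* spinlock held */
err = ext4_init_block_bitmap(sb, bh, block_group, desc);
ext4_unlock_group(sb, block_group);
if (err)                                        /* safe to sleep here */
        ext4_error(sb, "Checksum bad for grp %u", block_group);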
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 3285aa5a706af3f97c6a182a12304c7478177d9d..b610779a958c32fbfa64696e554b62c1398cecd2 100644
@@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
        __u32 provided, calculated;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return 1;
 
        provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
@@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
        __u32 csum;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return;
 
        csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
@@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return 1;
 
        provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
@@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
        __u32 csum;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return;
 
        csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0bb3f9ea08329a46d27f75160a6b42718fed6042..c24143ea9c08e4e38823a42cecfb102e6d8db710 100644
@@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
                                        &file->f_ra, file,
                                        index, 1);
                        file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
-                       bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
+                       bh = ext4_bread(NULL, inode, map.m_lblk, 0);
+                       if (IS_ERR(bh))
+                               return PTR_ERR(bh);
                }
 
-               /*
-                * We ignore I/O errors on directories so users have a chance
-                * of recovering data when there's a bad sector
-                */
                if (!bh) {
                        if (!dir_has_error) {
                                EXT4_ERROR_FILE(file, 0,
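Note that the converted caller keeps two distinct failure checks, and
both matter: under the new convention an ERR_PTR() from ext4_bread()
is a hard error, while a NULL return still means a hole (no block
mapped at that offset), which is what the surviving "if (!bh)" branch
reports. The caller-side contract, sketched:

/* sketch of the new ext4_bread() contract (the label is illustrative) */
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
if (IS_ERR(bh))
        return PTR_ERR(bh);     /* read or mapping error */
if (bh == NULL)
        goto report_hole;       /* no block mapped: directory hole */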
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0c225cdb52cd20ba5927149c338d9749e951f57..c55a1faaed583778d482b0fb6368bd72b454fc19 100644
@@ -572,15 +572,15 @@ enum {
 
 /*
  * The bit position of these flags must not overlap with any of the
- * EXT4_GET_BLOCKS_*.  They are used by ext4_ext_find_extent(),
+ * EXT4_GET_BLOCKS_*.  They are used by ext4_find_extent(),
  * read_extent_tree_block(), ext4_split_extent_at(),
  * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
 * EXT4_EX_NOCACHE is used to indicate that we shouldn't be
  * caching the extents when reading from the extent tree while a
  * truncate or punch hole operation is in progress.
  */
-#define EXT4_EX_NOCACHE                                0x0400
-#define EXT4_EX_FORCE_CACHE                    0x0800
+#define EXT4_EX_NOCACHE                                0x40000000
+#define EXT4_EX_FORCE_CACHE                    0x20000000
 
 /*
  * Flags used by ext4_free_blocks
@@ -890,6 +890,7 @@ struct ext4_inode_info {
        struct ext4_es_tree i_es_tree;
        rwlock_t i_es_lock;
        struct list_head i_es_lru;
+       unsigned int i_es_all_nr;       /* protected by i_es_lock */
        unsigned int i_es_lru_nr;       /* protected by i_es_lock */
        unsigned long i_touch_when;     /* jiffies of last accessing */
 
@@ -1174,6 +1175,9 @@ struct ext4_super_block {
 #define EXT4_MF_MNTDIR_SAMPLED 0x0001
 #define EXT4_MF_FS_ABORTED     0x0002  /* Fatal error detected */
 
+/* Number of quota types we support */
+#define EXT4_MAXQUOTAS 2
+
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1237,7 +1241,7 @@ struct ext4_sb_info {
        u32 s_min_batch_time;
        struct block_device *journal_bdev;
 #ifdef CONFIG_QUOTA
-       char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
+       char *s_qf_names[EXT4_MAXQUOTAS];       /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
 #endif
        unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
@@ -1330,8 +1334,7 @@ struct ext4_sb_info {
        /* Reclaim extents from extent status tree */
        struct shrinker s_es_shrinker;
        struct list_head s_es_lru;
-       unsigned long s_es_last_sorted;
-       struct percpu_counter s_extent_cache_cnt;
+       struct ext4_es_stats s_es_stats;
        struct mb_cache *s_mb_cache;
        spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
 
@@ -1399,7 +1402,6 @@ enum {
        EXT4_STATE_EXT_MIGRATE,         /* Inode is migrating */
        EXT4_STATE_DIO_UNWRITTEN,       /* need convert on dio done*/
        EXT4_STATE_NEWENTRY,            /* File just added to dir */
-       EXT4_STATE_DELALLOC_RESERVED,   /* blks already reserved for delalloc */
        EXT4_STATE_DIOREAD_LOCK,        /* Disable support for dio read
                                           nolocking */
        EXT4_STATE_MAY_INLINE_DATA,     /* may have in-inode data */
@@ -2086,10 +2088,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
 
 /* inode.c */
-struct buffer_head *ext4_getblk(handle_t *, struct inode *,
-                                               ext4_lblk_t, int, int *);
-struct buffer_head *ext4_bread(handle_t *, struct inode *,
-                                               ext4_lblk_t, int, int *);
+struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
+struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_write(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
@@ -2109,6 +2109,7 @@ int do_journal_get_write_access(handle_t *handle,
 #define CONVERT_INLINE_DATA     2
 
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
+extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
 extern int  ext4_write_inode(struct inode *, struct writeback_control *);
 extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2332,10 +2333,18 @@ extern int ext4_register_li_request(struct super_block *sb,
 static inline int ext4_has_group_desc_csum(struct super_block *sb)
 {
        return EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                         EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
-                                         EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
+                                         EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
+              (EXT4_SB(sb)->s_chksum_driver != NULL);
 }
 
+static inline int ext4_has_metadata_csum(struct super_block *sb)
+{
+       WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+                    !EXT4_SB(sb)->s_chksum_driver);
+
+       return (EXT4_SB(sb)->s_chksum_driver != NULL);
+}
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
        return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2731,21 +2740,26 @@ extern int ext4_can_extents_be_merged(struct inode *inode,
                                      struct ext4_extent *ex1,
                                      struct ext4_extent *ex2);
 extern int ext4_ext_insert_extent(handle_t *, struct inode *,
-                                 struct ext4_ext_path *,
+                                 struct ext4_ext_path **,
                                  struct ext4_extent *, int);
-extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
-                                                 struct ext4_ext_path *,
-                                                 int flags);
+extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
+                                             struct ext4_ext_path **,
+                                             int flags);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
 extern int ext4_find_delalloc_range(struct inode *inode,
                                    ext4_lblk_t lblk_start,
                                    ext4_lblk_t lblk_end);
 extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
+extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        __u64 start, __u64 len);
 extern int ext4_ext_precache(struct inode *inode);
 extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
+extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
+                               struct inode *inode2, ext4_lblk_t lblk1,
+                            ext4_lblk_t lblk2,  ext4_lblk_t count,
+                            int mark_unwritten,int *err);
 
 /* move_extent.c */
 extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2755,8 +2769,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
                             __u64 start_orig, __u64 start_donor,
                             __u64 len, __u64 *moved_len);
-extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
-                           struct ext4_extent **extent);
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
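The EXT4_EX_* values move to the top of the word because, per the
comment above them, they ride in the same flags argument as the
EXT4_GET_BLOCKS_* values, and the old 0x0400/0x0800 values had grown
into the range occupied by newer EXT4_GET_BLOCKS_* bits. A hypothetical
compile-time guard, not part of this patch, would state the invariant
directly:

/* EXT4_GET_BLOCKS_ALL is an assumed mask covering every
 * EXT4_GET_BLOCKS_* bit; the guard itself is illustrative only */
#define EXT4_GET_BLOCKS_ALL     0x07ff
static inline void ext4_ex_flags_check(void)
{
        BUILD_BUG_ON((EXT4_EX_NOCACHE | EXT4_EX_FORCE_CACHE) &
                     EXT4_GET_BLOCKS_ALL);
}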
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a867f5ca99919d05e2e6709f80f08566a33c435d..3c938154709478ba39f5eb18d8d2b53678932ee8 100644
@@ -123,6 +123,7 @@ find_ext4_extent_tail(struct ext4_extent_header *eh)
 struct ext4_ext_path {
        ext4_fsblk_t                    p_block;
        __u16                           p_depth;
+       __u16                           p_maxdepth;
        struct ext4_extent              *p_ext;
        struct ext4_extent_idx          *p_idx;
        struct ext4_extent_header       *p_hdr;
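The new p_maxdepth field records the capacity of the path array; it is
what allows ext4_find_extent() (in the extents.c hunks below) to reuse
a caller-supplied path across calls. The relevant logic, annotated:

/* from ext4_find_extent() below, with added comments */
if (path) {
        ext4_ext_drop_refs(path);               /* release old bh refs */
        if (depth > path[0].p_maxdepth) {       /* tree outgrew array */
                kfree(path);
                *orig_path = path = NULL;       /* force fresh allocation */
        }
}
if (!path) {
        /* account possible depth increase */
        path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
                        GFP_NOFS);
        if (unlikely(!path))
                return ERR_PTR(-ENOMEM);
        path[0].p_maxdepth = depth + 1;         /* remember capacity */
}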
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0074e0d23d6ef77eb41f4c79357cac0b70696176..3445035c7e015e9460f2cd3f74b698bf823cabb6 100644
@@ -256,8 +256,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
        set_buffer_prio(bh);
        if (ext4_handle_valid(handle)) {
                err = jbd2_journal_dirty_metadata(handle, bh);
-               /* Errors can only happen if there is a bug */
-               if (WARN_ON_ONCE(err)) {
+               /* Errors can only happen due to aborted journal or a nasty bug */
+               if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) {
                        ext4_journal_abort_handle(where, line, __func__, bh,
                                                  handle, err);
                        if (inode == NULL) {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 17c00ff202f22042c30b563f7bc4a39492542215..9c5b49fb281e16f1717b989d31dd55ade6c03f8d 100644
 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0
 #endif
-#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
-#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
-#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
 
 static inline int ext4_jbd2_credits_xattr(struct inode *inode)
 {
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74292a71b384e2fd480357a8115c5e4936fa6f20..37043d0b2be8f034f6936d21594a20ae4053f3f8 100644
@@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode,
 {
        struct ext4_extent_tail *et;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(inode->i_sb))
                return 1;
 
        et = find_ext4_extent_tail(eh);
@@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode,
 {
        struct ext4_extent_tail *et;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(inode->i_sb))
                return;
 
        et = find_ext4_extent_tail(eh);
@@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode,
 
 static int ext4_split_extent(handle_t *handle,
                                struct inode *inode,
-                               struct ext4_ext_path *path,
+                               struct ext4_ext_path **ppath,
                                struct ext4_map_blocks *map,
                                int split_flag,
                                int flags);
 
 static int ext4_split_extent_at(handle_t *handle,
                             struct inode *inode,
-                            struct ext4_ext_path *path,
+                            struct ext4_ext_path **ppath,
                             ext4_lblk_t split,
                             int split_flag,
                             int flags);
@@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
        return size;
 }
 
+static inline int
+ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
+                          struct ext4_ext_path **ppath, ext4_lblk_t lblk,
+                          int nofail)
+{
+       struct ext4_ext_path *path = *ppath;
+       int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
+
+       return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
+                       EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
+                       EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
+                       (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
+}
+
 /*
  * Calculate the number of metadata blocks needed
  * to allocate @blocks
@@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
 
 void ext4_ext_drop_refs(struct ext4_ext_path *path)
 {
-       int depth = path->p_depth;
-       int i;
+       int depth, i;
 
+       if (!path)
+               return;
+       depth = path->p_depth;
        for (i = 0; i <= depth; i++, path++)
                if (path->p_bh) {
                        brelse(path->p_bh);
@@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 }
 
 struct ext4_ext_path *
-ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
-                    struct ext4_ext_path *path, int flags)
+ext4_find_extent(struct inode *inode, ext4_lblk_t block,
+                struct ext4_ext_path **orig_path, int flags)
 {
        struct ext4_extent_header *eh;
        struct buffer_head *bh;
-       short int depth, i, ppos = 0, alloc = 0;
+       struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
+       short int depth, i, ppos = 0;
        int ret;
 
        eh = ext_inode_hdr(inode);
        depth = ext_depth(inode);
 
-       /* account possible depth increase */
+       if (path) {
+               ext4_ext_drop_refs(path);
+               if (depth > path[0].p_maxdepth) {
+                       kfree(path);
+                       *orig_path = path = NULL;
+               }
+       }
        if (!path) {
+               /* account possible depth increase */
                path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
                                GFP_NOFS);
-               if (!path)
+               if (unlikely(!path))
                        return ERR_PTR(-ENOMEM);
-               alloc = 1;
+               path[0].p_maxdepth = depth + 1;
        }
        path[0].p_hdr = eh;
        path[0].p_bh = NULL;
@@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 
                bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
                                            flags);
-               if (IS_ERR(bh)) {
+               if (unlikely(IS_ERR(bh))) {
                        ret = PTR_ERR(bh);
                        goto err;
                }
@@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 
 err:
        ext4_ext_drop_refs(path);
-       if (alloc)
-               kfree(path);
+       kfree(path);
+       if (orig_path)
+               *orig_path = NULL;
        return ERR_PTR(ret);
 }
 
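Taken together, these hunks change the ownership rules for extent
paths: lookups take the path by reference, may reuse or reallocate it
in place, and on error free it and clear the caller's pointer. A
sketch of the resulting caller pattern (variable names illustrative):

struct ext4_ext_path *path = NULL;

path = ext4_find_extent(inode, lblk, &path, 0);
if (IS_ERR(path))
        return PTR_ERR(path);   /* path was freed and NULLed for us */
/* ... use path; a later lookup may reuse the same array ... */
path = ext4_find_extent(inode, next_lblk, &path, 0);
if (IS_ERR(path))
        return PTR_ERR(path);

/* one unconditional cleanup; both calls below are NULL-safe now */
ext4_ext_drop_refs(path);
kfree(path);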
@@ -1238,16 +1261,24 @@ cleanup:
  *   just created block
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
-                                unsigned int flags,
-                                struct ext4_extent *newext)
+                                unsigned int flags)
 {
        struct ext4_extent_header *neh;
        struct buffer_head *bh;
-       ext4_fsblk_t newblock;
+       ext4_fsblk_t newblock, goal = 0;
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
        int err = 0;
 
-       newblock = ext4_ext_new_meta_block(handle, inode, NULL,
-               newext, &err, flags);
+       /* Try to prepend new index to old one */
+       if (ext_depth(inode))
+               goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
+       if (goal > le32_to_cpu(es->s_first_data_block)) {
+               flags |= EXT4_MB_HINT_TRY_GOAL;
+               goal--;
+       } else
+               goal = ext4_inode_to_goal_block(inode);
+       newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
+                                       NULL, &err);
        if (newblock == 0)
                return err;
 
@@ -1314,9 +1345,10 @@ out:
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
                                    unsigned int mb_flags,
                                    unsigned int gb_flags,
-                                   struct ext4_ext_path *path,
+                                   struct ext4_ext_path **ppath,
                                    struct ext4_extent *newext)
 {
+       struct ext4_ext_path *path = *ppath;
        struct ext4_ext_path *curp;
        int depth, i, err = 0;
 
@@ -1340,23 +1372,21 @@ repeat:
                        goto out;
 
                /* refill path */
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode,
+               path = ext4_find_extent(inode,
                                    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-                                   path, gb_flags);
+                                   ppath, gb_flags);
                if (IS_ERR(path))
                        err = PTR_ERR(path);
        } else {
                /* tree is full, time to grow in depth */
-               err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
+               err = ext4_ext_grow_indepth(handle, inode, mb_flags);
                if (err)
                        goto out;
 
                /* refill path */
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode,
+               path = ext4_find_extent(inode,
                                   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-                                   path, gb_flags);
+                                   ppath, gb_flags);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -1559,7 +1589,7 @@ found_extent:
  * allocated block. Thus, index entries have to be consistent
  * with leaves.
  */
-static ext4_lblk_t
+ext4_lblk_t
 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
 {
        int depth;
@@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
                sizeof(struct ext4_extent_idx);
        s += sizeof(struct ext4_extent_header);
 
+       path[1].p_maxdepth = path[0].p_maxdepth;
        memcpy(path[0].p_hdr, path[1].p_hdr, s);
        path[0].p_depth = 0;
        path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
@@ -1896,9 +1927,10 @@ out:
  * creating new leaf in the no-space case.
  */
 int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
-                               struct ext4_ext_path *path,
+                               struct ext4_ext_path **ppath,
                                struct ext4_extent *newext, int gb_flags)
 {
+       struct ext4_ext_path *path = *ppath;
        struct ext4_extent_header *eh;
        struct ext4_extent *ex, *fex;
        struct ext4_extent *nearex; /* nearest extent */
@@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
        ext4_lblk_t next;
        int mb_flags = 0, unwritten;
 
+       if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+               mb_flags |= EXT4_MB_DELALLOC_RESERVED;
        if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
                EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
                return -EIO;
@@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
                /*
                 * Try to see whether we should rather test the extent on
                 * right from ex, or from the left of ex. This is because
-                * ext4_ext_find_extent() can return either extent on the
+                * ext4_find_extent() can return either extent on the
                 * left, or on the right from the searched position. This
                 * will make merging more effective.
                 */
@@ -2008,7 +2042,7 @@ prepend:
        if (next != EXT_MAX_BLOCKS) {
                ext_debug("next leaf block - %u\n", next);
                BUG_ON(npath != NULL);
-               npath = ext4_ext_find_extent(inode, next, NULL, 0);
+               npath = ext4_find_extent(inode, next, NULL, 0);
                if (IS_ERR(npath))
                        return PTR_ERR(npath);
                BUG_ON(npath->p_depth != path->p_depth);
@@ -2028,9 +2062,9 @@ prepend:
         * We're gonna add a new leaf in the tree.
         */
        if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
-               mb_flags = EXT4_MB_USE_RESERVED;
+               mb_flags |= EXT4_MB_USE_RESERVED;
        err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
-                                      path, newext);
+                                      ppath, newext);
        if (err)
                goto cleanup;
        depth = ext_depth(inode);
@@ -2108,10 +2142,8 @@ merge:
        err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 
 cleanup:
-       if (npath) {
-               ext4_ext_drop_refs(npath);
-               kfree(npath);
-       }
+       ext4_ext_drop_refs(npath);
+       kfree(npath);
        return err;
 }
 
@@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
                /* find extent for this block */
                down_read(&EXT4_I(inode)->i_data_sem);
 
-               if (path && ext_depth(inode) != depth) {
-                       /* depth was changed. we have to realloc path */
-                       kfree(path);
-                       path = NULL;
-               }
-
-               path = ext4_ext_find_extent(inode, block, path, 0);
+               path = ext4_find_extent(inode, block, &path, 0);
                if (IS_ERR(path)) {
                        up_read(&EXT4_I(inode)->i_data_sem);
                        err = PTR_ERR(path);
@@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
                }
                ex = path[depth].p_ext;
                next = ext4_ext_next_allocated_block(path);
-               ext4_ext_drop_refs(path);
 
                flags = 0;
                exists = 0;
@@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
                block = es.es_lblk + es.es_len;
        }
 
-       if (path) {
-               ext4_ext_drop_refs(path);
-               kfree(path);
-       }
-
+       ext4_ext_drop_refs(path);
+       kfree(path);
        return err;
 }
 
@@ -2826,7 +2848,7 @@ again:
                ext4_lblk_t ee_block;
 
                /* find extent for this block */
-               path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
+               path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
                if (IS_ERR(path)) {
                        ext4_journal_stop(handle);
                        return PTR_ERR(path);
@@ -2854,24 +2876,14 @@ again:
                 */
                if (end >= ee_block &&
                    end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
-                       int split_flag = 0;
-
-                       if (ext4_ext_is_unwritten(ex))
-                               split_flag = EXT4_EXT_MARK_UNWRIT1 |
-                                            EXT4_EXT_MARK_UNWRIT2;
-
                        /*
                         * Split the extent in two so that 'end' is the last
                         * block in the first new extent. Also we should not
                         * fail removing space due to ENOSPC so try to use
                         * reserved block if that happens.
                         */
-                       err = ext4_split_extent_at(handle, inode, path,
-                                       end + 1, split_flag,
-                                       EXT4_EX_NOCACHE |
-                                       EXT4_GET_BLOCKS_PRE_IO |
-                                       EXT4_GET_BLOCKS_METADATA_NOFAIL);
-
+                       err = ext4_force_split_extent_at(handle, inode, &path,
+                                                        end + 1, 1);
                        if (err < 0)
                                goto out;
                }
@@ -2893,7 +2905,7 @@ again:
                        ext4_journal_stop(handle);
                        return -ENOMEM;
                }
-               path[0].p_depth = depth;
+               path[0].p_maxdepth = path[0].p_depth = depth;
                path[0].p_hdr = ext_inode_hdr(inode);
                i = 0;
 
@@ -3013,10 +3025,9 @@ again:
 out:
        ext4_ext_drop_refs(path);
        kfree(path);
-       if (err == -EAGAIN) {
-               path = NULL;
+       path = NULL;
+       if (err == -EAGAIN)
                goto again;
-       }
        ext4_journal_stop(handle);
 
        return err;
@@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
  */
 static int ext4_split_extent_at(handle_t *handle,
                             struct inode *inode,
-                            struct ext4_ext_path *path,
+                            struct ext4_ext_path **ppath,
                             ext4_lblk_t split,
                             int split_flag,
                             int flags)
 {
+       struct ext4_ext_path *path = *ppath;
        ext4_fsblk_t newblock;
        ext4_lblk_t ee_block;
        struct ext4_extent *ex, newex, orig_ex, zero_ex;
@@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,
        if (split_flag & EXT4_EXT_MARK_UNWRIT2)
                ext4_ext_mark_unwritten(ex2);
 
-       err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+       err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
        if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
                if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
                        if (split_flag & EXT4_EXT_DATA_VALID1) {
@@ -3271,11 +3283,12 @@ fix_extent_len:
  */
 static int ext4_split_extent(handle_t *handle,
                              struct inode *inode,
-                             struct ext4_ext_path *path,
+                             struct ext4_ext_path **ppath,
                              struct ext4_map_blocks *map,
                              int split_flag,
                              int flags)
 {
+       struct ext4_ext_path *path = *ppath;
        ext4_lblk_t ee_block;
        struct ext4_extent *ex;
        unsigned int ee_len, depth;
@@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle,
                                       EXT4_EXT_MARK_UNWRIT2;
                if (split_flag & EXT4_EXT_DATA_VALID2)
                        split_flag1 |= EXT4_EXT_DATA_VALID1;
-               err = ext4_split_extent_at(handle, inode, path,
+               err = ext4_split_extent_at(handle, inode, ppath,
                                map->m_lblk + map->m_len, split_flag1, flags1);
                if (err)
                        goto out;
@@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle,
         * Update path is required because previous ext4_split_extent_at() may
         * result in split of original leaf or extent zeroout.
         */
-       ext4_ext_drop_refs(path);
-       path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
+       path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
        if (IS_ERR(path))
                return PTR_ERR(path);
        depth = ext_depth(inode);
@@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle,
                        split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
                                                     EXT4_EXT_MARK_UNWRIT2);
                }
-               err = ext4_split_extent_at(handle, inode, path,
+               err = ext4_split_extent_at(handle, inode, ppath,
                                map->m_lblk, split_flag1, flags);
                if (err)
                        goto out;
@@ -3364,9 +3376,10 @@ out:
 static int ext4_ext_convert_to_initialized(handle_t *handle,
                                           struct inode *inode,
                                           struct ext4_map_blocks *map,
-                                          struct ext4_ext_path *path,
+                                          struct ext4_ext_path **ppath,
                                           int flags)
 {
+       struct ext4_ext_path *path = *ppath;
        struct ext4_sb_info *sbi;
        struct ext4_extent_header *eh;
        struct ext4_map_blocks split_map;
@@ -3590,7 +3603,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                }
        }
 
-       allocated = ext4_split_extent(handle, inode, path,
+       allocated = ext4_split_extent(handle, inode, ppath,
                                      &split_map, split_flag, flags);
        if (allocated < 0)
                err = allocated;
@@ -3629,9 +3642,10 @@ out:
 static int ext4_split_convert_extents(handle_t *handle,
                                        struct inode *inode,
                                        struct ext4_map_blocks *map,
-                                       struct ext4_ext_path *path,
+                                       struct ext4_ext_path **ppath,
                                        int flags)
 {
+       struct ext4_ext_path *path = *ppath;
        ext4_lblk_t eof_block;
        ext4_lblk_t ee_block;
        struct ext4_extent *ex;
@@ -3665,74 +3679,15 @@ static int ext4_split_convert_extents(handle_t *handle,
                split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
        }
        flags |= EXT4_GET_BLOCKS_PRE_IO;
-       return ext4_split_extent(handle, inode, path, map, split_flag, flags);
-}
-
-static int ext4_convert_initialized_extents(handle_t *handle,
-                                           struct inode *inode,
-                                           struct ext4_map_blocks *map,
-                                           struct ext4_ext_path *path)
-{
-       struct ext4_extent *ex;
-       ext4_lblk_t ee_block;
-       unsigned int ee_len;
-       int depth;
-       int err = 0;
-
-       depth = ext_depth(inode);
-       ex = path[depth].p_ext;
-       ee_block = le32_to_cpu(ex->ee_block);
-       ee_len = ext4_ext_get_actual_len(ex);
-
-       ext_debug("%s: inode %lu, logical"
-               "block %llu, max_blocks %u\n", __func__, inode->i_ino,
-                 (unsigned long long)ee_block, ee_len);
-
-       if (ee_block != map->m_lblk || ee_len > map->m_len) {
-               err = ext4_split_convert_extents(handle, inode, map, path,
-                               EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
-               if (err < 0)
-                       goto out;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
-                       goto out;
-               }
-               depth = ext_depth(inode);
-               ex = path[depth].p_ext;
-               if (!ex) {
-                       EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
-                                        (unsigned long) map->m_lblk);
-                       err = -EIO;
-                       goto out;
-               }
-       }
-
-       err = ext4_ext_get_access(handle, inode, path + depth);
-       if (err)
-               goto out;
-       /* first mark the extent as unwritten */
-       ext4_ext_mark_unwritten(ex);
-
-       /* note: ext4_ext_correct_indexes() isn't needed here because
-        * borders are not changed
-        */
-       ext4_ext_try_to_merge(handle, inode, path, ex);
-
-       /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-out:
-       ext4_ext_show_leaf(inode, path);
-       return err;
+       return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
 }
 
-
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
                                                struct inode *inode,
                                                struct ext4_map_blocks *map,
-                                               struct ext4_ext_path *path)
+                                               struct ext4_ext_path **ppath)
 {
+       struct ext4_ext_path *path = *ppath;
        struct ext4_extent *ex;
        ext4_lblk_t ee_block;
        unsigned int ee_len;
@@ -3761,16 +3716,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
                             inode->i_ino, (unsigned long long)ee_block, ee_len,
                             (unsigned long long)map->m_lblk, map->m_len);
 #endif
-               err = ext4_split_convert_extents(handle, inode, map, path,
+               err = ext4_split_convert_extents(handle, inode, map, ppath,
                                                 EXT4_GET_BLOCKS_CONVERT);
                if (err < 0)
-                       goto out;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
-                       goto out;
-               }
+                       return err;
+               path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+               if (IS_ERR(path))
+                       return PTR_ERR(path);
                depth = ext_depth(inode);
                ex = path[depth].p_ext;
        }
@@ -3963,12 +3915,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 }
 
 static int
-ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
-                       struct ext4_map_blocks *map,
-                       struct ext4_ext_path *path, int flags,
-                       unsigned int allocated, ext4_fsblk_t newblock)
+convert_initialized_extent(handle_t *handle, struct inode *inode,
+                          struct ext4_map_blocks *map,
+                          struct ext4_ext_path **ppath, int flags,
+                          unsigned int allocated, ext4_fsblk_t newblock)
 {
-       int ret = 0;
+       struct ext4_ext_path *path = *ppath;
+       struct ext4_extent *ex;
+       ext4_lblk_t ee_block;
+       unsigned int ee_len;
+       int depth;
        int err = 0;
 
        /*
@@ -3978,28 +3934,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
        if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
                map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
 
-       ret = ext4_convert_initialized_extents(handle, inode, map,
-                                               path);
-       if (ret >= 0) {
-               ext4_update_inode_fsync_trans(handle, inode, 1);
-               err = check_eofblocks_fl(handle, inode, map->m_lblk,
-                                        path, map->m_len);
-       } else
-               err = ret;
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
+
+       ext_debug("%s: inode %lu, logical"
+               "block %llu, max_blocks %u\n", __func__, inode->i_ino,
+                 (unsigned long long)ee_block, ee_len);
+
+       if (ee_block != map->m_lblk || ee_len > map->m_len) {
+               err = ext4_split_convert_extents(handle, inode, map, ppath,
+                               EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
+               if (err < 0)
+                       return err;
+               path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+               if (IS_ERR(path))
+                       return PTR_ERR(path);
+               depth = ext_depth(inode);
+               ex = path[depth].p_ext;
+               if (!ex) {
+                       EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                        (unsigned long) map->m_lblk);
+                       return -EIO;
+               }
+       }
+
+       err = ext4_ext_get_access(handle, inode, path + depth);
+       if (err)
+               return err;
+       /* first mark the extent as unwritten */
+       ext4_ext_mark_unwritten(ex);
+
+       /* note: ext4_ext_correct_indexes() isn't needed here because
+        * borders are not changed
+        */
+       ext4_ext_try_to_merge(handle, inode, path, ex);
+
+       /* Mark modified extent as dirty */
+       err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+       if (err)
+               return err;
+       ext4_ext_show_leaf(inode, path);
+
+       ext4_update_inode_fsync_trans(handle, inode, 1);
+       err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len);
+       if (err)
+               return err;
        map->m_flags |= EXT4_MAP_UNWRITTEN;
        if (allocated > map->m_len)
                allocated = map->m_len;
        map->m_len = allocated;
-
-       return err ? err : allocated;
+       return allocated;
 }
 
 static int
 ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map,
-                       struct ext4_ext_path *path, int flags,
+                       struct ext4_ext_path **ppath, int flags,
                        unsigned int allocated, ext4_fsblk_t newblock)
 {
+       struct ext4_ext_path *path = *ppath;
        int ret = 0;
        int err = 0;
        ext4_io_end_t *io = ext4_inode_aio(inode);
@@ -4021,8 +4016,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 
        /* get_block() before submit the IO, split the extent */
        if (flags & EXT4_GET_BLOCKS_PRE_IO) {
-               ret = ext4_split_convert_extents(handle, inode, map,
-                                        path, flags | EXT4_GET_BLOCKS_CONVERT);
+               ret = ext4_split_convert_extents(handle, inode, map, ppath,
+                                        flags | EXT4_GET_BLOCKS_CONVERT);
                if (ret <= 0)
                        goto out;
                /*
@@ -4040,7 +4035,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
        /* IO end_io complete, convert the filled extent to written */
        if (flags & EXT4_GET_BLOCKS_CONVERT) {
                ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
-                                                       path);
+                                                          ppath);
                if (ret >= 0) {
                        ext4_update_inode_fsync_trans(handle, inode, 1);
                        err = check_eofblocks_fl(handle, inode, map->m_lblk,
@@ -4078,7 +4073,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
        }
 
        /* buffered write, writepage time, convert*/
-       ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags);
+       ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
        if (ret >= 0)
                ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
@@ -4279,7 +4274,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
        /* find extent for this block */
-       path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
+       path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
        if (IS_ERR(path)) {
                err = PTR_ERR(path);
                path = NULL;
@@ -4291,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        /*
         * consistent leaf must not be empty;
         * this situation is possible, though, _during_ tree modification;
-        * this is why assert can't be put in ext4_ext_find_extent()
+        * this is why assert can't be put in ext4_find_extent()
         */
        if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
                EXT4_ERROR_INODE(inode, "bad extent address "
@@ -4331,15 +4326,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                         */
                        if ((!ext4_ext_is_unwritten(ex)) &&
                            (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
-                               allocated = ext4_ext_convert_initialized_extent(
-                                               handle, inode, map, path, flags,
-                                               allocated, newblock);
+                               allocated = convert_initialized_extent(
+                                               handle, inode, map, &path,
+                                               flags, allocated, newblock);
                                goto out2;
                        } else if (!ext4_ext_is_unwritten(ex))
                                goto out;
 
                        ret = ext4_ext_handle_unwritten_extents(
-                               handle, inode, map, path, flags,
+                               handle, inode, map, &path, flags,
                                allocated, newblock);
                        if (ret < 0)
                                err = ret;
@@ -4376,7 +4371,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
        /*
         * If we are doing bigalloc, check to see if the extent returned
-        * by ext4_ext_find_extent() implies a cluster we can use.
+        * by ext4_find_extent() implies a cluster we can use.
         */
        if (cluster_offset && ex &&
            get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
@@ -4451,6 +4446,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                ar.flags = 0;
        if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
                ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+               ar.flags |= EXT4_MB_DELALLOC_RESERVED;
        newblock = ext4_mb_new_blocks(handle, &ar, &err);
        if (!newblock)
                goto out2;
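This hunk is the other end of the EXT4_STATE_DELALLOC_RESERVED removal
seen in the ext4.h section above: the "blocks were reserved by
delalloc" fact now travels as an explicit per-call flag rather than
per-inode state, presumably so it cannot leak to unrelated allocations
against the same inode. The plumbing, gathered from the hunks above:

/* caller marks the request */
flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;

/* ext4_ext_map_blocks() forwards it to mballoc (hunk just above) */
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
        ar.flags |= EXT4_MB_DELALLOC_RESERVED;

/* ext4_new_meta_blocks() then tests the mballoc flag instead of the
 * removed inode state bit (balloc.c hunk near the top of this diff) */
if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED))
        account_quota_nofail(); /* stands in for the dquot_*_nofail calls */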
@@ -4486,7 +4483,7 @@ got_allocated_blocks:
                err = check_eofblocks_fl(handle, inode, map->m_lblk,
                                         path, ar.len);
        if (!err)
-               err = ext4_ext_insert_extent(handle, inode, path,
+               err = ext4_ext_insert_extent(handle, inode, &path,
                                             &newex, flags);
 
        if (!err && set_unwritten) {
@@ -4619,10 +4616,8 @@ out:
        map->m_pblk = newblock;
        map->m_len = allocated;
 out2:
-       if (path) {
-               ext4_ext_drop_refs(path);
-               kfree(path);
-       }
+       ext4_ext_drop_refs(path);
+       kfree(path);
 
        trace_ext4_ext_map_blocks_exit(inode, flags, map,
                                       err ? err : allocated);
@@ -4799,7 +4794,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                max_blocks -= lblk;
 
        flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
-               EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
+               EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+               EXT4_EX_NOCACHE;
        if (mode & FALLOC_FL_KEEP_SIZE)
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
@@ -4837,15 +4833,21 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                ext4_inode_block_unlocked_dio(inode);
                inode_dio_wait(inode);
 
+               ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+                                            flags, mode);
+               if (ret)
+                       goto out_dio;
                /*
                 * Remove entire range from the extent status tree.
+                *
+                * ext4_es_remove_extent(inode, lblk, max_blocks) is
+                * NOT sufficient.  I'm not sure why this is the case,
+                * but let's be conservative and remove the extent
+                * status tree for the entire inode.  There should be
+                * no outstanding delalloc extents thanks to the
+                * filemap_write_and_wait_range() call above.
                 */
-               ret = ext4_es_remove_extent(inode, lblk, max_blocks);
-               if (ret)
-                       goto out_dio;
-
-               ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
-                                            flags, mode);
+               ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
                if (ret)
                        goto out_dio;
        }
@@ -5304,36 +5306,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
        struct ext4_ext_path *path;
        int ret = 0, depth;
        struct ext4_extent *extent;
-       ext4_lblk_t stop_block, current_block;
+       ext4_lblk_t stop_block;
        ext4_lblk_t ex_start, ex_end;
 
        /* Let path point to the last extent */
-       path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+       path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
        if (IS_ERR(path))
                return PTR_ERR(path);
 
        depth = path->p_depth;
        extent = path[depth].p_ext;
-       if (!extent) {
-               ext4_ext_drop_refs(path);
-               kfree(path);
-               return ret;
-       }
+       if (!extent)
+               goto out;
 
        stop_block = le32_to_cpu(extent->ee_block) +
                        ext4_ext_get_actual_len(extent);
-       ext4_ext_drop_refs(path);
-       kfree(path);
 
        /* Nothing to shift, if hole is at the end of file */
        if (start >= stop_block)
-               return ret;
+               goto out;
 
        /*
         * Don't start shifting extents until we make sure the hole is big
         * enough to accommodate the shift.
         */
-       path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+       path = ext4_find_extent(inode, start - 1, &path, 0);
        if (IS_ERR(path))
                return PTR_ERR(path);
        depth = path->p_depth;
@@ -5346,8 +5343,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                ex_start = 0;
                ex_end = 0;
        }
-       ext4_ext_drop_refs(path);
-       kfree(path);
 
        if ((start == ex_start && shift > ex_start) ||
            (shift > start - ex_end))
@@ -5355,7 +5350,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 
        /* It's safe to start updating extents */
        while (start < stop_block) {
-               path = ext4_ext_find_extent(inode, start, NULL, 0);
+               path = ext4_find_extent(inode, start, &path, 0);
                if (IS_ERR(path))
                        return PTR_ERR(path);
                depth = path->p_depth;
@@ -5365,27 +5360,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                                         (unsigned long) start);
                        return -EIO;
                }
-
-               current_block = le32_to_cpu(extent->ee_block);
-               if (start > current_block) {
+               if (start > le32_to_cpu(extent->ee_block)) {
                        /* Hole, move to the next extent */
-                       ret = mext_next_extent(inode, path, &extent);
-                       if (ret != 0) {
-                               ext4_ext_drop_refs(path);
-                               kfree(path);
-                               if (ret == 1)
-                                       ret = 0;
-                               break;
+                       if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
+                               path[depth].p_ext++;
+                       } else {
+                               start = ext4_ext_next_allocated_block(path);
+                               continue;
                        }
                }
                ret = ext4_ext_shift_path_extents(path, shift, inode,
                                handle, &start);
-               ext4_ext_drop_refs(path);
-               kfree(path);
                if (ret)
                        break;
        }
-
+out:
+       ext4_ext_drop_refs(path);
+       kfree(path);
        return ret;
 }
 
@@ -5508,3 +5499,199 @@ out_mutex:
        mutex_unlock(&inode->i_mutex);
        return ret;
 }
+
+/**
+ * ext4_swap_extents - Swap extents between two inodes
+ *
+ * @inode1:    First inode
+ * @inode2:    Second inode
+ * @lblk1:     Start block for first inode
+ * @lblk2:     Start block for second inode
+ * @count:     Number of blocks to swap
+ * @unwritten: Mark second inode's extents as unwritten after swap
+ * @erp:       Pointer to save error value
+ *
+ * This helper routine does exactly what it promises: swap extents. All other
+ * work, such as page-cache locking consistency, bh mapping consistency, and
+ * copying of extent data, must be performed by the caller.
+ * Locking:
+ *             i_mutex is held for both inodes
+ *             i_data_sem is locked for write for both inodes
+ * Assumptions:
+ *             All pages from requested range are locked for both inodes
+ */
+int
+ext4_swap_extents(handle_t *handle, struct inode *inode1,
+                    struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
+                 ext4_lblk_t count, int unwritten, int *erp)
+{
+       struct ext4_ext_path *path1 = NULL;
+       struct ext4_ext_path *path2 = NULL;
+       int replaced_count = 0;
+
+       BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
+       BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
+       BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+       BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+
+       *erp = ext4_es_remove_extent(inode1, lblk1, count);
+       if (unlikely(*erp))
+               return 0;
+       *erp = ext4_es_remove_extent(inode2, lblk2, count);
+       if (unlikely(*erp))
+               return 0;
+
+       while (count) {
+               struct ext4_extent *ex1, *ex2, tmp_ex;
+               ext4_lblk_t e1_blk, e2_blk;
+               int e1_len, e2_len, len;
+               int split = 0;
+
+               path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
+               if (unlikely(IS_ERR(path1))) {
+                       *erp = PTR_ERR(path1);
+                       path1 = NULL;
+               finish:
+                       count = 0;
+                       goto repeat;
+               }
+               path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
+               if (unlikely(IS_ERR(path2))) {
+                       *erp = PTR_ERR(path2);
+                       path2 = NULL;
+                       goto finish;
+               }
+               ex1 = path1[path1->p_depth].p_ext;
+               ex2 = path2[path2->p_depth].p_ext;
+               /* Do we have something to swap? */
+               if (unlikely(!ex2 || !ex1))
+                       goto finish;
+
+               e1_blk = le32_to_cpu(ex1->ee_block);
+               e2_blk = le32_to_cpu(ex2->ee_block);
+               e1_len = ext4_ext_get_actual_len(ex1);
+               e2_len = ext4_ext_get_actual_len(ex2);
+
+               /* Hole handling */
+               if (!in_range(lblk1, e1_blk, e1_len) ||
+                   !in_range(lblk2, e2_blk, e2_len)) {
+                       ext4_lblk_t next1, next2;
+
+                       /* if hole after extent, then go to next extent */
+                       next1 = ext4_ext_next_allocated_block(path1);
+                       next2 = ext4_ext_next_allocated_block(path2);
+                       /* If hole before extent, then shift to that extent */
+                       if (e1_blk > lblk1)
+                               next1 = e1_blk;
+                       if (e2_blk > lblk2)
+                               next2 = e2_blk;
+                       /* Do we have something to swap */
+                       if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
+                               goto finish;
+                       /* Move to the rightmost boundary */
+                       len = next1 - lblk1;
+                       if (len < next2 - lblk2)
+                               len = next2 - lblk2;
+                       if (len > count)
+                               len = count;
+                       lblk1 += len;
+                       lblk2 += len;
+                       count -= len;
+                       goto repeat;
+               }
+
+               /* Prepare left boundary */
+               if (e1_blk < lblk1) {
+                       split = 1;
+                       *erp = ext4_force_split_extent_at(handle, inode1,
+                                               &path1, lblk1, 0);
+                       if (unlikely(*erp))
+                               goto finish;
+               }
+               if (e2_blk < lblk2) {
+                       split = 1;
+                       *erp = ext4_force_split_extent_at(handle, inode2,
+                                               &path2,  lblk2, 0);
+                       if (unlikely(*erp))
+                               goto finish;
+               }
+               /* ext4_split_extent_at() may result in leaf extent split,
+                * path must be revalidated. */
+               if (split)
+                       goto repeat;
+
+               /* Prepare right boundary */
+               len = count;
+               if (len > e1_blk + e1_len - lblk1)
+                       len = e1_blk + e1_len - lblk1;
+               if (len > e2_blk + e2_len - lblk2)
+                       len = e2_blk + e2_len - lblk2;
+
+               if (len != e1_len) {
+                       split = 1;
+                       *erp = ext4_force_split_extent_at(handle, inode1,
+                                               &path1, lblk1 + len, 0);
+                       if (unlikely(*erp))
+                               goto finish;
+               }
+               if (len != e2_len) {
+                       split = 1;
+                       *erp = ext4_force_split_extent_at(handle, inode2,
+                                               &path2, lblk2 + len, 0);
+                       if (*erp)
+                               goto finish;
+               }
+               /* ext4_split_extent_at() may result in leaf extent split,
+                * path must be revalidated. */
+               if (split)
+                       goto repeat;
+
+               BUG_ON(e2_len != e1_len);
+               *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
+               if (unlikely(*erp))
+                       goto finish;
+               *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
+               if (unlikely(*erp))
+                       goto finish;
+
+               /* Both extents are fully inside boundaries. Swap them now */
+               tmp_ex = *ex1;
+               ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
+               ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
+               ex1->ee_len = cpu_to_le16(e2_len);
+               ex2->ee_len = cpu_to_le16(e1_len);
+               if (unwritten)
+                       ext4_ext_mark_unwritten(ex2);
+               if (ext4_ext_is_unwritten(&tmp_ex))
+                       ext4_ext_mark_unwritten(ex1);
+
+               ext4_ext_try_to_merge(handle, inode2, path2, ex2);
+               ext4_ext_try_to_merge(handle, inode1, path1, ex1);
+               *erp = ext4_ext_dirty(handle, inode2, path2 +
+                                     path2->p_depth);
+               if (unlikely(*erp))
+                       goto finish;
+               *erp = ext4_ext_dirty(handle, inode1, path1 +
+                                     path1->p_depth);
+               /*
+                * Looks scary, eh?  The second inode already points to the new
+                * blocks and was successfully dirtied, but luckily an error here
+                * can only be caused by a journal error, so the full transaction
+                * will be aborted anyway.
+                */
+               if (unlikely(*erp))
+                       goto finish;
+               lblk1 += len;
+               lblk2 += len;
+               replaced_count += len;
+               count -= len;
+
+       repeat:
+               ext4_ext_drop_refs(path1);
+               kfree(path1);
+               ext4_ext_drop_refs(path2);
+               kfree(path2);
+               path1 = path2 = NULL;
+       }
+       return replaced_count;
+}
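
For orientation, a minimal caller sketch for ext4_swap_extents(), following the
locking rules spelled out in the kerneldoc above. This is an illustration, not
code from this series: lock_two_nondirectories() is the generic VFS helper,
ext4_double_down_write_data_sem()/ext4_double_up_write_data_sem() come from
move_extent.c, and the handle, block ranges, and error reporting are invented:

	int err = 0;
	int replaced;

	/* i_mutex on both inodes (the VFS helper picks a stable lock order) */
	lock_two_nondirectories(inode1, inode2);
	/* i_data_sem on both inodes, for write */
	ext4_double_down_write_data_sem(inode1, inode2);

	/* page cache for both ranges must already be locked and flushed */
	replaced = ext4_swap_extents(handle, inode1, inode2, lblk1, lblk2,
				     count, 1 /* mark unwritten */, &err);

	ext4_double_up_write_data_sem(inode1, inode2);
	unlock_two_nondirectories(inode1, inode2);

	/* even on failure, 'replaced' reports how many blocks were swapped */
	if (err)
		EXT4_ERROR_INODE(inode1, "extent swap failed after %d blocks",
				 replaced);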
index 0b7e28e7eaa4303938877743114bce5e367a8c2d..94e7855ae71b03e26559e02cbe6491e9d8be2767 100644 (file)
@@ -11,6 +11,8 @@
  */
 #include <linux/rbtree.h>
 #include <linux/list_sort.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include "ext4.h"
 #include "extents_status.h"
 
@@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
         */
        if (!ext4_es_is_delayed(es)) {
                EXT4_I(inode)->i_es_lru_nr++;
-               percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+               percpu_counter_inc(&EXT4_SB(inode->i_sb)->
+                                       s_es_stats.es_stats_lru_cnt);
        }
 
+       EXT4_I(inode)->i_es_all_nr++;
+       percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+
        return es;
 }
 
 static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 {
+       EXT4_I(inode)->i_es_all_nr--;
+       percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+
        /* Decrease the lru counter when this es is not delayed */
        if (!ext4_es_is_delayed(es)) {
                BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
                EXT4_I(inode)->i_es_lru_nr--;
-               percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+               percpu_counter_dec(&EXT4_SB(inode->i_sb)->
+                                       s_es_stats.es_stats_lru_cnt);
        }
 
        kmem_cache_free(ext4_es_cachep, es);
@@ -426,7 +436,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
        unsigned short ee_len;
        int depth, ee_status, es_status;
 
-       path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
+       path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
        if (IS_ERR(path))
                return;
 
@@ -499,10 +509,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
                }
        }
 out:
-       if (path) {
-               ext4_ext_drop_refs(path);
-               kfree(path);
-       }
+       ext4_ext_drop_refs(path);
+       kfree(path);
 }
 
 static void ext4_es_insert_extent_ind_check(struct inode *inode,
@@ -731,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
                          struct extent_status *es)
 {
        struct ext4_es_tree *tree;
+       struct ext4_es_stats *stats;
        struct extent_status *es1 = NULL;
        struct rb_node *node;
        int found = 0;
@@ -767,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
        }
 
 out:
+       stats = &EXT4_SB(inode->i_sb)->s_es_stats;
        if (found) {
                BUG_ON(!es1);
                es->es_lblk = es1->es_lblk;
                es->es_len = es1->es_len;
                es->es_pblk = es1->es_pblk;
+               stats->es_stats_cache_hits++;
+       } else {
+               stats->es_stats_cache_misses++;
        }
 
        read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -933,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
                            struct ext4_inode_info *locked_ei)
 {
        struct ext4_inode_info *ei;
+       struct ext4_es_stats *es_stats;
        struct list_head *cur, *tmp;
        LIST_HEAD(skipped);
+       ktime_t start_time;
+       u64 scan_time;
        int nr_shrunk = 0;
        int retried = 0, skip_precached = 1, nr_skipped = 0;
 
+       es_stats = &sbi->s_es_stats;
+       start_time = ktime_get();
        spin_lock(&sbi->s_es_lru_lock);
 
 retry:
@@ -948,7 +966,8 @@ retry:
                 * If we have already reclaimed all extents from extent
                 * status tree, just stop the loop immediately.
                 */
-               if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
+               if (percpu_counter_read_positive(
+                               &es_stats->es_stats_lru_cnt) == 0)
                        break;
 
                ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
@@ -958,7 +977,7 @@ retry:
                 * time.  Normally we try hard to avoid shrinking
                 * precached inodes, but we will as a last resort.
                 */
-               if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
+               if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
                    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
                                                EXT4_STATE_EXT_PRECACHED))) {
                        nr_skipped++;
@@ -992,7 +1011,7 @@ retry:
        if ((nr_shrunk == 0) && nr_skipped && !retried) {
                retried++;
                list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-               sbi->s_es_last_sorted = jiffies;
+               es_stats->es_stats_last_sorted = jiffies;
                ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
                                      i_es_lru);
                /*
@@ -1010,6 +1029,22 @@ retry:
        if (locked_ei && nr_shrunk == 0)
                nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
 
+       scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+       if (likely(es_stats->es_stats_scan_time))
+               es_stats->es_stats_scan_time = (scan_time +
+                               es_stats->es_stats_scan_time*3) / 4;
+       else
+               es_stats->es_stats_scan_time = scan_time;
+       if (scan_time > es_stats->es_stats_max_scan_time)
+               es_stats->es_stats_max_scan_time = scan_time;
+       if (likely(es_stats->es_stats_shrunk))
+               es_stats->es_stats_shrunk = (nr_shrunk +
+                               es_stats->es_stats_shrunk*3) / 4;
+       else
+               es_stats->es_stats_shrunk = nr_shrunk;
+
+       trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+                            nr_skipped, retried);
        return nr_shrunk;
 }
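
The smoothing above is an exponential moving average with a weight of 1/4 on
the newest sample: new = (sample + 3 * old) / 4, seeded with the first sample.
A tiny standalone model (illustration only, not kernel code) shows the
behaviour:

#include <stdio.h>

/* model of the shrinker's smoothing: new = (sample + 3*old) / 4 */
static unsigned long long ewma4(unsigned long long old,
				unsigned long long sample)
{
	return old ? (sample + 3 * old) / 4 : sample;
}

int main(void)
{
	unsigned long long avg = 0;
	unsigned long long scan_ns[] = { 800, 1200, 400, 1000 };

	for (int i = 0; i < 4; i++) {
		avg = ewma4(avg, scan_ns[i]);
		printf("sample %llu -> avg %llu\n", scan_ns[i], avg);
	}
	return 0;	/* prints 800, 900, 775, 831 */
}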
 
@@ -1020,8 +1055,8 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
        struct ext4_sb_info *sbi;
 
        sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
-       nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
-       trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
+       nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+       trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
        return nr;
 }
 
@@ -1033,31 +1068,160 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
        int nr_to_scan = sc->nr_to_scan;
        int ret, nr_shrunk;
 
-       ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
-       trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+       ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+       trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
 
        if (!nr_to_scan)
                return ret;
 
        nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
 
-       trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
+       trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
        return nr_shrunk;
 }
 
-void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos)
 {
+       return *pos ? NULL : SEQ_START_TOKEN;
+}
+
+static void *
+ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       return NULL;
+}
+
+static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
+{
+       struct ext4_sb_info *sbi = seq->private;
+       struct ext4_es_stats *es_stats = &sbi->s_es_stats;
+       struct ext4_inode_info *ei, *max = NULL;
+       unsigned int inode_cnt = 0;
+
+       if (v != SEQ_START_TOKEN)
+               return 0;
+
+       /* here we just find an inode that has the max nr. of objects */
+       spin_lock(&sbi->s_es_lru_lock);
+       list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+               inode_cnt++;
+               if (max && max->i_es_all_nr < ei->i_es_all_nr)
+                       max = ei;
+               else if (!max)
+                       max = ei;
+       }
+       spin_unlock(&sbi->s_es_lru_lock);
+
+       seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
+                  percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
+                  percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+       seq_printf(seq, "  %lu/%lu cache hits/misses\n",
+                  es_stats->es_stats_cache_hits,
+                  es_stats->es_stats_cache_misses);
+       if (es_stats->es_stats_last_sorted != 0)
+               seq_printf(seq, "  %u ms last sorted interval\n",
+                          jiffies_to_msecs(jiffies -
+                                           es_stats->es_stats_last_sorted));
+       if (inode_cnt)
+               seq_printf(seq, "  %d inodes on lru list\n", inode_cnt);
+
+       seq_printf(seq, "average:\n  %llu us scan time\n",
+           div_u64(es_stats->es_stats_scan_time, 1000));
+       seq_printf(seq, "  %lu shrunk objects\n", es_stats->es_stats_shrunk);
+       if (inode_cnt)
+               seq_printf(seq,
+                   "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
+                   "  %llu us max scan time\n",
+                   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+                   div_u64(es_stats->es_stats_max_scan_time, 1000));
+
+       return 0;
+}
+
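
Given the seq_printf() calls above, reading the new es_shrinker_info file in
the per-device ext4 proc directory yields output shaped like the following
(all numbers invented for illustration; the last-sorted, lru-list and maximum
lines only appear when applicable):

stats:
  1024 objects
  512 reclaimable objects
  4000/120 cache hits/misses
  1500 ms last sorted interval
  8 inodes on lru list
average:
  125 us scan time
  32 shrunk objects
maximum:
  131 inode (4096 objects, 2048 reclaimable)
  410 us max scan time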
+static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v)
+{
+}
+
+static const struct seq_operations ext4_es_seq_shrinker_info_ops = {
+       .start = ext4_es_seq_shrinker_info_start,
+       .next  = ext4_es_seq_shrinker_info_next,
+       .stop  = ext4_es_seq_shrinker_info_stop,
+       .show  = ext4_es_seq_shrinker_info_show,
+};
+
+static int
+ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file)
+{
+       int ret;
+
+       ret = seq_open(file, &ext4_es_seq_shrinker_info_ops);
+       if (!ret) {
+               struct seq_file *m = file->private_data;
+               m->private = PDE_DATA(inode);
+       }
+
+       return ret;
+}
+
+static int
+ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file)
+{
+       return seq_release(inode, file);
+}
+
+static const struct file_operations ext4_es_seq_shrinker_info_fops = {
+       .owner          = THIS_MODULE,
+       .open           = ext4_es_seq_shrinker_info_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = ext4_es_seq_shrinker_info_release,
+};
+
+int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+{
+       int err;
+
        INIT_LIST_HEAD(&sbi->s_es_lru);
        spin_lock_init(&sbi->s_es_lru_lock);
-       sbi->s_es_last_sorted = 0;
+       sbi->s_es_stats.es_stats_last_sorted = 0;
+       sbi->s_es_stats.es_stats_shrunk = 0;
+       sbi->s_es_stats.es_stats_cache_hits = 0;
+       sbi->s_es_stats.es_stats_cache_misses = 0;
+       sbi->s_es_stats.es_stats_scan_time = 0;
+       sbi->s_es_stats.es_stats_max_scan_time = 0;
+       err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
+       if (err)
+               return err;
+       err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+       if (err)
+               goto err1;
+
        sbi->s_es_shrinker.scan_objects = ext4_es_scan;
        sbi->s_es_shrinker.count_objects = ext4_es_count;
        sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
-       register_shrinker(&sbi->s_es_shrinker);
+       err = register_shrinker(&sbi->s_es_shrinker);
+       if (err)
+               goto err2;
+
+       if (sbi->s_proc)
+               proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc,
+                                &ext4_es_seq_shrinker_info_fops, sbi);
+
+       return 0;
+
+err2:
+       percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+err1:
+       percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+       return err;
 }
 
 void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 {
+       /* the shrinker reads the percpu counters; stop it first */
+       unregister_shrinker(&sbi->s_es_shrinker);
+       if (sbi->s_proc)
+               remove_proc_entry("es_shrinker_info", sbi->s_proc);
+       percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+       percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
-       unregister_shrinker(&sbi->s_es_shrinker);
 }
 
index f1b62a41992059f3cbebd16ee6caaf25ee156283..efd5f970b5013085dec20e421b8ff3ced53246c2 100644 (file)
@@ -64,6 +64,17 @@ struct ext4_es_tree {
        struct extent_status *cache_es; /* recently accessed extent */
 };
 
+struct ext4_es_stats {
+       unsigned long es_stats_last_sorted;
+       unsigned long es_stats_shrunk;
+       unsigned long es_stats_cache_hits;
+       unsigned long es_stats_cache_misses;
+       u64 es_stats_scan_time;
+       u64 es_stats_max_scan_time;
+       struct percpu_counter es_stats_all_cnt;
+       struct percpu_counter es_stats_lru_cnt;
+};
+
 extern int __init ext4_init_es(void);
 extern void ext4_exit_es(void);
 extern void ext4_es_init_tree(struct ext4_es_tree *tree);
@@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
                       (pb & ~ES_MASK));
 }
 
-extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
+extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_lru_add(struct inode *inode);
 extern void ext4_es_lru_del(struct inode *inode);
index 5b87fc36aab863d073de371bd8e9ae0159281917..8012a5daf4019d953d3b3d79d5d2a2b4d41ac59f 100644 (file)
@@ -1011,8 +1011,7 @@ got:
        spin_unlock(&sbi->s_next_gen_lock);
 
        /* Precompute checksum seed for inode metadata */
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+       if (ext4_has_metadata_csum(sb)) {
                __u32 csum;
                __le32 inum = cpu_to_le32(inode->i_ino);
                __le32 gen = cpu_to_le32(inode->i_generation);
index e75f840000a02f4fa1313ff027a5b071a6a7abc3..36b369697a131523f26f6a6336eec17b4da3ae4f 100644 (file)
@@ -318,34 +318,24 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
  *     ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
  *     as described above and return 0.
  */
-static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
-                            ext4_lblk_t iblock, int indirect_blks,
-                            int *blks, ext4_fsblk_t goal,
-                            ext4_lblk_t *offsets, Indirect *branch)
+static int ext4_alloc_branch(handle_t *handle,
+                            struct ext4_allocation_request *ar,
+                            int indirect_blks, ext4_lblk_t *offsets,
+                            Indirect *branch)
 {
-       struct ext4_allocation_request  ar;
        struct buffer_head *            bh;
        ext4_fsblk_t                    b, new_blocks[4];
        __le32                          *p;
        int                             i, j, err, len = 1;
 
-       /*
-        * Set up for the direct block allocation
-        */
-       memset(&ar, 0, sizeof(ar));
-       ar.inode = inode;
-       ar.len = *blks;
-       ar.logical = iblock;
-       if (S_ISREG(inode->i_mode))
-               ar.flags = EXT4_MB_HINT_DATA;
-
        for (i = 0; i <= indirect_blks; i++) {
                if (i == indirect_blks) {
-                       ar.goal = goal;
-                       new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
+                       new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
                } else
-                       goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
-                                                       goal, 0, NULL, &err);
+                       ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
+                                       ar->inode, ar->goal,
+                                       ar->flags & EXT4_MB_DELALLOC_RESERVED,
+                                       NULL, &err);
                if (err) {
                        i--;
                        goto failed;
@@ -354,7 +344,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
                if (i == 0)
                        continue;
 
-               bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
+               bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
                if (unlikely(!bh)) {
                        err = -ENOMEM;
                        goto failed;
@@ -372,7 +362,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
                b = new_blocks[i];
 
                if (i == indirect_blks)
-                       len = ar.len;
+                       len = ar->len;
                for (j = 0; j < len; j++)
                        *p++ = cpu_to_le32(b++);
 
@@ -381,11 +371,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
                unlock_buffer(bh);
 
                BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-               err = ext4_handle_dirty_metadata(handle, inode, bh);
+               err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
                if (err)
                        goto failed;
        }
-       *blks = ar.len;
        return 0;
 failed:
        for (; i >= 0; i--) {
@@ -396,10 +385,10 @@ failed:
                 * existing before ext4_alloc_branch() was called.
                 */
                if (i > 0 && i != indirect_blks && branch[i].bh)
-                       ext4_forget(handle, 1, inode, branch[i].bh,
+                       ext4_forget(handle, 1, ar->inode, branch[i].bh,
                                    branch[i].bh->b_blocknr);
-               ext4_free_blocks(handle, inode, NULL, new_blocks[i],
-                                (i == indirect_blks) ? ar.len : 1, 0);
+               ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
+                                (i == indirect_blks) ? ar->len : 1, 0);
        }
        return err;
 }
@@ -419,9 +408,9 @@ failed:
  * inode (->i_blocks, etc.). In case of success we end up with the full
  * chain to new block and return 0.
  */
-static int ext4_splice_branch(handle_t *handle, struct inode *inode,
-                             ext4_lblk_t block, Indirect *where, int num,
-                             int blks)
+static int ext4_splice_branch(handle_t *handle,
+                             struct ext4_allocation_request *ar,
+                             Indirect *where, int num)
 {
        int i;
        int err = 0;
@@ -446,9 +435,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
         * Update the host buffer_head or inode to point to the just-allocated
         * direct blocks
         */
-       if (num == 0 && blks > 1) {
+       if (num == 0 && ar->len > 1) {
                current_block = le32_to_cpu(where->key) + 1;
-               for (i = 1; i < blks; i++)
+               for (i = 1; i < ar->len; i++)
                        *(where->p + i) = cpu_to_le32(current_block++);
        }
 
@@ -465,14 +454,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
                 */
                jbd_debug(5, "splicing indirect only\n");
                BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
-               err = ext4_handle_dirty_metadata(handle, inode, where->bh);
+               err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
                if (err)
                        goto err_out;
        } else {
                /*
                 * OK, we spliced it into the inode itself on a direct block.
                 */
-               ext4_mark_inode_dirty(handle, inode);
+               ext4_mark_inode_dirty(handle, ar->inode);
                jbd_debug(5, "splicing direct\n");
        }
        return err;
@@ -484,11 +473,11 @@ err_out:
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
                 */
-               ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+               ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
-       ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
-                        blks, 0);
+       ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
+                        ar->len, 0);
 
        return err;
 }
@@ -525,11 +514,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map,
                        int flags)
 {
+       struct ext4_allocation_request ar;
        int err = -EIO;
        ext4_lblk_t offsets[4];
        Indirect chain[4];
        Indirect *partial;
-       ext4_fsblk_t goal;
        int indirect_blks;
        int blocks_to_boundary = 0;
        int depth;
@@ -579,7 +568,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
                return -ENOSPC;
        }
 
-       goal = ext4_find_goal(inode, map->m_lblk, partial);
+       /* Set up for the direct block allocation */
+       memset(&ar, 0, sizeof(ar));
+       ar.inode = inode;
+       ar.logical = map->m_lblk;
+       if (S_ISREG(inode->i_mode))
+               ar.flags = EXT4_MB_HINT_DATA;
+       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+               ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+
+       ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
 
        /* the number of blocks we need to allocate for [d,t]indirect blocks */
        indirect_blks = (chain + depth) - partial - 1;
@@ -588,13 +586,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
         * Next look up the indirect map to count the total number of
         * direct blocks to allocate for this branch.
         */
-       count = ext4_blks_to_allocate(partial, indirect_blks,
-                                     map->m_len, blocks_to_boundary);
+       ar.len = ext4_blks_to_allocate(partial, indirect_blks,
+                                      map->m_len, blocks_to_boundary);
+
        /*
         * Block out ext4_truncate while we alter the tree
         */
-       err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
-                               &count, goal,
+       err = ext4_alloc_branch(handle, &ar, indirect_blks,
                                offsets + (partial - chain), partial);
 
        /*
@@ -605,14 +603,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
         * may need to return -EAGAIN upwards in the worst case.  --sct
         */
        if (!err)
-               err = ext4_splice_branch(handle, inode, map->m_lblk,
-                                        partial, indirect_blks, count);
+               err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
        if (err)
                goto cleanup;
 
        map->m_flags |= EXT4_MAP_NEW;
 
        ext4_update_inode_fsync_trans(handle, inode, 1);
+       count = ar.len;
 got_it:
        map->m_flags |= EXT4_MAP_MAPPED;
        map->m_pblk = le32_to_cpu(chain[depth-1].key);
index bea662bd0ca6b921b15c1bda0aab950a2fec2c78..3ea62695abce7b40e8d0e4969ad3fc1041358e4a 100644 (file)
@@ -594,6 +594,7 @@ retry:
        if (ret) {
                unlock_page(page);
                page_cache_release(page);
+               page = NULL;
                ext4_orphan_add(handle, inode);
                up_write(&EXT4_I(inode)->xattr_sem);
                sem_held = 0;
@@ -613,7 +614,8 @@ retry:
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
 
-       block_commit_write(page, from, to);
+       if (page)
+               block_commit_write(page, from, to);
 out:
        if (page) {
                unlock_page(page);
@@ -1126,8 +1128,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
        memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
                inline_size - EXT4_INLINE_DOTDOT_SIZE);
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(inode->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
        inode->i_size = inode->i_sb->s_blocksize;
index 3aa26e9117c440b7145cd4196433cd1568a9b20b..e9777f93cf05a2e9c9776c9a57fa2798b14ab703 100644 (file)
@@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
 
        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
            cpu_to_le32(EXT4_OS_LINUX) ||
-           !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+           !ext4_has_metadata_csum(inode->i_sb))
                return 1;
 
        provided = le16_to_cpu(raw->i_checksum_lo);
@@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
 
        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
            cpu_to_le32(EXT4_OS_LINUX) ||
-           !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+           !ext4_has_metadata_csum(inode->i_sb))
                return;
 
        csum = ext4_inode_csum(inode, raw, ei);
@@ -224,16 +222,15 @@ void ext4_evict_inode(struct inode *inode)
                goto no_delete;
        }
 
-       if (!is_bad_inode(inode))
-               dquot_initialize(inode);
+       if (is_bad_inode(inode))
+               goto no_delete;
+       dquot_initialize(inode);
 
        if (ext4_should_order_data(inode))
                ext4_begin_ordered_truncate(inode, 0);
        truncate_inode_pages_final(&inode->i_data);
 
        WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
-       if (is_bad_inode(inode))
-               goto no_delete;
 
        /*
         * Protect us against freezing - iput() caller didn't have to have any
@@ -590,19 +587,11 @@ found:
        /*
         * New blocks allocate and/or writing to unwritten extent
         * will possibly result in updating i_data, so we take
-        * the write lock of i_data_sem, and call get_blocks()
+        * the write lock of i_data_sem, and call get_block()
         * with create == 1 flag.
         */
        down_write(&EXT4_I(inode)->i_data_sem);
 
-       /*
-        * if the caller is from delayed allocation writeout path
-        * we have already reserved fs blocks for allocation
-        * let the underlying get_block() function know to
-        * avoid double accounting
-        */
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-               ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
        /*
         * We need to check for EXT4 here because migrate
         * could have changed the inode type in between
@@ -631,8 +620,6 @@ found:
                        (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
                        ext4_da_update_reserve_space(inode, retval, 1);
        }
-       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-               ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
        if (retval > 0) {
                unsigned int status;
@@ -734,11 +721,11 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
-                               ext4_lblk_t block, int create, int *errp)
+                               ext4_lblk_t block, int create)
 {
        struct ext4_map_blocks map;
        struct buffer_head *bh;
-       int fatal = 0, err;
+       int err;
 
        J_ASSERT(handle != NULL || create == 0);
 
@@ -747,21 +734,14 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
        err = ext4_map_blocks(handle, inode, &map,
                              create ? EXT4_GET_BLOCKS_CREATE : 0);
 
-       /* ensure we send some value back into *errp */
-       *errp = 0;
-
-       if (create && err == 0)
-               err = -ENOSPC;  /* should never happen */
+       if (err == 0)
+               return create ? ERR_PTR(-ENOSPC) : NULL;
        if (err < 0)
-               *errp = err;
-       if (err <= 0)
-               return NULL;
+               return ERR_PTR(err);
 
        bh = sb_getblk(inode->i_sb, map.m_pblk);
-       if (unlikely(!bh)) {
-               *errp = -ENOMEM;
-               return NULL;
-       }
+       if (unlikely(!bh))
+               return ERR_PTR(-ENOMEM);
        if (map.m_flags & EXT4_MAP_NEW) {
                J_ASSERT(create != 0);
                J_ASSERT(handle != NULL);
@@ -775,44 +755,44 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
                 */
                lock_buffer(bh);
                BUFFER_TRACE(bh, "call get_create_access");
-               fatal = ext4_journal_get_create_access(handle, bh);
-               if (!fatal && !buffer_uptodate(bh)) {
+               err = ext4_journal_get_create_access(handle, bh);
+               if (unlikely(err)) {
+                       unlock_buffer(bh);
+                       goto errout;
+               }
+               if (!buffer_uptodate(bh)) {
                        memset(bh->b_data, 0, inode->i_sb->s_blocksize);
                        set_buffer_uptodate(bh);
                }
                unlock_buffer(bh);
                BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
                err = ext4_handle_dirty_metadata(handle, inode, bh);
-               if (!fatal)
-                       fatal = err;
-       } else {
+               if (unlikely(err))
+                       goto errout;
+       } else
                BUFFER_TRACE(bh, "not a new buffer");
-       }
-       if (fatal) {
-               *errp = fatal;
-               brelse(bh);
-               bh = NULL;
-       }
        return bh;
+errout:
+       brelse(bh);
+       return ERR_PTR(err);
 }
 
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
-                              ext4_lblk_t block, int create, int *err)
+                              ext4_lblk_t block, int create)
 {
        struct buffer_head *bh;
 
-       bh = ext4_getblk(handle, inode, block, create, err);
-       if (!bh)
+       bh = ext4_getblk(handle, inode, block, create);
+       if (IS_ERR(bh))
                return bh;
-       if (buffer_uptodate(bh))
+       if (!bh || buffer_uptodate(bh))
                return bh;
        ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return bh;
        put_bh(bh);
-       *err = -EIO;
-       return NULL;
+       return ERR_PTR(-EIO);
 }
 
 int ext4_walk_page_buffers(handle_t *handle,
@@ -1536,7 +1516,7 @@ out_unlock:
 }
 
 /*
- * This is a special get_blocks_t callback which is used by
+ * This is a special get_block_t callback which is used by
  * ext4_da_write_begin().  It will either return mapped block or
  * reserve space for a single block.
  *
@@ -2011,12 +1991,10 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
         * in data loss.  So use reserved blocks to allocate metadata if
         * possible.
         *
-        * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks
-        * in question are delalloc blocks.  This affects functions in many
-        * different parts of the allocation call path.  This flag exists
-        * primarily because we don't want to change *many* call functions, so
-        * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
-        * once the inode's allocation semaphore is taken.
+        * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
+        * the blocks in question are delalloc blocks.  This indicates
+        * that the blocks and quotas have already been checked when
+        * the data was copied into the page cache.
         */
        get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
                           EXT4_GET_BLOCKS_METADATA_NOFAIL;
@@ -2515,6 +2493,20 @@ static int ext4_nonda_switch(struct super_block *sb)
        return 0;
 }
 
+/* We always reserve for an inode update; the superblock could be there too */
+static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
+{
+       if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                               EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
+               return 1;
+
+       if (pos + len <= 0x7fffffffULL)
+               return 1;
+
+       /* We might need to update the superblock to set LARGE_FILE */
+       return 2;
+}
+
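
The cutoff 0x7fffffff is 2^31 - 1, the largest i_size representable without
the LARGE_FILE feature, so a delalloc write whose end may cross it must also
be able to dirty the superblock to set the feature bit. A standalone model of
the credit decision (illustration only, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long long limit = 0x7fffffffULL;	/* 2^31 - 1 */
	unsigned long long pos = 0x7ffffff0ULL;		/* 16 bytes below */
	unsigned int len = 32;				/* write crosses it */

	/* 1 credit for the inode; +1 if LARGE_FILE may need to be set */
	printf("credits = %d\n", (pos + len <= limit) ? 1 : 2);	/* -> 2 */
	return 0;
}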
 static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
                               loff_t pos, unsigned len, unsigned flags,
                               struct page **pagep, void **fsdata)
@@ -2565,7 +2557,8 @@ retry_grab:
         * of file which has an already mapped buffer.
         */
 retry_journal:
-       handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+       handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+                               ext4_da_write_credits(inode, pos, len));
        if (IS_ERR(handle)) {
                page_cache_release(page);
                return PTR_ERR(handle);
@@ -2658,10 +2651,7 @@ static int ext4_da_write_end(struct file *file,
        if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
                if (ext4_has_inline_data(inode) ||
                    ext4_da_should_update_i_disksize(page, end)) {
-                       down_write(&EXT4_I(inode)->i_data_sem);
-                       if (new_i_size > EXT4_I(inode)->i_disksize)
-                               EXT4_I(inode)->i_disksize = new_i_size;
-                       up_write(&EXT4_I(inode)->i_data_sem);
+                       ext4_update_i_disksize(inode, new_i_size);
                        /* We need to mark the inode dirty even if
                         * new_i_size is less than inode->i_size
                         * but greater than i_disksize (hint: delalloc)
@@ -3936,8 +3926,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                ei->i_extra_isize = 0;
 
        /* Precompute checksum seed for inode metadata */
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+       if (ext4_has_metadata_csum(sb)) {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
                __u32 csum;
                __le32 inum = cpu_to_le32(inode->i_ino);
@@ -4127,6 +4116,13 @@ bad_inode:
        return ERR_PTR(ret);
 }
 
+struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
+{
+       if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
+               return ERR_PTR(-EIO);
+       return ext4_iget(sb, ino);
+}
+
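
A sketch of the intended use on the directory-lookup side (the caller shown
here is hypothetical; this hunk only adds the helper): any on-disk directory
entry that names a reserved inode other than the root is treated as
corruption rather than followed.

	/* hypothetical namei-side caller */
	inode = ext4_iget_normal(dir->i_sb, le32_to_cpu(de->inode));
	if (IS_ERR(inode))
		return ERR_CAST(inode);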
 static int ext4_inode_blocks_set(handle_t *handle,
                                struct ext4_inode *raw_inode,
                                struct ext4_inode_info *ei)
@@ -4226,7 +4222,8 @@ static int ext4_do_update_inode(handle_t *handle,
        EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
 
-       if (ext4_inode_blocks_set(handle, raw_inode, ei)) {
+       err = ext4_inode_blocks_set(handle, raw_inode, ei);
+       if (err) {
                spin_unlock(&ei->i_raw_lock);
                goto out_brelse;
        }
@@ -4536,8 +4533,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                                ext4_orphan_del(NULL, inode);
                                goto err_out;
                        }
-               } else
+               } else {
+                       loff_t oldsize = inode->i_size;
+
                        i_size_write(inode, attr->ia_size);
+                       pagecache_isize_extended(inode, oldsize, inode->i_size);
+               }
 
                /*
                 * Blocks are going to be removed from the inode. Wait
index 0f2252ec274d6c3bd0fc47a45a902ad043cd3ddf..bfda18a155922c1224cf12e6630d75c692c38d8f 100644 (file)
@@ -331,8 +331,7 @@ flags_out:
                if (!inode_owner_or_capable(inode))
                        return -EPERM;
 
-               if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+               if (ext4_has_metadata_csum(inode->i_sb)) {
                        ext4_warning(sb, "Setting inode version is not "
                                     "supported with metadata_csum enabled.");
                        return -ENOTTY;
@@ -532,9 +531,17 @@ group_add_out:
        }
 
        case EXT4_IOC_SWAP_BOOT:
+       {
+               int err;
                if (!(filp->f_mode & FMODE_WRITE))
                        return -EBADF;
-               return swap_inode_boot_loader(sb, inode);
+               err = mnt_want_write_file(filp);
+               if (err)
+                       return err;
+               err = swap_inode_boot_loader(sb, inode);
+               mnt_drop_write_file(filp);
+               return err;
+       }
 
        case EXT4_IOC_RESIZE_FS: {
                ext4_fsblk_t n_blocks_count;
index 748c9136a60a4e5fba2967fac108d298abf110ea..dbfe15c2533c93a299a484afb1ce32a7ca9a57f1 100644 (file)
@@ -3155,9 +3155,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                         "start %lu, size %lu, fe_logical %lu",
                         (unsigned long) start, (unsigned long) size,
                         (unsigned long) ac->ac_o_ex.fe_logical);
+               BUG();
        }
-       BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
-                       start > ac->ac_o_ex.fe_logical);
        BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 
        /* now prepare goal request */
@@ -4410,14 +4409,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        if (IS_NOQUOTA(ar->inode))
                ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
 
-       /*
-        * For delayed allocation, we could skip the ENOSPC and
-        * EDQUOT check, as blocks and quotas have been already
-        * reserved when data being copied into pagecache.
-        */
-       if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
-               ar->flags |= EXT4_MB_DELALLOC_RESERVED;
-       else {
+       if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
                /* Without delayed allocation we need to verify
                 * there is enough free blocks to do block allocation
                 * and verify allocation doesn't exceed the quota limits.
@@ -4528,8 +4520,7 @@ out:
        if (inquota && ar->len < inquota)
                dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
        if (!ar->len) {
-               if (!ext4_test_inode_state(ar->inode,
-                                          EXT4_STATE_DELALLOC_RESERVED))
+               if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
                        /* release all the reserved blocks if non delalloc */
                        percpu_counter_sub(&sbi->s_dirtyclusters_counter,
                                                reserv_clstrs);
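
Taken together with the ext4_ext_map_blocks() and ext4_ind_map_blocks() hunks
above, the per-inode EXT4_STATE_DELALLOC_RESERVED state is gone: the
information now travels in the allocation request itself. The shape of the
new plumbing, condensed (fragments from this series, not compilable on their
own):

	/* map_blocks paths: translate the caller's intent once */
	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
		ar.flags |= EXT4_MB_DELALLOC_RESERVED;

	/* ext4_mb_new_blocks(): test the request, not inode state */
	if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
		/* blocks/quota were not reserved at write time, so the
		 * ENOSPC and quota checks must happen here */
	}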
index d3567f27bae7071931bcc4b06470caf69d123d7b..a432634f2e6a7243d4e5f3beaa088b6911281a0d 100644 (file)
@@ -41,8 +41,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
        ext4_ext_store_pblock(&newext, lb->first_pblock);
        /* Locking only for convenience since we are operating on temp inode */
        down_write(&EXT4_I(inode)->i_data_sem);
-       path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
-
+       path = ext4_find_extent(inode, lb->first_block, NULL, 0);
        if (IS_ERR(path)) {
                retval = PTR_ERR(path);
                path = NULL;
@@ -81,13 +80,11 @@ static int finish_range(handle_t *handle, struct inode *inode,
                                goto err_out;
                }
        }
-       retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
+       retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
 err_out:
        up_write((&EXT4_I(inode)->i_data_sem));
-       if (path) {
-               ext4_ext_drop_refs(path);
-               kfree(path);
-       }
+       ext4_ext_drop_refs(path);
+       kfree(path);
        lb->first_pblock = 0;
        return retval;
 }
index 32bce844c2e13ce8147ec1977bb51838b040f7fb..8313ca3324ec96a1c3413d0041817a5f1726e4f4 100644 (file)
@@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
 
 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
 {
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return 1;
 
        return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
@@ -29,8 +28,7 @@ static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
 
 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
 {
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return;
 
        mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
index 671a74b14fd768ef6515811500a37269bad680b8..9f2311bc9c4f3b5fbf2d839a7491da49c174cd4c 100644 (file)
  * @lblock:    logical block number to find an extent path
  * @path:      pointer to an extent path pointer (for output)
  *
- * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value
+ * ext4_find_extent wrapper. Return 0 on success, or a negative error value
  * on failure.
  */
 static inline int
 get_ext_path(struct inode *inode, ext4_lblk_t lblock,
-               struct ext4_ext_path **orig_path)
+               struct ext4_ext_path **ppath)
 {
-       int ret = 0;
        struct ext4_ext_path *path;
 
-       path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
+       path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
        if (IS_ERR(path))
-               ret = PTR_ERR(path);
-       else if (path[ext_depth(inode)].p_ext == NULL)
-               ret = -ENODATA;
-       else
-               *orig_path = path;
-
-       return ret;
-}
-
-/**
- * copy_extent_status - Copy the extent's initialization status
- *
- * @src:       an extent for getting initialize status
- * @dest:      an extent to be set the status
- */
-static void
-copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
-{
-       if (ext4_ext_is_unwritten(src))
-               ext4_ext_mark_unwritten(dest);
-       else
-               dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
-}
-
-/**
- * mext_next_extent - Search for the next extent and set it to "extent"
- *
- * @inode:     inode which is searched
- * @path:      this will obtain data for the next extent
- * @extent:    pointer to the next extent we have just gotten
- *
- * Search the next extent in the array of ext4_ext_path structure (@path)
- * and set it to ext4_extent structure (@extent). In addition, the member of
- * @path (->p_ext) also points the next extent. Return 0 on success, 1 if
- * ext4_ext_path structure refers to the last extent, or a negative error
- * value on failure.
- */
-int
-mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
-                     struct ext4_extent **extent)
-{
-       struct ext4_extent_header *eh;
-       int ppos, leaf_ppos = path->p_depth;
-
-       ppos = leaf_ppos;
-       if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
-               /* leaf block */
-               *extent = ++path[ppos].p_ext;
-               path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
-               return 0;
-       }
-
-       while (--ppos >= 0) {
-               if (EXT_LAST_INDEX(path[ppos].p_hdr) >
-                   path[ppos].p_idx) {
-                       int cur_ppos = ppos;
-
-                       /* index block */
-                       path[ppos].p_idx++;
-                       path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
-                       if (path[ppos+1].p_bh)
-                               brelse(path[ppos+1].p_bh);
-                       path[ppos+1].p_bh =
-                               sb_bread(inode->i_sb, path[ppos].p_block);
-                       if (!path[ppos+1].p_bh)
-                               return -EIO;
-                       path[ppos+1].p_hdr =
-                               ext_block_hdr(path[ppos+1].p_bh);
-
-                       /* Halfway index block */
-                       while (++cur_ppos < leaf_ppos) {
-                               path[cur_ppos].p_idx =
-                                       EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
-                               path[cur_ppos].p_block =
-                                       ext4_idx_pblock(path[cur_ppos].p_idx);
-                               if (path[cur_ppos+1].p_bh)
-                                       brelse(path[cur_ppos+1].p_bh);
-                               path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
-                                       path[cur_ppos].p_block);
-                               if (!path[cur_ppos+1].p_bh)
-                                       return -EIO;
-                               path[cur_ppos+1].p_hdr =
-                                       ext_block_hdr(path[cur_ppos+1].p_bh);
-                       }
-
-                       path[leaf_ppos].p_ext = *extent = NULL;
-
-                       eh = path[leaf_ppos].p_hdr;
-                       if (le16_to_cpu(eh->eh_entries) == 0)
-                               /* empty leaf is found */
-                               return -ENODATA;
-
-                       /* leaf block */
-                       path[leaf_ppos].p_ext = *extent =
-                               EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
-                       path[leaf_ppos].p_block =
-                                       ext4_ext_pblock(path[leaf_ppos].p_ext);
-                       return 0;
-               }
+               return PTR_ERR(path);
+       if (path[ext_depth(inode)].p_ext == NULL) {
+               ext4_ext_drop_refs(path);
+               kfree(path);
+               *ppath = NULL;
+               return -ENODATA;
        }
-       /* We found the last extent */
-       return 1;
+       *ppath = path;
+       return 0;
 }
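
The rewritten get_ext_path() keeps the 0/-errno return but tightens the contract: on any failure the path is released and *ppath is reset to NULL, so callers need no -ENODATA special case. A hedged usage sketch (lblk and the surrounding locals are placeholders):

	struct ext4_ext_path *path = NULL;
	int err;

	err = get_ext_path(inode, lblk, &path);
	if (err)
		return err;	/* path already freed, *ppath is NULL */

	/* ... use path[ext_depth(inode)].p_ext ... */

	ext4_ext_drop_refs(path);
	kfree(path);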
 
 /**
@@ -177,417 +83,6 @@ ext4_double_up_write_data_sem(struct inode *orig_inode,
        up_write(&EXT4_I(donor_inode)->i_data_sem);
 }
 
-/**
- * mext_insert_across_blocks - Insert extents across leaf block
- *
- * @handle:            journal handle
- * @orig_inode:                original inode
- * @o_start:           first original extent to be changed
- * @o_end:             last original extent to be changed
- * @start_ext:         first new extent to be inserted
- * @new_ext:           middle of new extent to be inserted
- * @end_ext:           last new extent to be inserted
- *
- * Allocate a new leaf block and insert extents into it. Return 0 on success,
- * or a negative error value on failure.
- */
-static int
-mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
-               struct ext4_extent *o_start, struct ext4_extent *o_end,
-               struct ext4_extent *start_ext, struct ext4_extent *new_ext,
-               struct ext4_extent *end_ext)
-{
-       struct ext4_ext_path *orig_path = NULL;
-       ext4_lblk_t eblock = 0;
-       int new_flag = 0;
-       int end_flag = 0;
-       int err = 0;
-
-       if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
-               if (o_start == o_end) {
-
-                       /*       start_ext   new_ext    end_ext
-                        * donor |---------|-----------|--------|
-                        * orig  |------------------------------|
-                        */
-                       end_flag = 1;
-               } else {
-
-                       /*       start_ext   new_ext   end_ext
-                        * donor |---------|----------|---------|
-                        * orig  |---------------|--------------|
-                        */
-                       o_end->ee_block = end_ext->ee_block;
-                       o_end->ee_len = end_ext->ee_len;
-                       ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-               }
-
-               o_start->ee_len = start_ext->ee_len;
-               eblock = le32_to_cpu(start_ext->ee_block);
-               new_flag = 1;
-
-       } else if (start_ext->ee_len && new_ext->ee_len &&
-                  !end_ext->ee_len && o_start == o_end) {
-
-               /*       start_ext      new_ext
-                * donor |--------------|---------------|
-                * orig  |------------------------------|
-                */
-               o_start->ee_len = start_ext->ee_len;
-               eblock = le32_to_cpu(start_ext->ee_block);
-               new_flag = 1;
-
-       } else if (!start_ext->ee_len && new_ext->ee_len &&
-                  end_ext->ee_len && o_start == o_end) {
-
-               /*        new_ext       end_ext
-                * donor |--------------|---------------|
-                * orig  |------------------------------|
-                */
-               o_end->ee_block = end_ext->ee_block;
-               o_end->ee_len = end_ext->ee_len;
-               ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-
-               /*
-                * Set 0 to the extent block if new_ext was
-                * the first block.
-                */
-               if (new_ext->ee_block)
-                       eblock = le32_to_cpu(new_ext->ee_block);
-
-               new_flag = 1;
-       } else {
-               ext4_debug("ext4 move extent: Unexpected insert case\n");
-               return -EIO;
-       }
-
-       if (new_flag) {
-               err = get_ext_path(orig_inode, eblock, &orig_path);
-               if (err)
-                       goto out;
-
-               if (ext4_ext_insert_extent(handle, orig_inode,
-                                       orig_path, new_ext, 0))
-                       goto out;
-       }
-
-       if (end_flag) {
-               err = get_ext_path(orig_inode,
-                               le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
-               if (err)
-                       goto out;
-
-               if (ext4_ext_insert_extent(handle, orig_inode,
-                                          orig_path, end_ext, 0))
-                       goto out;
-       }
-out:
-       if (orig_path) {
-               ext4_ext_drop_refs(orig_path);
-               kfree(orig_path);
-       }
-
-       return err;
-
-}
-
-/**
- * mext_insert_inside_block - Insert new extent to the extent block
- *
- * @o_start:           first original extent to be moved
- * @o_end:             last original extent to be moved
- * @start_ext:         first new extent to be inserted
- * @new_ext:           middle of new extent to be inserted
- * @end_ext:           last new extent to be inserted
- * @eh:                        extent header of target leaf block
- * @range_to_move:     used to decide how to insert extent
- *
- * Insert extents into the leaf block. The extent (@o_start) is overwritten
- * by inserted extents.
- */
-static void
-mext_insert_inside_block(struct ext4_extent *o_start,
-                             struct ext4_extent *o_end,
-                             struct ext4_extent *start_ext,
-                             struct ext4_extent *new_ext,
-                             struct ext4_extent *end_ext,
-                             struct ext4_extent_header *eh,
-                             int range_to_move)
-{
-       int i = 0;
-       unsigned long len;
-
-       /* Move the existing extents */
-       if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
-               len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
-                       (unsigned long)(o_end + 1);
-               memmove(o_end + 1 + range_to_move, o_end + 1, len);
-       }
-
-       /* Insert start entry */
-       if (start_ext->ee_len)
-               o_start[i++].ee_len = start_ext->ee_len;
-
-       /* Insert new entry */
-       if (new_ext->ee_len) {
-               o_start[i] = *new_ext;
-               ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
-       }
-
-       /* Insert end entry */
-       if (end_ext->ee_len)
-               o_start[i] = *end_ext;
-
-       /* Increment the total entries counter on the extent block */
-       le16_add_cpu(&eh->eh_entries, range_to_move);
-}
-
-/**
- * mext_insert_extents - Insert new extent
- *
- * @handle:    journal handle
- * @orig_inode:        original inode
- * @orig_path: path indicates first extent to be changed
- * @o_start:   first original extent to be changed
- * @o_end:     last original extent to be changed
- * @start_ext: first new extent to be inserted
- * @new_ext:   middle of new extent to be inserted
- * @end_ext:   last new extent to be inserted
- *
- * Call the function to insert extents. If we cannot add more extents into
- * the leaf block, we call mext_insert_across_blocks() to create a
- * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
- * on success, or a negative error value on failure.
- */
-static int
-mext_insert_extents(handle_t *handle, struct inode *orig_inode,
-                        struct ext4_ext_path *orig_path,
-                        struct ext4_extent *o_start,
-                        struct ext4_extent *o_end,
-                        struct ext4_extent *start_ext,
-                        struct ext4_extent *new_ext,
-                        struct ext4_extent *end_ext)
-{
-       struct  ext4_extent_header *eh;
-       unsigned long need_slots, slots_range;
-       int     range_to_move, depth, ret;
-
-       /*
-        * The extents need to be inserted
-        * start_extent + new_extent + end_extent.
-        */
-       need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
-               (new_ext->ee_len ? 1 : 0);
-
-       /* The number of slots between start and end */
-       slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
-               / sizeof(struct ext4_extent);
-
-       /* Range to move the end of extent */
-       range_to_move = need_slots - slots_range;
-       depth = orig_path->p_depth;
-       orig_path += depth;
-       eh = orig_path->p_hdr;
-
-       if (depth) {
-               /* Register to journal */
-               BUFFER_TRACE(orig_path->p_bh, "get_write_access");
-               ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
-               if (ret)
-                       return ret;
-       }
-
-       /* Expansion */
-       if (range_to_move > 0 &&
-               (range_to_move > le16_to_cpu(eh->eh_max)
-                       - le16_to_cpu(eh->eh_entries))) {
-
-               ret = mext_insert_across_blocks(handle, orig_inode, o_start,
-                                       o_end, start_ext, new_ext, end_ext);
-               if (ret < 0)
-                       return ret;
-       } else
-               mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
-                                               end_ext, eh, range_to_move);
-
-       return ext4_ext_dirty(handle, orig_inode, orig_path);
-}
-
-/**
- * mext_leaf_block - Move one leaf extent block into the inode.
- *
- * @handle:            journal handle
- * @orig_inode:                original inode
- * @orig_path:         path indicates first extent to be changed
- * @dext:              donor extent
- * @from:              start offset on the target file
- *
- * In order to insert extents into the leaf block, we must divide the extent
- * in the leaf block into three extents. The one is located to be inserted
- * extents, and the others are located around it.
- *
- * Therefore, this function creates structures to save extents of the leaf
- * block, and inserts extents by calling mext_insert_extents() with
- * created extents. Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_leaf_block(handle_t *handle, struct inode *orig_inode,
-                    struct ext4_ext_path *orig_path, struct ext4_extent *dext,
-                    ext4_lblk_t *from)
-{
-       struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
-       struct ext4_extent new_ext, start_ext, end_ext;
-       ext4_lblk_t new_ext_end;
-       int oext_alen, new_ext_alen, end_ext_alen;
-       int depth = ext_depth(orig_inode);
-       int ret;
-
-       start_ext.ee_block = end_ext.ee_block = 0;
-       o_start = o_end = oext = orig_path[depth].p_ext;
-       oext_alen = ext4_ext_get_actual_len(oext);
-       start_ext.ee_len = end_ext.ee_len = 0;
-
-       new_ext.ee_block = cpu_to_le32(*from);
-       ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
-       new_ext.ee_len = dext->ee_len;
-       new_ext_alen = ext4_ext_get_actual_len(&new_ext);
-       new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
-
-       /*
-        * Case: original extent is first
-        * oext      |--------|
-        * new_ext      |--|
-        * start_ext |--|
-        */
-       if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
-               le32_to_cpu(new_ext.ee_block) <
-               le32_to_cpu(oext->ee_block) + oext_alen) {
-               start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
-                                              le32_to_cpu(oext->ee_block));
-               start_ext.ee_block = oext->ee_block;
-               copy_extent_status(oext, &start_ext);
-       } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
-               prev_ext = oext - 1;
-               /*
-                * We can merge new_ext into previous extent,
-                * if these are contiguous and same extent type.
-                */
-               if (ext4_can_extents_be_merged(orig_inode, prev_ext,
-                                              &new_ext)) {
-                       o_start = prev_ext;
-                       start_ext.ee_len = cpu_to_le16(
-                               ext4_ext_get_actual_len(prev_ext) +
-                               new_ext_alen);
-                       start_ext.ee_block = oext->ee_block;
-                       copy_extent_status(prev_ext, &start_ext);
-                       new_ext.ee_len = 0;
-               }
-       }
-
-       /*
-        * Case: new_ext_end must be less than oext
-        * oext      |-----------|
-        * new_ext       |-------|
-        */
-       if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
-               EXT4_ERROR_INODE(orig_inode,
-                       "new_ext_end(%u) should be less than or equal to "
-                       "oext->ee_block(%u) + oext_alen(%d) - 1",
-                       new_ext_end, le32_to_cpu(oext->ee_block),
-                       oext_alen);
-               ret = -EIO;
-               goto out;
-       }
-
-       /*
-        * Case: new_ext is smaller than original extent
-        * oext    |---------------|
-        * new_ext |-----------|
-        * end_ext             |---|
-        */
-       if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
-               new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
-               end_ext.ee_len =
-                       cpu_to_le16(le32_to_cpu(oext->ee_block) +
-                       oext_alen - 1 - new_ext_end);
-               copy_extent_status(oext, &end_ext);
-               end_ext_alen = ext4_ext_get_actual_len(&end_ext);
-               ext4_ext_store_pblock(&end_ext,
-                       (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
-               end_ext.ee_block =
-                       cpu_to_le32(le32_to_cpu(o_end->ee_block) +
-                       oext_alen - end_ext_alen);
-       }
-
-       ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
-                               o_end, &start_ext, &new_ext, &end_ext);
-out:
-       return ret;
-}
-
-/**
- * mext_calc_swap_extents - Calculate extents for extent swapping.
- *
- * @tmp_dext:          the extent that will belong to the original inode
- * @tmp_oext:          the extent that will belong to the donor inode
- * @orig_off:          block offset of original inode
- * @donor_off:         block offset of donor inode
- * @max_count:         the maximum length of extents
- *
- * Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_calc_swap_extents(struct ext4_extent *tmp_dext,
-                             struct ext4_extent *tmp_oext,
-                             ext4_lblk_t orig_off, ext4_lblk_t donor_off,
-                             ext4_lblk_t max_count)
-{
-       ext4_lblk_t diff, orig_diff;
-       struct ext4_extent dext_old, oext_old;
-
-       BUG_ON(orig_off != donor_off);
-
-       /* original and donor extents have to cover the same block offset */
-       if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
-           le32_to_cpu(tmp_oext->ee_block) +
-                       ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
-               return -ENODATA;
-
-       if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
-           le32_to_cpu(tmp_dext->ee_block) +
-                       ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
-               return -ENODATA;
-
-       dext_old = *tmp_dext;
-       oext_old = *tmp_oext;
-
-       /* When tmp_dext is too large, pick up the target range. */
-       diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
-
-       ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
-       le32_add_cpu(&tmp_dext->ee_block, diff);
-       le16_add_cpu(&tmp_dext->ee_len, -diff);
-
-       if (max_count < ext4_ext_get_actual_len(tmp_dext))
-               tmp_dext->ee_len = cpu_to_le16(max_count);
-
-       orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
-       ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
-
-       /* Adjust extent length if donor extent is larger than orig */
-       if (ext4_ext_get_actual_len(tmp_dext) >
-           ext4_ext_get_actual_len(tmp_oext) - orig_diff)
-               tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
-                                               orig_diff);
-
-       tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
-
-       copy_extent_status(&oext_old, tmp_dext);
-       copy_extent_status(&dext_old, tmp_oext);
-
-       return 0;
-}
-
 /**
  * mext_check_coverage - Check that all extents in the range have the same type
  *
@@ -619,171 +114,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
        }
        ret = 1;
 out:
-       if (path) {
-               ext4_ext_drop_refs(path);
-               kfree(path);
-       }
+       ext4_ext_drop_refs(path);
+       kfree(path);
        return ret;
 }
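
Dropping the "if (path)" guards here, and in finish_range() above, only works if ext4_ext_drop_refs() tolerates a NULL argument. The series presumably adds a NULL check along these lines (a sketch, not the literal committed body):

void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
	int depth, i;

	if (!path)
		return;		/* makes unconditional cleanup safe */
	depth = path->p_depth;
	for (i = 0; i <= depth; i++, path++)
		if (path->p_bh) {
			brelse(path->p_bh);
			path->p_bh = NULL;
		}
}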
 
-/**
- * mext_replace_branches - Replace original extents with new extents
- *
- * @handle:            journal handle
- * @orig_inode:                original inode
- * @donor_inode:       donor inode
- * @from:              block offset of orig_inode
- * @count:             block count to be replaced
- * @err:               pointer to save return value
- *
- * Replace original inode extents and donor inode extents page by page.
- * We implement this replacement in the following three steps:
- * 1. Save the block information of original and donor inodes into
- *    dummy extents.
- * 2. Change the block information of original inode to point at the
- *    donor inode blocks.
- * 3. Change the block information of donor inode to point at the saved
- *    original inode blocks in the dummy extents.
- *
- * Return replaced block count.
- */
-static int
-mext_replace_branches(handle_t *handle, struct inode *orig_inode,
-                          struct inode *donor_inode, ext4_lblk_t from,
-                          ext4_lblk_t count, int *err)
-{
-       struct ext4_ext_path *orig_path = NULL;
-       struct ext4_ext_path *donor_path = NULL;
-       struct ext4_extent *oext, *dext;
-       struct ext4_extent tmp_dext, tmp_oext;
-       ext4_lblk_t orig_off = from, donor_off = from;
-       int depth;
-       int replaced_count = 0;
-       int dext_alen;
-
-       *err = ext4_es_remove_extent(orig_inode, from, count);
-       if (*err)
-               goto out;
-
-       *err = ext4_es_remove_extent(donor_inode, from, count);
-       if (*err)
-               goto out;
-
-       /* Get the original extent for the block "orig_off" */
-       *err = get_ext_path(orig_inode, orig_off, &orig_path);
-       if (*err)
-               goto out;
-
-       /* Get the donor extent for the head */
-       *err = get_ext_path(donor_inode, donor_off, &donor_path);
-       if (*err)
-               goto out;
-       depth = ext_depth(orig_inode);
-       oext = orig_path[depth].p_ext;
-       tmp_oext = *oext;
-
-       depth = ext_depth(donor_inode);
-       dext = donor_path[depth].p_ext;
-       if (unlikely(!dext))
-               goto missing_donor_extent;
-       tmp_dext = *dext;
-
-       *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
-                                     donor_off, count);
-       if (*err)
-               goto out;
-
-       /* Loop for the donor extents */
-       while (1) {
-               /* The extent for donor must be found. */
-               if (unlikely(!dext)) {
-               missing_donor_extent:
-                       EXT4_ERROR_INODE(donor_inode,
-                                  "The extent for donor must be found");
-                       *err = -EIO;
-                       goto out;
-               } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
-                       EXT4_ERROR_INODE(donor_inode,
-                               "Donor offset(%u) and the first block of donor "
-                               "extent(%u) should be equal",
-                               donor_off,
-                               le32_to_cpu(tmp_dext.ee_block));
-                       *err = -EIO;
-                       goto out;
-               }
-
-               /* Set donor extent to orig extent */
-               *err = mext_leaf_block(handle, orig_inode,
-                                          orig_path, &tmp_dext, &orig_off);
-               if (*err)
-                       goto out;
-
-               /* Set orig extent to donor extent */
-               *err = mext_leaf_block(handle, donor_inode,
-                                          donor_path, &tmp_oext, &donor_off);
-               if (*err)
-                       goto out;
-
-               dext_alen = ext4_ext_get_actual_len(&tmp_dext);
-               replaced_count += dext_alen;
-               donor_off += dext_alen;
-               orig_off += dext_alen;
-
-               BUG_ON(replaced_count > count);
-               /* Already moved the expected blocks */
-               if (replaced_count >= count)
-                       break;
-
-               if (orig_path)
-                       ext4_ext_drop_refs(orig_path);
-               *err = get_ext_path(orig_inode, orig_off, &orig_path);
-               if (*err)
-                       goto out;
-               depth = ext_depth(orig_inode);
-               oext = orig_path[depth].p_ext;
-               tmp_oext = *oext;
-
-               if (donor_path)
-                       ext4_ext_drop_refs(donor_path);
-               *err = get_ext_path(donor_inode, donor_off, &donor_path);
-               if (*err)
-                       goto out;
-               depth = ext_depth(donor_inode);
-               dext = donor_path[depth].p_ext;
-               tmp_dext = *dext;
-
-               *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
-                                          donor_off, count - replaced_count);
-               if (*err)
-                       goto out;
-       }
-
-out:
-       if (orig_path) {
-               ext4_ext_drop_refs(orig_path);
-               kfree(orig_path);
-       }
-       if (donor_path) {
-               ext4_ext_drop_refs(donor_path);
-               kfree(donor_path);
-       }
-
-       return replaced_count;
-}
-
 /**
  * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
  *
  * @inode1:    the inode structure
  * @inode2:    the inode structure
- * @index:     page index
+ * @index1:    page index
+ * @index2:    page index
  * @page:      result page vector
  *
  * Grab two locked pages for the inodes, in inode order
  */
 static int
 mext_page_double_lock(struct inode *inode1, struct inode *inode2,
-                     pgoff_t index, struct page *page[2])
+                     pgoff_t index1, pgoff_t index2, struct page *page[2])
 {
        struct address_space *mapping[2];
        unsigned fl = AOP_FLAG_NOFS;
@@ -793,15 +142,18 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
                mapping[0] = inode1->i_mapping;
                mapping[1] = inode2->i_mapping;
        } else {
+               pgoff_t tmp = index1;
+               index1 = index2;
+               index2 = tmp;
                mapping[0] = inode2->i_mapping;
                mapping[1] = inode1->i_mapping;
        }
 
-       page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
+       page[0] = grab_cache_page_write_begin(mapping[0], index1, fl);
        if (!page[0])
                return -ENOMEM;
 
-       page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
+       page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
        if (!page[1]) {
                unlock_page(page[0]);
                page_cache_release(page[0]);
@@ -893,25 +245,27 @@ out:
  * @o_filp:                    file structure of original file
  * @donor_inode:               donor inode
  * @orig_page_offset:          page index on original file
+ * @donor_page_offset:         page index on donor file
  * @data_offset_in_page:       block index where data swapping starts
  * @block_len_in_page:         the number of blocks to be swapped
  * @unwritten:                 orig extent is unwritten or not
  * @err:                       pointer to save return value
  *
  * Save the data in original inode blocks and replace original inode extents
- * with donor inode extents by calling mext_replace_branches().
+ * with donor inode extents by calling ext4_swap_extents().
  * Finally, write out the saved data in new original inode blocks. Return
  * replaced block count.
  */
 static int
 move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
-                 pgoff_t orig_page_offset, int data_offset_in_page,
-                 int block_len_in_page, int unwritten, int *err)
+                    pgoff_t orig_page_offset, pgoff_t donor_page_offset,
+                    int data_offset_in_page,
+                    int block_len_in_page, int unwritten, int *err)
 {
        struct inode *orig_inode = file_inode(o_filp);
        struct page *pagep[2] = {NULL, NULL};
        handle_t *handle;
-       ext4_lblk_t orig_blk_offset;
+       ext4_lblk_t orig_blk_offset, donor_blk_offset;
        unsigned long blocksize = orig_inode->i_sb->s_blocksize;
        unsigned int w_flags = 0;
        unsigned int tmp_data_size, data_size, replaced_size;
@@ -939,6 +293,9 @@ again:
        orig_blk_offset = orig_page_offset * blocks_per_page +
                data_offset_in_page;
 
+       donor_blk_offset = donor_page_offset * blocks_per_page +
+               data_offset_in_page;
+
        /* Calculate data_size */
        if ((orig_blk_offset + block_len_in_page - 1) ==
            ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
@@ -959,7 +316,7 @@ again:
        replaced_size = data_size;
 
        *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
-                                    pagep);
+                                    donor_page_offset, pagep);
        if (unlikely(*err < 0))
                goto stop_journal;
        /*
@@ -978,7 +335,7 @@ again:
                if (*err)
                        goto drop_data_sem;
 
-               unwritten &= mext_check_coverage(donor_inode, orig_blk_offset,
+               unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
                                                 block_len_in_page, 1, err);
                if (*err)
                        goto drop_data_sem;
@@ -994,9 +351,10 @@ again:
                        *err = -EBUSY;
                        goto drop_data_sem;
                }
-               replaced_count = mext_replace_branches(handle, orig_inode,
-                                               donor_inode, orig_blk_offset,
-                                               block_len_in_page, err);
+               replaced_count = ext4_swap_extents(handle, orig_inode,
+                                                  donor_inode, orig_blk_offset,
+                                                  donor_blk_offset,
+                                                  block_len_in_page, 1, err);
        drop_data_sem:
                ext4_double_up_write_data_sem(orig_inode, donor_inode);
                goto unlock_pages;
@@ -1014,9 +372,9 @@ data_copy:
                goto unlock_pages;
        }
        ext4_double_down_write_data_sem(orig_inode, donor_inode);
-       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
-                                              orig_blk_offset,
-                                              block_len_in_page, err);
+       replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
+                                              orig_blk_offset, donor_blk_offset,
+                                          block_len_in_page, 1, err);
        ext4_double_up_write_data_sem(orig_inode, donor_inode);
        if (*err) {
                if (replaced_count) {
@@ -1061,9 +419,9 @@ repair_branches:
         * Try to swap the extents back to their original places
         */
        ext4_double_down_write_data_sem(orig_inode, donor_inode);
-       replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
-                                              orig_blk_offset,
-                                              block_len_in_page, &err2);
+       replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
+                                              orig_blk_offset, donor_blk_offset,
+                                          block_len_in_page, 0, &err2);
        ext4_double_up_write_data_sem(orig_inode, donor_inode);
        if (replaced_count != block_len_in_page) {
                EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
@@ -1093,10 +451,14 @@ mext_check_arguments(struct inode *orig_inode,
                     struct inode *donor_inode, __u64 orig_start,
                     __u64 donor_start, __u64 *len)
 {
-       ext4_lblk_t orig_blocks, donor_blocks;
+       __u64 orig_eof, donor_eof;
        unsigned int blkbits = orig_inode->i_blkbits;
        unsigned int blocksize = 1 << blkbits;
 
+       orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
+       donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
+
        if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
                ext4_debug("ext4 move extent: suid or sgid is set"
                           " to donor file [ino:orig %lu, donor %lu]\n",
@@ -1112,7 +474,7 @@ mext_check_arguments(struct inode *orig_inode,
                ext4_debug("ext4 move extent: The argument files should "
                        "not be swapfile [ino:orig %lu, donor %lu]\n",
                        orig_inode->i_ino, donor_inode->i_ino);
-               return -EINVAL;
+               return -EBUSY;
        }
 
        /* Ext4 move extent supports only extent based file */
@@ -1132,67 +494,28 @@ mext_check_arguments(struct inode *orig_inode,
        }
 
        /* Start offsets must share the same in-page alignment */
-       if (orig_start != donor_start) {
+       if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
+           (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
                ext4_debug("ext4 move extent: orig and donor's start "
-                       "offset are not same [ino:orig %lu, donor %lu]\n",
+                       "offsets are not aligned [ino:orig %lu, donor %lu]\n",
                        orig_inode->i_ino, donor_inode->i_ino);
                return -EINVAL;
        }
 
        if ((orig_start >= EXT_MAX_BLOCKS) ||
+           (donor_start >= EXT_MAX_BLOCKS) ||
            (*len > EXT_MAX_BLOCKS) ||
+           (donor_start + *len >= EXT_MAX_BLOCKS) ||
            (orig_start + *len >= EXT_MAX_BLOCKS))  {
                ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
                        "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
                        orig_inode->i_ino, donor_inode->i_ino);
                return -EINVAL;
        }
-
-       if (orig_inode->i_size > donor_inode->i_size) {
-               donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits;
-               /* TODO: eliminate this artificial restriction */
-               if (orig_start >= donor_blocks) {
-                       ext4_debug("ext4 move extent: orig start offset "
-                       "[%llu] should be less than donor file blocks "
-                       "[%u] [ino:orig %lu, donor %lu]\n",
-                       orig_start, donor_blocks,
-                       orig_inode->i_ino, donor_inode->i_ino);
-                       return -EINVAL;
-               }
-
-               /* TODO: eliminate this artificial restriction */
-               if (orig_start + *len > donor_blocks) {
-                       ext4_debug("ext4 move extent: End offset [%llu] should "
-                               "be less than donor file blocks [%u]."
-                               "So adjust length from %llu to %llu "
-                               "[ino:orig %lu, donor %lu]\n",
-                               orig_start + *len, donor_blocks,
-                               *len, donor_blocks - orig_start,
-                               orig_inode->i_ino, donor_inode->i_ino);
-                       *len = donor_blocks - orig_start;
-               }
-       } else {
-               orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
-               if (orig_start >= orig_blocks) {
-                       ext4_debug("ext4 move extent: start offset [%llu] "
-                               "should be less than original file blocks "
-                               "[%u] [ino:orig %lu, donor %lu]\n",
-                                orig_start, orig_blocks,
-                               orig_inode->i_ino, donor_inode->i_ino);
-                       return -EINVAL;
-               }
-
-               if (orig_start + *len > orig_blocks) {
-                       ext4_debug("ext4 move extent: Adjust length "
-                               "from %llu to %llu. Because it should be "
-                               "less than original file blocks "
-                               "[ino:orig %lu, donor %lu]\n",
-                               *len, orig_blocks - orig_start,
-                               orig_inode->i_ino, donor_inode->i_ino);
-                       *len = orig_blocks - orig_start;
-               }
-       }
-
+       if (orig_eof < orig_start + *len - 1)
+               *len = orig_eof - orig_start;
+       if (donor_eof < donor_start + *len - 1)
+               *len = donor_eof - donor_start;
        if (!*len) {
                ext4_debug("ext4 move extent: len should not be 0 "
                        "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
@@ -1208,60 +531,26 @@ mext_check_arguments(struct inode *orig_inode,
  *
  * @o_filp:            file structure of the original file
  * @d_filp:            file structure of the donor file
- * @orig_start:                start offset in block for orig
- * @donor_start:       start offset in block for donor
+ * @orig_blk:          start offset in block for orig
+ * @donor_blk:         start offset in block for donor
  * @len:               the number of blocks to be moved
  * @moved_len:         moved block length
  *
  * This function returns 0 and sets the moved block length in moved_len
  * on success; otherwise it returns an error value.
  *
- * Note: ext4_move_extents() proceeds the following order.
- * 1:ext4_move_extents() calculates the last block number of moving extent
- *   function by the start block number (orig_start) and the number of blocks
- *   to be moved (len) specified as arguments.
- *   If the {orig, donor}_start points a hole, the extent's start offset
- *   pointed by ext_cur (current extent), holecheck_path, orig_path are set
- *   after hole behind.
- * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
- *   or the ext_cur exceeds the block_end which is last logical block number.
- * 3:To get the length of continues area, call mext_next_extent()
- *   specified with the ext_cur (initial value is holecheck_path) re-cursive,
- *   until find un-continuous extent, the start logical block number exceeds
- *   the block_end or the extent points to the last extent.
- * 4:Exchange the original inode data with donor inode data
- *   from orig_page_offset to seq_end_page.
- *   The start indexes of data are specified as arguments.
- *   That of the original inode is orig_page_offset,
- *   and the donor inode is also orig_page_offset
- *   (To easily handle blocksize != pagesize case, the offset for the
- *   donor inode is block unit).
- * 5:Update holecheck_path and orig_path to points a next proceeding extent,
- *   then returns to step 2.
- * 6:Release holecheck_path, orig_path and set the len to moved_len
- *   which shows the number of moved blocks.
- *   The moved_len is useful for the command to calculate the file offset
- *   for starting next move extent ioctl.
- * 7:Return 0 on success, or a negative error value on failure.
  */
 int
-ext4_move_extents(struct file *o_filp, struct file *d_filp,
-                __u64 orig_start, __u64 donor_start, __u64 len,
-                __u64 *moved_len)
+ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
+                 __u64 donor_blk, __u64 len, __u64 *moved_len)
 {
        struct inode *orig_inode = file_inode(o_filp);
        struct inode *donor_inode = file_inode(d_filp);
-       struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
-       struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
-       ext4_lblk_t block_start = orig_start;
-       ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
-       ext4_lblk_t rest_blocks;
-       pgoff_t orig_page_offset = 0, seq_end_page;
-       int ret, depth, last_extent = 0;
+       struct ext4_ext_path *path = NULL;
        int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
-       int data_offset_in_page;
-       int block_len_in_page;
-       int unwritten;
+       ext4_lblk_t o_end, o_start = orig_blk;
+       ext4_lblk_t d_start = donor_blk;
+       int ret;
 
        if (orig_inode->i_sb != donor_inode->i_sb) {
                ext4_debug("ext4 move extent: The argument files "
@@ -1303,121 +592,58 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
        /* Protect extent tree against block allocations via delalloc */
        ext4_double_down_write_data_sem(orig_inode, donor_inode);
        /* Check whether the filesystem environment allows move_extent */
-       ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
-                                   donor_start, &len);
+       ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
+                                   donor_blk, &len);
        if (ret)
                goto out;
+       o_end = o_start + len;
 
-       file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
-       block_end = block_start + len - 1;
-       if (file_end < block_end)
-               len -= block_end - file_end;
+       while (o_start < o_end) {
+               struct ext4_extent *ex;
+               ext4_lblk_t cur_blk, next_blk;
+               pgoff_t orig_page_index, donor_page_index;
+               int offset_in_page;
+               int unwritten, cur_len;
 
-       ret = get_ext_path(orig_inode, block_start, &orig_path);
-       if (ret)
-               goto out;
-
-       /* Get path structure to check the hole */
-       ret = get_ext_path(orig_inode, block_start, &holecheck_path);
-       if (ret)
-               goto out;
-
-       depth = ext_depth(orig_inode);
-       ext_cur = holecheck_path[depth].p_ext;
-
-       /*
-        * Get proper starting location of block replacement if block_start was
-        * within the hole.
-        */
-       if (le32_to_cpu(ext_cur->ee_block) +
-               ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
-               /*
-                * The hole exists between extents or the tail of
-                * original file.
-                */
-               last_extent = mext_next_extent(orig_inode,
-                                       holecheck_path, &ext_cur);
-               if (last_extent < 0) {
-                       ret = last_extent;
-                       goto out;
-               }
-               last_extent = mext_next_extent(orig_inode, orig_path,
-                                                       &ext_dummy);
-               if (last_extent < 0) {
-                       ret = last_extent;
+               ret = get_ext_path(orig_inode, o_start, &path);
+               if (ret)
                        goto out;
-               }
-               seq_start = le32_to_cpu(ext_cur->ee_block);
-       } else if (le32_to_cpu(ext_cur->ee_block) > block_start)
-               /* The hole exists at the beginning of original file. */
-               seq_start = le32_to_cpu(ext_cur->ee_block);
-       else
-               seq_start = block_start;
-
-       /* No blocks within the specified range. */
-       if (le32_to_cpu(ext_cur->ee_block) > block_end) {
-               ext4_debug("ext4 move extent: The specified range of file "
-                                                       "may be the hole\n");
-               ret = -EINVAL;
-               goto out;
-       }
-
-       /* Adjust start blocks */
-       add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
-                        ext4_ext_get_actual_len(ext_cur), block_end + 1) -
-                    max(le32_to_cpu(ext_cur->ee_block), block_start);
-
-       while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
-               seq_blocks += add_blocks;
-
-               /* Adjust tail blocks */
-               if (seq_start + seq_blocks - 1 > block_end)
-                       seq_blocks = block_end - seq_start + 1;
-
-               ext_prev = ext_cur;
-               last_extent = mext_next_extent(orig_inode, holecheck_path,
-                                               &ext_cur);
-               if (last_extent < 0) {
-                       ret = last_extent;
-                       break;
-               }
-               add_blocks = ext4_ext_get_actual_len(ext_cur);
-
-               /*
-                * Extend the length of contiguous block (seq_blocks)
-                * if extents are contiguous.
-                */
-               if (ext4_can_extents_be_merged(orig_inode,
-                                              ext_prev, ext_cur) &&
-                   block_end >= le32_to_cpu(ext_cur->ee_block) &&
-                   !last_extent)
+               ex = path[path->p_depth].p_ext;
+               next_blk = ext4_ext_next_allocated_block(path);
+               cur_blk = le32_to_cpu(ex->ee_block);
+               cur_len = ext4_ext_get_actual_len(ex);
+               /* Check hole before the start pos */
+               if (cur_blk + cur_len - 1 < o_start) {
+                       if (next_blk == EXT_MAX_BLOCKS) {
+                               o_start = o_end;
+                               ret = -ENODATA;
+                               goto out;
+                       }
+                       d_start += next_blk - o_start;
+                       o_start = next_blk;
                        continue;
-
-               /* Is original extent is unwritten */
-               unwritten = ext4_ext_is_unwritten(ext_prev);
-
-               data_offset_in_page = seq_start % blocks_per_page;
-
-               /*
-                * Calculate data blocks count that should be swapped
-                * at the first page.
-                */
-               if (data_offset_in_page + seq_blocks > blocks_per_page) {
-                       /* Swapped blocks are across pages */
-                       block_len_in_page =
-                                       blocks_per_page - data_offset_in_page;
-               } else {
-                       /* Swapped blocks are in a page */
-                       block_len_in_page = seq_blocks;
+               /* Check hole after the start pos */
+               } else if (cur_blk > o_start) {
+                       /* Skip hole */
+                       d_start += cur_blk - o_start;
+                       o_start = cur_blk;
+               /* Extent inside requested range? */
+                       if (cur_blk >= o_end)
+                               goto out;
+               } else { /* in_range(o_start, o_blk, o_len) */
+                       cur_len += cur_blk - o_start;
                }
-
-               orig_page_offset = seq_start >>
-                               (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
-               seq_end_page = (seq_start + seq_blocks - 1) >>
-                               (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
-               seq_start = le32_to_cpu(ext_cur->ee_block);
-               rest_blocks = seq_blocks;
-
+               unwritten = ext4_ext_is_unwritten(ex);
+               if (o_end - o_start < cur_len)
+                       cur_len = o_end - o_start;
+
+               orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
+                                              orig_inode->i_blkbits);
+               donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
+                                              donor_inode->i_blkbits);
+               offset_in_page = o_start % blocks_per_page;
+               if (cur_len > blocks_per_page - offset_in_page)
+                       cur_len = blocks_per_page - offset_in_page;
                /*
                 * Up semaphore to avoid following problems:
                 * a. transaction deadlock among ext4_journal_start,
@@ -1426,77 +652,29 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                 *    in move_extent_per_page
                 */
                ext4_double_up_write_data_sem(orig_inode, donor_inode);
-
-               while (orig_page_offset <= seq_end_page) {
-
-                       /* Swap original branches with new branches */
-                       block_len_in_page = move_extent_per_page(
-                                               o_filp, donor_inode,
-                                               orig_page_offset,
-                                               data_offset_in_page,
-                                               block_len_in_page,
-                                               unwritten, &ret);
-
-                       /* Count how many blocks we have exchanged */
-                       *moved_len += block_len_in_page;
-                       if (ret < 0)
-                               break;
-                       if (*moved_len > len) {
-                               EXT4_ERROR_INODE(orig_inode,
-                                       "We replaced blocks too much! "
-                                       "sum of replaced: %llu requested: %llu",
-                                       *moved_len, len);
-                               ret = -EIO;
-                               break;
-                       }
-
-                       orig_page_offset++;
-                       data_offset_in_page = 0;
-                       rest_blocks -= block_len_in_page;
-                       if (rest_blocks > blocks_per_page)
-                               block_len_in_page = blocks_per_page;
-                       else
-                               block_len_in_page = rest_blocks;
-               }
-
+               /* Swap original branches with new branches */
+               move_extent_per_page(o_filp, donor_inode,
+                                    orig_page_index, donor_page_index,
+                                    offset_in_page, cur_len,
+                                    unwritten, &ret);
                ext4_double_down_write_data_sem(orig_inode, donor_inode);
                if (ret < 0)
                        break;
-
-               /* Decrease buffer counter */
-               if (holecheck_path)
-                       ext4_ext_drop_refs(holecheck_path);
-               ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
-               if (ret)
-                       break;
-               depth = holecheck_path->p_depth;
-
-               /* Decrease buffer counter */
-               if (orig_path)
-                       ext4_ext_drop_refs(orig_path);
-               ret = get_ext_path(orig_inode, seq_start, &orig_path);
-               if (ret)
-                       break;
-
-               ext_cur = holecheck_path[depth].p_ext;
-               add_blocks = ext4_ext_get_actual_len(ext_cur);
-               seq_blocks = 0;
-
+               o_start += cur_len;
+               d_start += cur_len;
        }
+       *moved_len = o_start - orig_blk;
+       if (*moved_len > len)
+               *moved_len = len;
+
 out:
        if (*moved_len) {
                ext4_discard_preallocations(orig_inode);
                ext4_discard_preallocations(donor_inode);
        }
 
-       if (orig_path) {
-               ext4_ext_drop_refs(orig_path);
-               kfree(orig_path);
-       }
-       if (holecheck_path) {
-               ext4_ext_drop_refs(holecheck_path);
-               kfree(holecheck_path);
-       }
+       ext4_ext_drop_refs(path);
+       kfree(path);
        ext4_double_up_write_data_sem(orig_inode, donor_inode);
        ext4_inode_resume_unlocked_dio(orig_inode);
        ext4_inode_resume_unlocked_dio(donor_inode);
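
For context, ext4_move_extents() is driven from user space through the EXT4_IOC_MOVE_EXT ioctl (as used by e4defrag). A minimal caller sketch, assuming the ioctl ABI as defined in fs/ext4/ext4.h; the file names and block counts below are placeholders:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>

struct move_extent {
	__u32 reserved;		/* must be zero */
	__u32 donor_fd;		/* donor file descriptor */
	__u64 orig_start;	/* logical start of orig, in blocks */
	__u64 donor_start;	/* logical start of donor, in blocks */
	__u64 len;		/* blocks to move */
	__u64 moved_len;	/* filled in by the kernel */
};
#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

int main(void)
{
	struct move_extent me;
	int orig_fd = open("orig.file", O_RDWR);
	int donor_fd = open("donor.file", O_RDWR);

	if (orig_fd < 0 || donor_fd < 0)
		return 1;
	memset(&me, 0, sizeof(me));
	me.donor_fd = donor_fd;
	me.orig_start = 0;
	me.donor_start = 0;	/* now only the in-page offset must match */
	me.len = 16;
	if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long)me.moved_len);
	return 0;
}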
index 603e4ebbd0ac1a8eb33f27f0e06cf302d3ce3352..adb559de23c1d83e65b8f79b6081526f496cb880 100644 (file)
@@ -53,7 +53,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
                                        ext4_lblk_t *block)
 {
        struct buffer_head *bh;
-       int err = 0;
+       int err;
 
        if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
                     ((inode->i_size >> 10) >=
@@ -62,9 +62,9 @@ static struct buffer_head *ext4_append(handle_t *handle,
 
        *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
 
-       bh = ext4_bread(handle, inode, *block, 1, &err);
-       if (!bh)
-               return ERR_PTR(err);
+       bh = ext4_bread(handle, inode, *block, 1);
+       if (IS_ERR(bh))
+               return bh;
        inode->i_size += inode->i_sb->s_blocksize;
        EXT4_I(inode)->i_disksize = inode->i_size;
        BUFFER_TRACE(bh, "get_write_access");
@@ -94,20 +94,20 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 {
        struct buffer_head *bh;
        struct ext4_dir_entry *dirent;
-       int err = 0, is_dx_block = 0;
+       int is_dx_block = 0;
 
-       bh = ext4_bread(NULL, inode, block, 0, &err);
-       if (!bh) {
-               if (err == 0) {
-                       ext4_error_inode(inode, __func__, line, block,
-                                              "Directory hole found");
-                       return ERR_PTR(-EIO);
-               }
+       bh = ext4_bread(NULL, inode, block, 0);
+       if (IS_ERR(bh)) {
                __ext4_warning(inode->i_sb, __func__, line,
-                              "error reading directory block "
-                              "(ino %lu, block %lu)", inode->i_ino,
+                              "error %ld reading directory block "
+                              "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino,
                               (unsigned long) block);
-               return ERR_PTR(err);
+
+               return bh;
+       }
+       if (!bh) {
+               ext4_error_inode(inode, __func__, line, block, "Directory hole found");
+               return ERR_PTR(-EIO);
        }
        dirent = (struct ext4_dir_entry *) bh->b_data;
        /* Determine whether or not we have an index block */
@@ -124,8 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
                       "directory leaf block found instead of index block");
                return ERR_PTR(-EIO);
        }
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
+       if (!ext4_has_metadata_csum(inode->i_sb) ||
            buffer_verified(bh))
                return bh;
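
The hunk above relies on the new ext4_bread() convention adopted in this series: an ERR_PTR() encodes a real read or allocation error, NULL means the block is simply not mapped (a hole), and anything else is a valid buffer_head. A condensed caller sketch; the -ENOENT for the hole case is illustrative only, since __ext4_read_dirblock() above treats a directory hole as -EIO corruption:

	bh = ext4_bread(NULL, inode, block, 0);
	if (IS_ERR(bh))
		return PTR_ERR(bh);	/* I/O or allocation failure */
	if (!bh)
		return -ENOENT;		/* hole: handle as the caller sees fit */
	/* ... use bh ... */
	brelse(bh);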
 
@@ -253,8 +252,7 @@ static unsigned dx_node_limit(struct inode *dir);
 static struct dx_frame *dx_probe(const struct qstr *d_name,
                                 struct inode *dir,
                                 struct dx_hash_info *hinfo,
-                                struct dx_frame *frame,
-                                int *err);
+                                struct dx_frame *frame);
 static void dx_release(struct dx_frame *frames);
 static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
@@ -270,8 +268,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
                                 __u32 *start_hash);
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
                const struct qstr *d_name,
-               struct ext4_dir_entry_2 **res_dir,
-               int *err);
+               struct ext4_dir_entry_2 **res_dir);
 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                             struct inode *inode);
 
@@ -340,8 +337,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
 {
        struct ext4_dir_entry_tail *t;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(inode->i_sb))
                return 1;
 
        t = get_dirent_tail(inode, dirent);
@@ -362,8 +358,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
 {
        struct ext4_dir_entry_tail *t;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(inode->i_sb))
                return;
 
        t = get_dirent_tail(inode, dirent);
@@ -438,8 +433,7 @@ static int ext4_dx_csum_verify(struct inode *inode,
        struct dx_tail *t;
        int count_offset, limit, count;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(inode->i_sb))
                return 1;
 
        c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -468,8 +462,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
        struct dx_tail *t;
        int count_offset, limit, count;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(inode->i_sb))
                return;
 
        c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -557,8 +550,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
                EXT4_DIR_REC_LEN(2) - infosize;
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(dir->i_sb))
                entry_space -= sizeof(struct dx_tail);
        return entry_space / sizeof(struct dx_entry);
 }
@@ -567,8 +559,7 @@ static inline unsigned dx_node_limit(struct inode *dir)
 {
        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(dir->i_sb))
                entry_space -= sizeof(struct dx_tail);
        return entry_space / sizeof(struct dx_entry);
 }
@@ -641,7 +632,9 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
                u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
                struct stats stats;
                printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
-               if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
+               bh = ext4_bread(NULL,dir, block, 0);
+               if (!bh || IS_ERR(bh))
+                       continue;
                stats = levels?
                   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
                   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
@@ -669,29 +662,25 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
  */
 static struct dx_frame *
 dx_probe(const struct qstr *d_name, struct inode *dir,
-        struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
+        struct dx_hash_info *hinfo, struct dx_frame *frame_in)
 {
        unsigned count, indirect;
        struct dx_entry *at, *entries, *p, *q, *m;
        struct dx_root *root;
-       struct buffer_head *bh;
        struct dx_frame *frame = frame_in;
+       struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
        u32 hash;
 
-       frame->bh = NULL;
-       bh = ext4_read_dirblock(dir, 0, INDEX);
-       if (IS_ERR(bh)) {
-               *err = PTR_ERR(bh);
-               goto fail;
-       }
-       root = (struct dx_root *) bh->b_data;
+       frame->bh = ext4_read_dirblock(dir, 0, INDEX);
+       if (IS_ERR(frame->bh))
+               return (struct dx_frame *) frame->bh;
+
+       root = (struct dx_root *) frame->bh->b_data;
        if (root->info.hash_version != DX_HASH_TEA &&
            root->info.hash_version != DX_HASH_HALF_MD4 &&
            root->info.hash_version != DX_HASH_LEGACY) {
                ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
                             root->info.hash_version);
-               brelse(bh);
-               *err = ERR_BAD_DX_DIR;
                goto fail;
        }
        hinfo->hash_version = root->info.hash_version;
@@ -705,16 +694,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
        if (root->info.unused_flags & 1) {
                ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
                             root->info.unused_flags);
-               brelse(bh);
-               *err = ERR_BAD_DX_DIR;
                goto fail;
        }
 
        if ((indirect = root->info.indirect_levels) > 1) {
                ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
                             root->info.indirect_levels);
-               brelse(bh);
-               *err = ERR_BAD_DX_DIR;
                goto fail;
        }
 
@@ -724,27 +709,21 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
        if (dx_get_limit(entries) != dx_root_limit(dir,
                                                   root->info.info_length)) {
                ext4_warning(dir->i_sb, "dx entry: limit != root limit");
-               brelse(bh);
-               *err = ERR_BAD_DX_DIR;
                goto fail;
        }
 
        dxtrace(printk("Look up %x", hash));
-       while (1)
-       {
+       while (1) {
                count = dx_get_count(entries);
                if (!count || count > dx_get_limit(entries)) {
                        ext4_warning(dir->i_sb,
                                     "dx entry: no count or count > limit");
-                       brelse(bh);
-                       *err = ERR_BAD_DX_DIR;
-                       goto fail2;
+                       goto fail;
                }
 
                p = entries + 1;
                q = entries + count - 1;
-               while (p <= q)
-               {
+               while (p <= q) {
                        m = p + (q - p)/2;
                        dxtrace(printk("."));
                        if (dx_get_hash(m) > hash)
@@ -753,8 +732,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
                                p = m + 1;
                }
 
-               if (0) // linear search cross check
-               {
+               if (0) { // linear search cross check
                        unsigned n = count - 1;
                        at = entries;
                        while (n--)
@@ -771,38 +749,35 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 
                at = p - 1;
                dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
-               frame->bh = bh;
                frame->entries = entries;
                frame->at = at;
-               if (!indirect--) return frame;
-               bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
-               if (IS_ERR(bh)) {
-                       *err = PTR_ERR(bh);
-                       goto fail2;
+               if (!indirect--)
+                       return frame;
+               frame++;
+               frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
+               if (IS_ERR(frame->bh)) {
+                       ret_err = (struct dx_frame *) frame->bh;
+                       frame->bh = NULL;
+                       goto fail;
                }
-               entries = ((struct dx_node *) bh->b_data)->entries;
+               entries = ((struct dx_node *) frame->bh->b_data)->entries;
 
                if (dx_get_limit(entries) != dx_node_limit (dir)) {
                        ext4_warning(dir->i_sb,
                                     "dx entry: limit != node limit");
-                       brelse(bh);
-                       *err = ERR_BAD_DX_DIR;
-                       goto fail2;
+                       goto fail;
                }
-               frame++;
-               frame->bh = NULL;
        }
-fail2:
+fail:
        while (frame >= frame_in) {
                brelse(frame->bh);
                frame--;
        }
-fail:
-       if (*err == ERR_BAD_DX_DIR)
+       if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
                ext4_warning(dir->i_sb,
                             "Corrupt dir inode %lu, running e2fsck is "
                             "recommended.", dir->i_ino);
-       return NULL;
+       return ret_err;
 }
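(dx_probe() now stashes each index block it reads directly in frame->bh as it walks down the tree, so a single fail: label can release everything read so far. The unwind works because frames beyond the failure point hold NULL and brelse() tolerates NULL; condensed:

    /* deepest frame first; brelse(NULL) is a no-op, so a partially
     * populated frame array unwinds cleanly */
    while (frame >= frame_in) {
        brelse(frame->bh);
        frame--;
    }
)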
 
 static void dx_release (struct dx_frame *frames)
@@ -988,9 +963,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
        }
        hinfo.hash = start_hash;
        hinfo.minor_hash = 0;
-       frame = dx_probe(NULL, dir, &hinfo, frames, &err);
-       if (!frame)
-               return err;
+       frame = dx_probe(NULL, dir, &hinfo, frames);
+       if (IS_ERR(frame))
+               return PTR_ERR(frame);
 
        /* Add '.' and '..' from the htree header */
        if (!start_hash && !start_minor_hash) {
@@ -1227,8 +1202,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
                                   buffer */
        int num = 0;
        ext4_lblk_t  nblocks;
-       int i, err = 0;
-       int namelen;
+       int i, namelen;
 
        *res_dir = NULL;
        sb = dir->i_sb;
@@ -1258,17 +1232,13 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
                goto restart;
        }
        if (is_dx(dir)) {
-               bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
+               bh = ext4_dx_find_entry(dir, d_name, res_dir);
                /*
                 * On success, or if the error was file not found,
                 * return.  Otherwise, fall back to doing a search the
                 * old fashioned way.
                 */
-               if (err == -ENOENT)
-                       return NULL;
-               if (err && err != ERR_BAD_DX_DIR)
-                       return ERR_PTR(err);
-               if (bh)
+               if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR)
                        return bh;
                dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
                               "falling back\n"));
@@ -1298,10 +1268,10 @@ restart:
                                        break;
                                }
                                num++;
-                               bh = ext4_getblk(NULL, dir, b++, 0, &err);
-                               if (unlikely(err)) {
+                               bh = ext4_getblk(NULL, dir, b++, 0);
+                               if (unlikely(IS_ERR(bh))) {
                                        if (ra_max == 0)
-                                               return ERR_PTR(err);
+                                               return bh;
                                        break;
                                }
                                bh_use[ra_max] = bh;
@@ -1366,7 +1336,7 @@ cleanup_and_exit:
 }
 
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
-                      struct ext4_dir_entry_2 **res_dir, int *err)
+                      struct ext4_dir_entry_2 **res_dir)
 {
        struct super_block * sb = dir->i_sb;
        struct dx_hash_info     hinfo;
@@ -1375,25 +1345,23 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
        ext4_lblk_t block;
        int retval;
 
-       if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
-               return NULL;
+       frame = dx_probe(d_name, dir, &hinfo, frames);
+       if (IS_ERR(frame))
+               return (struct buffer_head *) frame;
        do {
                block = dx_get_block(frame->at);
                bh = ext4_read_dirblock(dir, block, DIRENT);
-               if (IS_ERR(bh)) {
-                       *err = PTR_ERR(bh);
+               if (IS_ERR(bh))
                        goto errout;
-               }
+
                retval = search_dirblock(bh, dir, d_name,
                                         block << EXT4_BLOCK_SIZE_BITS(sb),
                                         res_dir);
-               if (retval == 1) {      /* Success! */
-                       dx_release(frames);
-                       return bh;
-               }
+               if (retval == 1)
+                       goto success;
                brelse(bh);
                if (retval == -1) {
-                       *err = ERR_BAD_DX_DIR;
+                       bh = ERR_PTR(ERR_BAD_DX_DIR);
                        goto errout;
                }
 
@@ -1402,18 +1370,19 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
                                               frames, NULL);
                if (retval < 0) {
                        ext4_warning(sb,
-                            "error reading index page in directory #%lu",
-                            dir->i_ino);
-                       *err = retval;
+                            "error %d reading index page in directory #%lu",
+                            retval, dir->i_ino);
+                       bh = ERR_PTR(retval);
                        goto errout;
                }
        } while (retval == 1);
 
-       *err = -ENOENT;
+       bh = NULL;
 errout:
        dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
-       dx_release (frames);
-       return NULL;
+success:
+       dx_release(frames);
+       return bh;
 }
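(After this conversion, ext4_dx_find_entry() reports everything through the returned pointer. The resulting contract, as consumed by ext4_find_entry() above:

    /* tri-state result:
     *   valid bh     - entry found, *res_dir set
     *   NULL         - definitively not found
     *   ERR_PTR(e)   - error; ERR_BAD_DX_DIR means the index is corrupt
     *                  and the caller retries with the linear scan
     */
    bh = ext4_dx_find_entry(dir, d_name, res_dir);
    if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR)
        return bh;
    /* otherwise fall back to the old-fashioned block-by-block search */
)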
 
 static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -1441,7 +1410,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
                                         dentry);
                        return ERR_PTR(-EIO);
                }
-               inode = ext4_iget(dir->i_sb, ino);
+               inode = ext4_iget_normal(dir->i_sb, ino);
                if (inode == ERR_PTR(-ESTALE)) {
                        EXT4_ERROR_INODE(dir,
                                         "deleted inode referenced: %u",
@@ -1474,7 +1443,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
                return ERR_PTR(-EIO);
        }
 
-       return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
 }
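(Both lookup paths above now go through ext4_iget_normal() rather than ext4_iget(). A plausible shape for the wrapper, sketched on the assumption that it exists to reject direct lookups of reserved inodes, keeping things like the boot loader inode out of the directory tree and NFS handle paths:

    struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
    {
        if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
                return ERR_PTR(-EIO);   /* reserved inode: refuse the lookup */
        return ext4_iget(sb, ino);
    }
)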
 
 /*
@@ -1533,7 +1502,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
  */
 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                        struct buffer_head **bh,struct dx_frame *frame,
-                       struct dx_hash_info *hinfo, int *error)
+                       struct dx_hash_info *hinfo)
 {
        unsigned blocksize = dir->i_sb->s_blocksize;
        unsigned count, continued;
@@ -1548,16 +1517,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        int     csum_size = 0;
        int     err = 0, i;
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(dir->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
        bh2 = ext4_append(handle, dir, &newblock);
        if (IS_ERR(bh2)) {
                brelse(*bh);
                *bh = NULL;
-               *error = PTR_ERR(bh2);
-               return NULL;
+               return (struct ext4_dir_entry_2 *) bh2;
        }
 
        BUFFER_TRACE(*bh, "get_write_access");
@@ -1617,8 +1584,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
 
        /* Which block gets the new entry? */
-       if (hinfo->hash >= hash2)
-       {
+       if (hinfo->hash >= hash2) {
                swap(*bh, bh2);
                de = de2;
        }
@@ -1638,8 +1604,7 @@ journal_error:
        brelse(bh2);
        *bh = NULL;
        ext4_std_error(dir->i_sb, err);
-       *error = err;
-       return NULL;
+       return ERR_PTR(err);
 }
 
 int ext4_find_dest_de(struct inode *dir, struct inode *inode,
@@ -1718,8 +1683,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        int             csum_size = 0;
        int             err;
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(inode->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
        if (!de) {
@@ -1786,8 +1750,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        struct fake_dirent *fde;
        int             csum_size = 0;
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(inode->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
        blocksize =  dir->i_sb->s_blocksize;
@@ -1862,8 +1825,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        ext4_handle_dirty_dx_node(handle, dir, frame->bh);
        ext4_handle_dirty_dirent_node(handle, dir, bh);
 
-       de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
-       if (!de) {
+       de = do_split(handle,dir, &bh, frame, &hinfo);
+       if (IS_ERR(de)) {
                /*
                 * Even if the block split failed, we have to properly write
                 * out all the changes we did so far. Otherwise we can end up
@@ -1871,7 +1834,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
                 */
                ext4_mark_inode_dirty(handle, dir);
                dx_release(frames);
-               return retval;
+               return PTR_ERR(de);
        }
        dx_release(frames);
 
@@ -1904,8 +1867,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        ext4_lblk_t block, blocks;
        int     csum_size = 0;
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(inode->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
        sb = dir->i_sb;
@@ -1982,9 +1944,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
        struct ext4_dir_entry_2 *de;
        int err;
 
-       frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
-       if (!frame)
-               return err;
+       frame = dx_probe(&dentry->d_name, dir, &hinfo, frames);
+       if (IS_ERR(frame))
+               return PTR_ERR(frame);
        entries = frame->entries;
        at = frame->at;
        bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
@@ -2095,9 +2057,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        goto cleanup;
                }
        }
-       de = do_split(handle, dir, &bh, frame, &hinfo, &err);
-       if (!de)
+       de = do_split(handle, dir, &bh, frame, &hinfo);
+       if (IS_ERR(de)) {
+               err = PTR_ERR(de);
                goto cleanup;
+       }
        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
        goto cleanup;
 
@@ -2167,8 +2131,7 @@ static int ext4_delete_entry(handle_t *handle,
                        return err;
        }
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(dir->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
        BUFFER_TRACE(bh, "get_write_access");
@@ -2387,8 +2350,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
        int csum_size = 0;
        int err;
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(dir->i_sb))
                csum_size = sizeof(struct ext4_dir_entry_tail);
 
        if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -2403,10 +2365,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
        dir_block = ext4_append(handle, inode, &block);
        if (IS_ERR(dir_block))
                return PTR_ERR(dir_block);
-       BUFFER_TRACE(dir_block, "get_write_access");
-       err = ext4_journal_get_write_access(handle, dir_block);
-       if (err)
-               goto out;
        de = (struct ext4_dir_entry_2 *)dir_block->b_data;
        ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
        set_nlink(inode, 2);
@@ -2573,7 +2531,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        int err = 0, rc;
        bool dirty = false;
 
-       if (!sbi->s_journal)
+       if (!sbi->s_journal || is_bad_inode(inode))
                return 0;
 
        WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
index 1e43b905ff9854d7a6f45eab3da091b5867a0188..f298c60f907d9ec559186fe4e9f2026730346fde 100644 (fs/ext4/resize.c)
@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
 {
        struct buffer_head *bh;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return 0;
 
        bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
index 05c159218bc267431ee4e7a865a0e20a5867c949..1eda6ab0ef9d3a3be99b89242127850c73630ff3 100644 (fs/ext4/super.c)
@@ -70,7 +70,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
 static void ext4_clear_journal_err(struct super_block *sb,
                                   struct ext4_super_block *es);
 static int ext4_sync_fs(struct super_block *sb, int wait);
-static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
 static int ext4_remount(struct super_block *sb, int *flags, char *data);
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int ext4_unfreeze(struct super_block *sb);
@@ -141,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb,
 static int ext4_superblock_csum_verify(struct super_block *sb,
                                       struct ext4_super_block *es)
 {
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return 1;
 
        return es->s_checksum == ext4_superblock_csum(sb, es);
@@ -152,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
 {
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(sb))
                return;
 
        es->s_checksum = ext4_superblock_csum(sb, es);
@@ -820,10 +817,9 @@ static void ext4_put_super(struct super_block *sb)
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
-       percpu_counter_destroy(&sbi->s_extent_cache_cnt);
        brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
-       for (i = 0; i < MAXQUOTAS; i++)
+       for (i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
 #endif
 
@@ -885,6 +881,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ext4_es_init_tree(&ei->i_es_tree);
        rwlock_init(&ei->i_es_lock);
        INIT_LIST_HEAD(&ei->i_es_lru);
+       ei->i_es_all_nr = 0;
        ei->i_es_lru_nr = 0;
        ei->i_touch_when = 0;
        ei->i_reserved_data_blocks = 0;
@@ -1002,7 +999,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
         * Currently we don't know the generation for parent directory, so
         * a generation of 0 means "accept any"
         */
-       inode = ext4_iget(sb, ino);
+       inode = ext4_iget_normal(sb, ino);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
        if (generation && inode->i_generation != generation) {
@@ -1124,25 +1121,6 @@ static const struct super_operations ext4_sops = {
        .bdev_try_to_free_page = bdev_try_to_free_page,
 };
 
-static const struct super_operations ext4_nojournal_sops = {
-       .alloc_inode    = ext4_alloc_inode,
-       .destroy_inode  = ext4_destroy_inode,
-       .write_inode    = ext4_write_inode,
-       .dirty_inode    = ext4_dirty_inode,
-       .drop_inode     = ext4_drop_inode,
-       .evict_inode    = ext4_evict_inode,
-       .sync_fs        = ext4_sync_fs_nojournal,
-       .put_super      = ext4_put_super,
-       .statfs         = ext4_statfs,
-       .remount_fs     = ext4_remount,
-       .show_options   = ext4_show_options,
-#ifdef CONFIG_QUOTA
-       .quota_read     = ext4_quota_read,
-       .quota_write    = ext4_quota_write,
-#endif
-       .bdev_try_to_free_page = bdev_try_to_free_page,
-};
-
 static const struct export_operations ext4_export_ops = {
        .fh_to_dentry = ext4_fh_to_dentry,
        .fh_to_parent = ext4_fh_to_parent,
@@ -1712,13 +1690,6 @@ static int parse_options(char *options, struct super_block *sb,
                                        "not specified");
                        return 0;
                }
-       } else {
-               if (sbi->s_jquota_fmt) {
-                       ext4_msg(sb, KERN_ERR, "journaled quota format "
-                                       "specified with no journaling "
-                                       "enabled");
-                       return 0;
-               }
        }
 #endif
        if (test_opt(sb, DIOREAD_NOLOCK)) {
@@ -2016,8 +1987,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
        __u16 crc = 0;
        __le32 le_group = cpu_to_le32(block_group);
 
-       if ((sbi->s_es->s_feature_ro_compat &
-            cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
+       if (ext4_has_metadata_csum(sbi->s_sb)) {
                /* Use new metadata_csum algorithm */
                __le16 save_csum;
                __u32 csum32;
@@ -2035,6 +2005,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
        }
 
        /* old crc16 code */
+       if (!(sbi->s_es->s_feature_ro_compat &
+             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+               return 0;
+
        offset = offsetof(struct ext4_group_desc, bg_checksum);
 
        crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
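(Two things change in ext4_group_desc_csum(): the metadata_csum branch tests the new helper instead of peeking at s_feature_ro_compat directly, and the legacy crc16 path is now reached only when GDT_CSUM is actually set; with neither feature the function returns 0 instead of computing a meaningless crc16. A decision sketch, where the two leaf helpers are hypothetical stand-ins for the inline computations:

    if (ext4_has_metadata_csum(sbi->s_sb))
        return csum32_fold16(sbi, block_group, gdp);    /* crc32c folded to 16 bits */
    if (!(sbi->s_es->s_feature_ro_compat &
          cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
        return 0;                                       /* no checksum feature at all */
    return legacy_crc16(sbi, block_group, gdp);         /* uninit_bg: crc16 */
)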
@@ -2191,7 +2165,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
                /* don't clear list on RO mount w/ errors */
                if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
-                       jbd_debug(1, "Errors on filesystem, "
+                       ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
                                  "clearing orphan list.\n");
                        es->s_last_orphan = 0;
                }
@@ -2207,7 +2181,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
        /* Needed for iput() to work correctly and not trash data */
        sb->s_flags |= MS_ACTIVE;
        /* Turn on quotas so that they are updated correctly */
-       for (i = 0; i < MAXQUOTAS; i++) {
+       for (i = 0; i < EXT4_MAXQUOTAS; i++) {
                if (EXT4_SB(sb)->s_qf_names[i]) {
                        int ret = ext4_quota_on_mount(sb, i);
                        if (ret < 0)
@@ -2263,7 +2237,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                       PLURAL(nr_truncates));
 #ifdef CONFIG_QUOTA
        /* Turn quotas off */
-       for (i = 0; i < MAXQUOTAS; i++) {
+       for (i = 0; i < EXT4_MAXQUOTAS; i++) {
                if (sb_dqopt(sb)->files[i])
                        dquot_quota_off(sb, i);
        }
@@ -2548,6 +2522,16 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
        return count;
 }
 
+static ssize_t es_ui_show(struct ext4_attr *a,
+                          struct ext4_sb_info *sbi, char *buf)
+{
+
+       unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
+                          a->u.offset);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+}
+
 static ssize_t reserved_clusters_show(struct ext4_attr *a,
                                  struct ext4_sb_info *sbi, char *buf)
 {
@@ -2601,14 +2585,29 @@ static struct ext4_attr ext4_attr_##_name = {                   \
                .offset = offsetof(struct ext4_sb_info, _elname),\
        },                                                      \
 }
+
+#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname)          \
+static struct ext4_attr ext4_attr_##_name = {                          \
+       .attr = {.name = __stringify(_name), .mode = _mode },           \
+       .show   = _show,                                                \
+       .store  = _store,                                               \
+       .u = {                                                          \
+               .offset = offsetof(struct ext4_super_block, _elname),   \
+       },                                                              \
+}
+
 #define EXT4_ATTR(name, mode, show, store) \
 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
 
 #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
+
+#define EXT4_RO_ATTR_ES_UI(name, elname)       \
+       EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
 #define EXT4_RW_ATTR_SBI_UI(name, elname)      \
        EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
+
 #define ATTR_LIST(name) &ext4_attr_##name.attr
 #define EXT4_DEPRECATED_ATTR(_name, _val)      \
 static struct ext4_attr ext4_attr_##_name = {                  \
@@ -2641,6 +2640,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int
 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
+EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
+EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
+EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
 
 static struct attribute *ext4_attrs[] = {
        ATTR_LIST(delayed_allocation_blocks),
@@ -2664,6 +2666,9 @@ static struct attribute *ext4_attrs[] = {
        ATTR_LIST(warning_ratelimit_burst),
        ATTR_LIST(msg_ratelimit_interval_ms),
        ATTR_LIST(msg_ratelimit_burst),
+       ATTR_LIST(errors_count),
+       ATTR_LIST(first_error_time),
+       ATTR_LIST(last_error_time),
        NULL,
 };
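(For reference, expanding the new macro by hand shows what each read-only superblock attribute boils down to; EXT4_RO_ATTR_ES_UI(errors_count, s_error_count) becomes roughly:

    static struct ext4_attr ext4_attr_errors_count = {
        .attr  = { .name = "errors_count", .mode = 0444 },
        .show  = es_ui_show,    /* reads the field straight out of s_es */
        .store = NULL,          /* read-only: no store hook */
        .u = {
                .offset = offsetof(struct ext4_super_block, s_error_count),
        },
    };

The three new attributes then show up as /sys/fs/ext4/<disk>/errors_count, first_error_time and last_error_time.)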
 
@@ -2723,9 +2728,25 @@ static void ext4_feat_release(struct kobject *kobj)
        complete(&ext4_feat->f_kobj_unregister);
 }
 
+static ssize_t ext4_feat_show(struct kobject *kobj,
+                             struct attribute *attr, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE, "supported\n");
+}
+
+/*
+ * We can not use ext4_attr_show/store because it relies on the kobject
+ * being embedded in the ext4_sb_info structure which is definitely not
+ * true in this case.
+ */
+static const struct sysfs_ops ext4_feat_ops = {
+       .show   = ext4_feat_show,
+       .store  = NULL,
+};
+
 static struct kobj_type ext4_feat_ktype = {
        .default_attrs  = ext4_feat_attrs,
-       .sysfs_ops      = &ext4_attr_ops,
+       .sysfs_ops      = &ext4_feat_ops,
        .release        = ext4_feat_release,
 };
 
@@ -3179,8 +3200,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
        int compat, incompat;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+       if (ext4_has_metadata_csum(sb)) {
                /* journal checksum v3 */
                compat = 0;
                incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
@@ -3190,6 +3210,10 @@ static int set_journal_csum_feature_set(struct super_block *sb)
                incompat = 0;
        }
 
+       jbd2_journal_clear_features(sbi->s_journal,
+                       JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+                       JBD2_FEATURE_INCOMPAT_CSUM_V3 |
+                       JBD2_FEATURE_INCOMPAT_CSUM_V2);
        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
                ret = jbd2_journal_set_features(sbi->s_journal,
                                compat, 0,
@@ -3202,11 +3226,8 @@ static int set_journal_csum_feature_set(struct super_block *sb)
                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
        } else {
-               jbd2_journal_clear_features(sbi->s_journal,
-                               JBD2_FEATURE_COMPAT_CHECKSUM, 0,
-                               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
-                               JBD2_FEATURE_INCOMPAT_CSUM_V3 |
-                               JBD2_FEATURE_INCOMPAT_CSUM_V2);
+               jbd2_journal_clear_features(sbi->s_journal, 0, 0,
+                               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
        }
 
        return ret;
@@ -3436,7 +3457,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                logical_sb_block = sb_block;
        }
 
-       if (!(bh = sb_bread(sb, logical_sb_block))) {
+       if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
                ext4_msg(sb, KERN_ERR, "unable to read superblock");
                goto out_fail;
        }
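(sb_bread() is swapped for sb_bread_unmovable() wherever the buffer stays pinned for the lifetime of the mount, such as the superblock and group descriptors; buffers like these allocated from movable pageblocks permanently block compaction and CMA. A sketch of the assumed buffer_head.h side of this change; the only difference between the two wrappers is the __GFP_MOVABLE hint:

    static inline struct buffer_head *
    sb_bread(struct super_block *sb, sector_t block)
    {
        return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
    }

    static inline struct buffer_head *
    sb_bread_unmovable(struct super_block *sb, sector_t block)
    {
        return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);  /* pinned: stay unmovable */
    }
)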
@@ -3487,8 +3508,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
 
        /* Precompute checksum seed for all metadata */
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (ext4_has_metadata_csum(sb))
                sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
                                               sizeof(es->s_uuid));
 
@@ -3519,8 +3539,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                set_opt(sb, ERRORS_CONT);
        else
                set_opt(sb, ERRORS_RO);
-       if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
-               set_opt(sb, BLOCK_VALIDITY);
+       /* block_validity enabled by default; disable with noblock_validity */
+       set_opt(sb, BLOCK_VALIDITY);
        if (def_mount_opts & EXT4_DEFM_DISCARD)
                set_opt(sb, DISCARD);
 
@@ -3646,7 +3666,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                brelse(bh);
                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
                offset = do_div(logical_sb_block, blocksize);
-               bh = sb_bread(sb, logical_sb_block);
+               bh = sb_bread_unmovable(sb, logical_sb_block);
                if (!bh) {
                        ext4_msg(sb, KERN_ERR,
                               "Can't read superblock on 2nd try");
@@ -3868,7 +3888,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
        for (i = 0; i < db_count; i++) {
                block = descriptor_loc(sb, logical_sb_block, i);
-               sbi->s_group_desc[i] = sb_bread(sb, block);
+               sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
                if (!sbi->s_group_desc[i]) {
                        ext4_msg(sb, KERN_ERR,
                               "can't read group descriptor %d", i);
@@ -3890,13 +3910,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_err_report.data = (unsigned long) sb;
 
        /* Register extent status tree shrinker */
-       ext4_es_register_shrinker(sbi);
-
-       err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
-       if (err) {
-               ext4_msg(sb, KERN_ERR, "insufficient memory");
+       if (ext4_es_register_shrinker(sbi))
                goto failed_mount3;
-       }
 
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_extent_max_zeroout_kb = 32;
@@ -3904,11 +3919,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        /*
         * set up enough so that it can read an inode
         */
-       if (!test_opt(sb, NOLOAD) &&
-           EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
-               sb->s_op = &ext4_sops;
-       else
-               sb->s_op = &ext4_nojournal_sops;
+       sb->s_op = &ext4_sops;
        sb->s_export_op = &ext4_export_ops;
        sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -4229,10 +4240,9 @@ failed_mount_wq:
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
-failed_mount3:
        ext4_es_unregister_shrinker(sbi);
+failed_mount3:
        del_timer_sync(&sbi->s_err_report);
-       percpu_counter_destroy(&sbi->s_extent_cache_cnt);
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
 failed_mount2:
@@ -4247,7 +4257,7 @@ failed_mount:
                remove_proc_entry(sb->s_id, ext4_proc_root);
        }
 #ifdef CONFIG_QUOTA
-       for (i = 0; i < MAXQUOTAS; i++)
+       for (i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
 #endif
        ext4_blkdev_remove(sbi);
@@ -4375,6 +4385,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
                goto out_bdev;
        }
 
+       if ((le32_to_cpu(es->s_feature_ro_compat) &
+            EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+           es->s_checksum != ext4_superblock_csum(sb, es)) {
+               ext4_msg(sb, KERN_ERR, "external journal has "
+                                      "corrupt superblock");
+               brelse(bh);
+               goto out_bdev;
+       }
+
        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
                brelse(bh);
@@ -4677,15 +4696,19 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
         * being sent at the end of the function. But we can skip it if
         * transaction_commit will do it for us.
         */
-       target = jbd2_get_latest_transaction(sbi->s_journal);
-       if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
-           !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
+       if (sbi->s_journal) {
+               target = jbd2_get_latest_transaction(sbi->s_journal);
+               if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
+                   !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
+                       needs_barrier = true;
+
+               if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
+                       if (wait)
+                               ret = jbd2_log_wait_commit(sbi->s_journal,
+                                                          target);
+               }
+       } else if (wait && test_opt(sb, BARRIER))
                needs_barrier = true;
-
-       if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
-               if (wait)
-                       ret = jbd2_log_wait_commit(sbi->s_journal, target);
-       }
        if (needs_barrier) {
                int err;
                err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
@@ -4696,19 +4719,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
        return ret;
 }
 
-static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
-{
-       int ret = 0;
-
-       trace_ext4_sync_fs(sb, wait);
-       flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
-       dquot_writeback_dquots(sb, -1);
-       if (wait && test_opt(sb, BARRIER))
-               ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
-
-       return ret;
-}
-
 /*
  * LVM calls this function before a (read-only) snapshot is created.  This
  * gives us a chance to flush the journal completely and mark the fs clean.
@@ -4727,23 +4737,26 @@ static int ext4_freeze(struct super_block *sb)
 
        journal = EXT4_SB(sb)->s_journal;
 
-       /* Now we set up the journal barrier. */
-       jbd2_journal_lock_updates(journal);
+       if (journal) {
+               /* Now we set up the journal barrier. */
+               jbd2_journal_lock_updates(journal);
 
-       /*
-        * Don't clear the needs_recovery flag if we failed to flush
-        * the journal.
-        */
-       error = jbd2_journal_flush(journal);
-       if (error < 0)
-               goto out;
+               /*
+                * Don't clear the needs_recovery flag if we failed to
+                * flush the journal.
+                */
+               error = jbd2_journal_flush(journal);
+               if (error < 0)
+                       goto out;
+       }
 
        /* Journal blocked and flushed, clear needs_recovery flag. */
        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        error = ext4_commit_super(sb, 1);
 out:
-       /* we rely on upper layer to stop further updates */
-       jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+       if (journal)
+               /* we rely on upper layer to stop further updates */
+               jbd2_journal_unlock_updates(journal);
        return error;
 }
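(With ext4_sync_fs_nojournal() folded away and ext4_freeze() made journal-aware, one code path serves both modes. Condensed decision logic for the barrier, restated from the two hunks above as a sketch, not verbatim:

    if (sbi->s_journal) {
        /* let the committing transaction carry the barrier when it
         * will issue one anyway; otherwise flush explicitly */
        if (wait && (sbi->s_journal->j_flags & JBD2_BARRIER) &&
            !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
                needs_barrier = true;
    } else if (wait && test_opt(sb, BARRIER)) {
        /* nojournal mode: honour -o barrier directly */
        needs_barrier = true;
    }
    if (needs_barrier)
        blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
)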
 
@@ -4774,7 +4787,7 @@ struct ext4_mount_options {
        u32 s_min_batch_time, s_max_batch_time;
 #ifdef CONFIG_QUOTA
        int s_jquota_fmt;
-       char *s_qf_names[MAXQUOTAS];
+       char *s_qf_names[EXT4_MAXQUOTAS];
 #endif
 };
 
@@ -4804,7 +4817,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
        old_opts.s_max_batch_time = sbi->s_max_batch_time;
 #ifdef CONFIG_QUOTA
        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
-       for (i = 0; i < MAXQUOTAS; i++)
+       for (i = 0; i < EXT4_MAXQUOTAS; i++)
                if (sbi->s_qf_names[i]) {
                        old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
                                                         GFP_KERNEL);
@@ -4965,7 +4978,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 
 #ifdef CONFIG_QUOTA
        /* Release old quota file names */
-       for (i = 0; i < MAXQUOTAS; i++)
+       for (i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(old_opts.s_qf_names[i]);
        if (enable_quota) {
                if (sb_any_quota_suspended(sb))
@@ -4994,7 +5007,7 @@ restore_opts:
        sbi->s_max_batch_time = old_opts.s_max_batch_time;
 #ifdef CONFIG_QUOTA
        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
-       for (i = 0; i < MAXQUOTAS; i++) {
+       for (i = 0; i < EXT4_MAXQUOTAS; i++) {
                kfree(sbi->s_qf_names[i]);
                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
        }
@@ -5197,7 +5210,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 {
        int err;
        struct inode *qf_inode;
-       unsigned long qf_inums[MAXQUOTAS] = {
+       unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
        };
@@ -5225,13 +5238,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 static int ext4_enable_quotas(struct super_block *sb)
 {
        int type, err = 0;
-       unsigned long qf_inums[MAXQUOTAS] = {
+       unsigned long qf_inums[EXT4_MAXQUOTAS] = {
                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
        };
 
        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
-       for (type = 0; type < MAXQUOTAS; type++) {
+       for (type = 0; type < EXT4_MAXQUOTAS; type++) {
                if (qf_inums[type]) {
                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
                                                DQUOT_USAGE_ENABLED);
@@ -5309,7 +5322,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 {
        struct inode *inode = sb_dqopt(sb)->files[type];
        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
-       int err = 0;
        int offset = off & (sb->s_blocksize - 1);
        int tocopy;
        size_t toread;
@@ -5324,9 +5336,9 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
        while (toread > 0) {
                tocopy = sb->s_blocksize - offset < toread ?
                                sb->s_blocksize - offset : toread;
-               bh = ext4_bread(NULL, inode, blk, 0, &err);
-               if (err)
-                       return err;
+               bh = ext4_bread(NULL, inode, blk, 0);
+               if (IS_ERR(bh))
+                       return PTR_ERR(bh);
                if (!bh)        /* A hole? */
                        memset(data, 0, tocopy);
                else
@@ -5347,8 +5359,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 {
        struct inode *inode = sb_dqopt(sb)->files[type];
        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
-       int err = 0;
-       int offset = off & (sb->s_blocksize - 1);
+       int err, offset = off & (sb->s_blocksize - 1);
        struct buffer_head *bh;
        handle_t *handle = journal_current_handle();
 
@@ -5369,14 +5380,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
                return -EIO;
        }
 
-       bh = ext4_bread(handle, inode, blk, 1, &err);
+       bh = ext4_bread(handle, inode, blk, 1);
+       if (IS_ERR(bh))
+               return PTR_ERR(bh);
        if (!bh)
                goto out;
        BUFFER_TRACE(bh, "get write access");
        err = ext4_journal_get_write_access(handle, bh);
        if (err) {
                brelse(bh);
-               goto out;
+               return err;
        }
        lock_buffer(bh);
        memcpy(bh->b_data+offset, data, len);
@@ -5385,8 +5398,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
        err = ext4_handle_dirty_metadata(handle, NULL, bh);
        brelse(bh);
 out:
-       if (err)
-               return err;
        if (inode->i_size < off + len) {
                i_size_write(inode, off + len);
                EXT4_I(inode)->i_disksize = inode->i_size;
index e7387337060c96f06fe9360966992bb286c20496..1e09fc77395ce0c7cc20496161f09cf0d6eefdbd 100644 (fs/ext4/xattr.c)
@@ -142,8 +142,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode,
                                        sector_t block_nr,
                                        struct ext4_xattr_header *hdr)
 {
-       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+       if (ext4_has_metadata_csum(inode->i_sb) &&
            (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
                return 0;
        return 1;
@@ -153,8 +152,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode,
                                      sector_t block_nr,
                                      struct ext4_xattr_header *hdr)
 {
-       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+       if (!ext4_has_metadata_csum(inode->i_sb))
                return;
 
        hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
@@ -190,14 +188,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
 }
 
 static int
-ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
+ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
+                      void *value_start)
 {
-       while (!IS_LAST_ENTRY(entry)) {
-               struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
+       struct ext4_xattr_entry *e = entry;
+
+       while (!IS_LAST_ENTRY(e)) {
+               struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
                if ((void *)next >= end)
                        return -EIO;
-               entry = next;
+               e = next;
        }
+
+       while (!IS_LAST_ENTRY(entry)) {
+               if (entry->e_value_size != 0 &&
+                   (value_start + le16_to_cpu(entry->e_value_offs) <
+                    (void *)e + sizeof(__u32) ||
+                    value_start + le16_to_cpu(entry->e_value_offs) +
+                   le32_to_cpu(entry->e_value_size) > end))
+                       return -EIO;
+               entry = EXT4_XATTR_NEXT(entry);
+       }
+
        return 0;
 }
 
@@ -214,7 +226,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
                return -EIO;
        if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
                return -EIO;
-       error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+       error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
+                                      bh->b_data);
        if (!error)
                set_buffer_verified(bh);
        return error;
@@ -331,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
        header = IHDR(inode, raw_inode);
        entry = IFIRST(header);
        end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-       error = ext4_xattr_check_names(entry, end);
+       error = ext4_xattr_check_names(entry, end, entry);
        if (error)
                goto cleanup;
        error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -463,7 +476,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
        raw_inode = ext4_raw_inode(&iloc);
        header = IHDR(inode, raw_inode);
        end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-       error = ext4_xattr_check_names(IFIRST(header), end);
+       error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
        if (error)
                goto cleanup;
        error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -899,14 +912,8 @@ inserted:
                        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
 
-                       /*
-                        * take i_data_sem because we will test
-                        * i_delalloc_reserved_flag in ext4_mb_new_blocks
-                        */
-                       down_read(&EXT4_I(inode)->i_data_sem);
                        block = ext4_new_meta_blocks(handle, inode, goal, 0,
                                                     NULL, &error);
-                       up_read((&EXT4_I(inode)->i_data_sem));
                        if (error)
                                goto cleanup;
 
@@ -986,7 +993,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
        is->s.here = is->s.first;
        is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
        if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
-               error = ext4_xattr_check_names(IFIRST(header), is->s.end);
+               error = ext4_xattr_check_names(IFIRST(header), is->s.end,
+                                              IFIRST(header));
                if (error)
                        return error;
                /* Find the named attribute. */
index 06fe11e0abfa02bf4ff9c7b9d0d6360537c84f4e..aab8549591e7ad7143e41a9957338cb8f5583fd6 100644 (fs/jbd/journal.c)
@@ -886,7 +886,7 @@ journal_t * journal_init_inode (struct inode *inode)
                goto out_err;
        }
 
-       bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+       bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
        if (!bh) {
                printk(KERN_ERR
                       "%s: Cannot get buffer for journal superblock\n",
index 7f34f4716165311f66b62b81dd25a32e899dcf80..988b32ed4c8737db045b517714ee8bcc5698405f 100644 (fs/jbd2/checkpoint.c)
@@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
        if (jh->b_transaction == NULL && !buffer_locked(bh) &&
            !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
-               /*
-                * Get our reference so that bh cannot be freed before
-                * we unlock it
-                */
-               get_bh(bh);
                JBUFFER_TRACE(jh, "remove from checkpoint list");
                ret = __jbd2_journal_remove_checkpoint(jh) + 1;
-               BUFFER_TRACE(bh, "release");
-               __brelse(bh);
        }
        return ret;
 }
@@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 
        nblocks = jbd2_space_needed(journal);
        while (jbd2_log_space_left(journal) < nblocks) {
-               if (journal->j_flags & JBD2_ABORT)
-                       return;
                write_unlock(&journal->j_state_lock);
                mutex_lock(&journal->j_checkpoint_mutex);
 
@@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal)
                 * trace for forensic evidence.
                 */
                write_lock(&journal->j_state_lock);
+               if (journal->j_flags & JBD2_ABORT) {
+                       mutex_unlock(&journal->j_checkpoint_mutex);
+                       return;
+               }
                spin_lock(&journal->j_list_lock);
                nblocks = jbd2_space_needed(journal);
                space_left = jbd2_log_space_left(journal);
@@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
        }
 }
 
-/*
- * Clean up transaction's list of buffers submitted for io.
- * We wait for any pending IO to complete and remove any clean
- * buffers. Note that we take the buffers in the opposite ordering
- * from the one in which they were submitted for IO.
- *
- * Return 0 on success, and return <0 if some buffers have failed
- * to be written out.
- *
- * Called with j_list_lock held.
- */
-static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
-{
-       struct journal_head *jh;
-       struct buffer_head *bh;
-       tid_t this_tid;
-       int released = 0;
-       int ret = 0;
-
-       this_tid = transaction->t_tid;
-restart:
-       /* Did somebody clean up the transaction in the meanwhile? */
-       if (journal->j_checkpoint_transactions != transaction ||
-                       transaction->t_tid != this_tid)
-               return ret;
-       while (!released && transaction->t_checkpoint_io_list) {
-               jh = transaction->t_checkpoint_io_list;
-               bh = jh2bh(jh);
-               get_bh(bh);
-               if (buffer_locked(bh)) {
-                       spin_unlock(&journal->j_list_lock);
-                       wait_on_buffer(bh);
-                       /* the journal_head may have gone by now */
-                       BUFFER_TRACE(bh, "brelse");
-                       __brelse(bh);
-                       spin_lock(&journal->j_list_lock);
-                       goto restart;
-               }
-               if (unlikely(buffer_write_io_error(bh)))
-                       ret = -EIO;
-
-               /*
-                * Now in whatever state the buffer currently is, we know that
-                * it has been written out and so we can drop it from the list
-                */
-               released = __jbd2_journal_remove_checkpoint(jh);
-               __brelse(bh);
-       }
-
-       return ret;
-}
-
 static void
 __flush_batch(journal_t *journal, int *batch_count)
 {
@@ -254,81 +197,6 @@ __flush_batch(journal_t *journal, int *batch_count)
        *batch_count = 0;
 }
 
-/*
- * Try to flush one buffer from the checkpoint list to disk.
- *
- * Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.  Return <0 if the buffer has failed to
- * be written out.
- *
- * Called with j_list_lock held and drops it if 1 is returned
- */
-static int __process_buffer(journal_t *journal, struct journal_head *jh,
-                           int *batch_count, transaction_t *transaction)
-{
-       struct buffer_head *bh = jh2bh(jh);
-       int ret = 0;
-
-       if (buffer_locked(bh)) {
-               get_bh(bh);
-               spin_unlock(&journal->j_list_lock);
-               wait_on_buffer(bh);
-               /* the journal_head may have gone by now */
-               BUFFER_TRACE(bh, "brelse");
-               __brelse(bh);
-               ret = 1;
-       } else if (jh->b_transaction != NULL) {
-               transaction_t *t = jh->b_transaction;
-               tid_t tid = t->t_tid;
-
-               transaction->t_chp_stats.cs_forced_to_close++;
-               spin_unlock(&journal->j_list_lock);
-               if (unlikely(journal->j_flags & JBD2_UNMOUNT))
-                       /*
-                        * The journal thread is dead; so starting and
-                        * waiting for a commit to finish will cause
-                        * us to wait for a _very_ long time.
-                        */
-                       printk(KERN_ERR "JBD2: %s: "
-                              "Waiting for Godot: block %llu\n",
-                              journal->j_devname,
-                              (unsigned long long) bh->b_blocknr);
-               jbd2_log_start_commit(journal, tid);
-               jbd2_log_wait_commit(journal, tid);
-               ret = 1;
-       } else if (!buffer_dirty(bh)) {
-               ret = 1;
-               if (unlikely(buffer_write_io_error(bh)))
-                       ret = -EIO;
-               get_bh(bh);
-               BUFFER_TRACE(bh, "remove from checkpoint");
-               __jbd2_journal_remove_checkpoint(jh);
-               spin_unlock(&journal->j_list_lock);
-               __brelse(bh);
-       } else {
-               /*
-                * Important: we are about to write the buffer, and
-                * possibly block, while still holding the journal lock.
-                * We cannot afford to let the transaction logic start
-                * messing around with this buffer before we write it to
-                * disk, as that would break recoverability.
-                */
-               BUFFER_TRACE(bh, "queue");
-               get_bh(bh);
-               J_ASSERT_BH(bh, !buffer_jwrite(bh));
-               journal->j_chkpt_bhs[*batch_count] = bh;
-               __buffer_relink_io(jh);
-               transaction->t_chp_stats.cs_written++;
-               (*batch_count)++;
-               if (*batch_count == JBD2_NR_BATCH) {
-                       spin_unlock(&journal->j_list_lock);
-                       __flush_batch(journal, batch_count);
-                       ret = 1;
-               }
-       }
-       return ret;
-}
-
 /*
  * Perform an actual checkpoint. We take the first transaction on the
  * list of transactions to be checkpointed and send all its buffers
@@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
  */
 int jbd2_log_do_checkpoint(journal_t *journal)
 {
-       transaction_t *transaction;
-       tid_t this_tid;
-       int result;
+       struct journal_head     *jh;
+       struct buffer_head      *bh;
+       transaction_t           *transaction;
+       tid_t                   this_tid;
+       int                     result, batch_count = 0;
 
        jbd_debug(1, "Start checkpoint\n");
 
@@ -374,45 +244,117 @@ restart:
         * done (maybe it's a new transaction, but it fell at the same
         * address).
         */
-       if (journal->j_checkpoint_transactions == transaction &&
-                       transaction->t_tid == this_tid) {
-               int batch_count = 0;
-               struct journal_head *jh;
-               int retry = 0, err;
-
-               while (!retry && transaction->t_checkpoint_list) {
-                       jh = transaction->t_checkpoint_list;
-                       retry = __process_buffer(journal, jh, &batch_count,
-                                                transaction);
-                       if (retry < 0 && !result)
-                               result = retry;
-                       if (!retry && (need_resched() ||
-                               spin_needbreak(&journal->j_list_lock))) {
-                               spin_unlock(&journal->j_list_lock);
-                               retry = 1;
-                               break;
-                       }
+       if (journal->j_checkpoint_transactions != transaction ||
+           transaction->t_tid != this_tid)
+               goto out;
+
+       /* checkpoint all of the transaction's buffers */
+       while (transaction->t_checkpoint_list) {
+               jh = transaction->t_checkpoint_list;
+               bh = jh2bh(jh);
+
+               if (buffer_locked(bh)) {
+                       spin_unlock(&journal->j_list_lock);
+                       get_bh(bh);
+                       wait_on_buffer(bh);
+                       /* the journal_head may have gone by now */
+                       BUFFER_TRACE(bh, "brelse");
+                       __brelse(bh);
+                       goto retry;
                }
+               if (jh->b_transaction != NULL) {
+                       transaction_t *t = jh->b_transaction;
+                       tid_t tid = t->t_tid;
 
-               if (batch_count) {
-                       if (!retry) {
-                               spin_unlock(&journal->j_list_lock);
-                               retry = 1;
-                       }
-                       __flush_batch(journal, &batch_count);
+                       transaction->t_chp_stats.cs_forced_to_close++;
+                       spin_unlock(&journal->j_list_lock);
+                       if (unlikely(journal->j_flags & JBD2_UNMOUNT))
+                               /*
+                                * The journal thread is dead; so
+                                * starting and waiting for a commit
+                                * to finish will cause us to wait for
+                                * a _very_ long time.
+                                */
+                               printk(KERN_ERR
+               "JBD2: %s: Waiting for Godot: block %llu\n",
+               journal->j_devname, (unsigned long long) bh->b_blocknr);
+
+                       jbd2_log_start_commit(journal, tid);
+                       jbd2_log_wait_commit(journal, tid);
+                       goto retry;
+               }
+               if (!buffer_dirty(bh)) {
+                       if (unlikely(buffer_write_io_error(bh)) && !result)
+                               result = -EIO;
+                       BUFFER_TRACE(bh, "remove from checkpoint");
+                       if (__jbd2_journal_remove_checkpoint(jh))
+                               /* The transaction was released; we're done */
+                               goto out;
+                       continue;
                }
+               /*
+                * Important: we are about to write the buffer, and
+                * possibly block, while still holding the journal
+                * lock.  We cannot afford to let the transaction
+                * logic start messing around with this buffer before
+                * we write it to disk, as that would break
+                * recoverability.
+                */
+               BUFFER_TRACE(bh, "queue");
+               get_bh(bh);
+               J_ASSERT_BH(bh, !buffer_jwrite(bh));
+               journal->j_chkpt_bhs[batch_count++] = bh;
+               __buffer_relink_io(jh);
+               transaction->t_chp_stats.cs_written++;
+               if ((batch_count == JBD2_NR_BATCH) ||
+                   need_resched() ||
+                   spin_needbreak(&journal->j_list_lock))
+                       goto unlock_and_flush;
+       }
 
-               if (retry) {
+       if (batch_count) {
+               unlock_and_flush:
+                       spin_unlock(&journal->j_list_lock);
+               retry:
+                       if (batch_count)
+                               __flush_batch(journal, &batch_count);
                        spin_lock(&journal->j_list_lock);
                        goto restart;
+       }
+
+       /*
+        * Now that we have issued all of the transaction's buffers, let's
+        * deal with the buffers that are out for I/O.
+        */
+restart2:
+       /* Did somebody clean up the transaction in the meantime? */
+       if (journal->j_checkpoint_transactions != transaction ||
+           transaction->t_tid != this_tid)
+               goto out;
+
+       while (transaction->t_checkpoint_io_list) {
+               jh = transaction->t_checkpoint_io_list;
+               bh = jh2bh(jh);
+               if (buffer_locked(bh)) {
+                       spin_unlock(&journal->j_list_lock);
+                       get_bh(bh);
+                       wait_on_buffer(bh);
+                       /* the journal_head may have gone by now */
+                       BUFFER_TRACE(bh, "brelse");
+                       __brelse(bh);
+                       spin_lock(&journal->j_list_lock);
+                       goto restart2;
                }
+               if (unlikely(buffer_write_io_error(bh)) && !result)
+                       result = -EIO;
+
                /*
-                * Now we have cleaned up the first transaction's checkpoint
-                * list. Let's clean up the second one
+                * Now in whatever state the buffer currently is, we
+                * know that it has been written out and so we can
+                * drop it from the list
                 */
-               err = __wait_cp_io(journal, transaction);
-               if (!result)
-                       result = err;
+               if (__jbd2_journal_remove_checkpoint(jh))
+                       break;
        }
 out:
        spin_unlock(&journal->j_list_lock);
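
The rewritten jbd2_log_do_checkpoint() folds the old __wait_cp_io() and
__process_buffer() helpers into one function-level scan.  The core idiom is
"pin the buffer, drop j_list_lock, sleep, retake the lock, restart the scan".
A minimal sketch of that idiom, modeled on the restart2 loop above (names
paraphrase the jbd2 code; the stale-transaction recheck is omitted for
brevity, so this is an illustration, not a drop-in helper):

static void wait_checkpoint_io_buffers(journal_t *journal,
				       transaction_t *transaction)
{
	struct journal_head *jh;
	struct buffer_head *bh;

	spin_lock(&journal->j_list_lock);
restart:
	while ((jh = transaction->t_checkpoint_io_list) != NULL) {
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			get_bh(bh);		/* pin bh before dropping the lock */
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);	/* may sleep */
			__brelse(bh);
			spin_lock(&journal->j_list_lock);
			goto restart;		/* the list may have changed */
		}
		/* unlocked means written out: drop it from the list */
		if (__jbd2_journal_remove_checkpoint(jh))
			break;			/* transaction was freed */
	}
	spin_unlock(&journal->j_list_lock);
}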
@@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
  * Find all the written-back checkpoint buffers in the given list and
  * release them.
  *
- * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
+ * Returns 1 if we freed the transaction, 0 otherwise.
  */
-
-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+static int journal_clean_one_cp_list(struct journal_head *jh)
 {
        struct journal_head *last_jh;
        struct journal_head *next_jh = jh;
-       int ret, freed = 0;
+       int ret;
+       int freed = 0;
 
-       *released = 0;
        if (!jh)
                return 0;
 
@@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
                jh = next_jh;
                next_jh = jh->b_cpnext;
                ret = __try_to_free_cp_buf(jh);
-               if (ret) {
-                       freed++;
-                       if (ret == 2) {
-                               *released = 1;
-                               return freed;
-                       }
-               }
+               if (!ret)
+                       return freed;
+               if (ret == 2)
+                       return 1;
+               freed = 1;
                /*
                 * This function only frees up some memory
                 * if possible so we don't have an obligation
@@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
  *
- * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
  */
-
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
 {
        transaction_t *transaction, *last_transaction, *next_transaction;
-       int ret = 0;
-       int released;
+       int ret;
 
        transaction = journal->j_checkpoint_transactions;
        if (!transaction)
-               goto out;
+               return;
 
        last_transaction = transaction->t_cpprev;
        next_transaction = transaction;
        do {
                transaction = next_transaction;
                next_transaction = transaction->t_cpnext;
-               ret += journal_clean_one_cp_list(transaction->
-                               t_checkpoint_list, &released);
+               ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
                /*
                 * This function only frees up some memory if possible so we
                 * don't have an obligation to finish processing. Bail out if
                 * preemption is requested:
                 */
                if (need_resched())
-                       goto out;
-               if (released)
+                       return;
+               if (ret)
                        continue;
                /*
                 * It is essential that we are as careful as in the case of
                 * t_checkpoint_list with removing the buffer from the list as
                 * we can possibly see not yet submitted buffers on io_list
                 */
-               ret += journal_clean_one_cp_list(transaction->
-                               t_checkpoint_io_list, &released);
+               ret = journal_clean_one_cp_list(transaction->
+                               t_checkpoint_io_list);
                if (need_resched())
-                       goto out;
+                       return;
+               /*
+                * Stop scanning if we couldn't free the transaction. This
+                * avoids pointless scanning of transactions that have not
+                * been checkpointed yet.
+                */
+               if (!ret)
+                       return;
        } while (transaction != last_transaction);
-out:
-       return ret;
 }
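
With the return type changed from int to void, the function becomes
fire-and-forget for its callers.  A hedged sketch of the expected call
pattern (the call site itself is not part of this diff), honoring the
"Called with j_list_lock held" rule from the comment above:

	spin_lock(&journal->j_list_lock);
	__jbd2_journal_clean_checkpoint_list(journal);
	spin_unlock(&journal->j_list_lock);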
 
 /*
index 19d74d86d99cc630aec2c0ca8f7bf4ef619e3aa3..e4dc74713a4328eda4738823f51e0c6070937e0a 100644 (file)
@@ -1237,7 +1237,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
                goto out_err;
        }
 
-       bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+       bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
        if (!bh) {
                printk(KERN_ERR
                       "%s: Cannot get buffer for journal superblock\n",
@@ -1522,14 +1522,6 @@ static int journal_get_superblock(journal_t *journal)
                goto out;
        }
 
-       if (jbd2_journal_has_csum_v2or3(journal) &&
-           JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
-               /* Can't have checksum v1 and v2 on at the same time! */
-               printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
-                      "at the same time!\n");
-               goto out;
-       }
-
        if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
            JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
                /* Can't have checksum v2 and v3 at the same time! */
@@ -1538,6 +1530,14 @@ static int journal_get_superblock(journal_t *journal)
                goto out;
        }
 
+       if (jbd2_journal_has_csum_v2or3(journal) &&
+           JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
+               /* Can't have checksum v1 and v2/3 on at the same time! */
+               printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
+                      "at the same time!\n");
+               goto out;
+       }
+
        if (!jbd2_verify_csum_type(journal, sb)) {
                printk(KERN_ERR "JBD2: Unknown checksum type\n");
                goto out;
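
The reorder above means the v2-vs-v3 exclusivity check now runs before the
v1-vs-v2/3 check, so the latter only fires once v2 and v3 are known not to be
set together.  For reference, jbd2_journal_has_csum_v2or3() presumably
reduces to a pair of incompat-feature tests, roughly (paraphrased, not part
of this diff):

static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
{
	/* true if either checksum v2 or v3 is advertised in the superblock */
	return JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) ||
	       JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3);
}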
index 9b329b55ffe3726a611ccf8282190e420d173080..bcbef08a4d8fc8873994eb37d35881626f05af70 100644 (file)
@@ -525,6 +525,7 @@ static int do_one_pass(journal_t *journal,
                            !jbd2_descr_block_csum_verify(journal,
                                                          bh->b_data)) {
                                err = -EIO;
+                               brelse(bh);
                                goto failed;
                        }
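
The added brelse() plugs a buffer_head leak: on the checksum-failure path the
reference taken when the descriptor block was read was never dropped.  A
hedged sketch of the corrected pattern (the surrounding do_one_pass() code,
including the jread() call and the leading half of the condition, is assumed
rather than shown in this hunk):

	err = jread(&bh, journal, next_log_block);
	if (err)
		goto failed;
	if (jbd2_journal_has_csum_v2or3(journal) &&
	    !jbd2_descr_block_csum_verify(journal, bh->b_data)) {
		err = -EIO;
		brelse(bh);	/* drop the reference before bailing out */
		goto failed;
	}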
 
index 324329ceea1e89c48b8170f8886e4b0c88066e91..73b45225a7ca1bbe749ea3b62e38a35add22936b 100644 (file)
@@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
                        unsigned size);
-struct buffer_head *__getblk(struct block_device *bdev, sector_t block,
-                       unsigned size);
+struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
+                                 unsigned size, gfp_t gfp);
 void __brelse(struct buffer_head *);
 void __bforget(struct buffer_head *);
 void __breadahead(struct block_device *, sector_t block, unsigned int size);
-struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size);
+struct buffer_head *__bread_gfp(struct block_device *,
+                               sector_t block, unsigned size, gfp_t gfp);
 void invalidate_bh_lrus(void);
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
@@ -295,7 +296,13 @@ static inline void bforget(struct buffer_head *bh)
 static inline struct buffer_head *
 sb_bread(struct super_block *sb, sector_t block)
 {
-       return __bread(sb->s_bdev, block, sb->s_blocksize);
+       return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
+}
+
+static inline struct buffer_head *
+sb_bread_unmovable(struct super_block *sb, sector_t block)
+{
+       return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
 }
 
 static inline void
@@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb, sector_t block)
 static inline struct buffer_head *
 sb_getblk(struct super_block *sb, sector_t block)
 {
-       return __getblk(sb->s_bdev, block, sb->s_blocksize);
+       return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
 }
 
 static inline struct buffer_head *
@@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh)
                __lock_buffer(bh);
 }
 
+static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
+                                                  sector_t block,
+                                                  unsigned size)
+{
+       return __getblk_gfp(bdev, block, size, 0);
+}
+
+static inline struct buffer_head *__getblk(struct block_device *bdev,
+                                          sector_t block,
+                                          unsigned size)
+{
+       return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
+}
+
+/**
+ *  __bread() - reads a specified block and returns the bh
+ *  @bdev: the block_device to read from
+ *  @block: number of block
+ *  @size: size (in bytes) to read
+ *
+ *  Reads a specified block and returns the buffer head that contains it.
+ *  The page cache is allocated from the movable area so that it can be
+ *  migrated.  It returns NULL if the block was unreadable.
+ */
+static inline struct buffer_head *
+__bread(struct block_device *bdev, sector_t block, unsigned size)
+{
+       return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
+}
+
 extern int __set_page_dirty_buffers(struct page *page);
 
 #else /* CONFIG_BLOCK */
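
The new _gfp variants let callers drop __GFP_MOVABLE, so that long-lived
metadata buffers are not placed in movable pageblocks where they would pin
pages against compaction and migration.  A hedged usage sketch; sb_block and
the -EIO return are assumptions for illustration:

	struct buffer_head *bh;

	/* gfp == 0: no __GFP_MOVABLE, the backing page is never migrated */
	bh = sb_bread_unmovable(sb, sb_block);
	if (!bh)
		return -EIO;
	/* ... parse the on-disk metadata in bh->b_data ... */
	brelse(bh);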
index 0dae71e9971c434cef33c13572dbd13018f3604e..704b9a599b268dd9c7d0b9d45b97c5b67245700b 100644 (file)
@@ -1042,7 +1042,7 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 extern void jbd2_journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
index 02d11ee7f19d1515ac47178575d653e52fc8e4e8..27eb1bfbe7049adbaac4d90b8f2f77f95c9741bc 100644 (file)
@@ -1176,6 +1176,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
index d4f70a7fe8761a73e7128964e83b29f7809bd4e9..ff4bd1b35246a33482b997d2093dbedbe6079357 100644 (file)
@@ -2369,7 +2369,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
                  show_extent_status(__entry->found ? __entry->status : 0))
 );
 
-TRACE_EVENT(ext4_es_shrink_enter,
+DECLARE_EVENT_CLASS(ext4__es_shrink_enter,
        TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
 
        TP_ARGS(sb, nr_to_scan, cache_cnt),
@@ -2391,26 +2391,38 @@ TRACE_EVENT(ext4_es_shrink_enter,
                  __entry->nr_to_scan, __entry->cache_cnt)
 );
 
-TRACE_EVENT(ext4_es_shrink_exit,
-       TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt),
+DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_count,
+       TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
 
-       TP_ARGS(sb, shrunk_nr, cache_cnt),
+       TP_ARGS(sb, nr_to_scan, cache_cnt)
+);
+
+DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_scan_enter,
+       TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
+
+       TP_ARGS(sb, nr_to_scan, cache_cnt)
+);
+
+TRACE_EVENT(ext4_es_shrink_scan_exit,
+       TP_PROTO(struct super_block *sb, int nr_shrunk, int cache_cnt),
+
+       TP_ARGS(sb, nr_shrunk, cache_cnt),
 
        TP_STRUCT__entry(
                __field(        dev_t,  dev                     )
-               __field(        int,    shrunk_nr               )
+               __field(        int,    nr_shrunk               )
                __field(        int,    cache_cnt               )
        ),
 
        TP_fast_assign(
                __entry->dev            = sb->s_dev;
-               __entry->shrunk_nr      = shrunk_nr;
+               __entry->nr_shrunk      = nr_shrunk;
                __entry->cache_cnt      = cache_cnt;
        ),
 
-       TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d",
+       TP_printk("dev %d,%d nr_shrunk %d cache_cnt %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->shrunk_nr, __entry->cache_cnt)
+                 __entry->nr_shrunk, __entry->cache_cnt)
 );
 
 TRACE_EVENT(ext4_collapse_range,
@@ -2438,6 +2450,37 @@ TRACE_EVENT(ext4_collapse_range,
                  __entry->offset, __entry->len)
 );
 
+TRACE_EVENT(ext4_es_shrink,
+       TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
+                int skip_precached, int nr_skipped, int retried),
+
+       TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,          dev             )
+               __field(        int,            nr_shrunk       )
+               __field(        unsigned long long, scan_time   )
+               __field(        int,            skip_precached  )
+               __field(        int,            nr_skipped      )
+               __field(        int,            retried         )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = sb->s_dev;
+               __entry->nr_shrunk      = nr_shrunk;
+               __entry->scan_time      = div_u64(scan_time, 1000);
+               __entry->skip_precached = skip_precached;
+               __entry->nr_skipped     = nr_skipped;
+               __entry->retried        = retried;
+       ),
+
+       TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d "
+                 "nr_skipped %d retried %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk,
+                 __entry->scan_time, __entry->skip_precached,
+                 __entry->nr_skipped, __entry->retried)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
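
A hypothetical call site for the new ext4_es_shrink tracepoint (no caller
appears in this diff).  Note that TP_fast_assign() divides scan_time by 1000,
so the caller passes nanoseconds and the event records microseconds:

	/* assumed: emitted at the end of the extent-status shrinker */
	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk,
			     ktime_to_ns(ktime_sub(ktime_get(), start_time)),
			     skip_precached, nr_skipped, retried);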
index 96d167372d89405372ef7cd544799e76fd99a886..261eaf6e5a198d47d662d4a7da0b950f01660c6a 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h> /* grr. try_to_release_page,
                                   do_invalidatepage */
 #include <linux/cleancache.h>
+#include <linux/rmap.h>
 #include "internal.h"
 
 static void clear_exceptional_entry(struct address_space *mapping,
@@ -719,11 +720,67 @@ EXPORT_SYMBOL(truncate_pagecache);
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
+       loff_t oldsize = inode->i_size;
+
        i_size_write(inode, newsize);
+       if (newsize > oldsize)
+               pagecache_isize_extended(inode, oldsize, newsize);
        truncate_pagecache(inode, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
 
+/**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode:     inode for which i_size was extended
+ * @from:      original inode size
+ * @to:                new inode size
+ *
+ * Handle extension of inode size either caused by extending truncate or by a
+ * write starting after current i_size. We mark the page straddling current
+ * i_size RO so that page_mkwrite() is called on the first write access to
+ * the page.  This way the filesystem can be sure that page_mkwrite() is
+ * called on the page before user writes to the page via mmap after the
+ * i_size has been changed.
+ *
+ * The function must be called after i_size is updated so that a page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also ensures that userspace cannot observe
+ * the new i_size value before we are prepared to store mmap writes at the
+ * new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+       int bsize = 1 << inode->i_blkbits;
+       loff_t rounded_from;
+       struct page *page;
+       pgoff_t index;
+
+       WARN_ON(!mutex_is_locked(&inode->i_mutex));
+       WARN_ON(to > inode->i_size);
+
+       if (from >= to || bsize == PAGE_CACHE_SIZE)
+               return;
+       /* Nothing to do if the page straddling @from gains no new blocks */
+       rounded_from = round_up(from, bsize);
+       if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+               return;
+
+       index = from >> PAGE_CACHE_SHIFT;
+       page = find_lock_page(inode->i_mapping, index);
+       /* Page not cached? Nothing to do */
+       if (!page)
+               return;
+       /*
+        * See clear_page_dirty_for_io() for details why set_page_dirty()
+        * is needed.
+        */
+       if (page_mkclean(page))
+               set_page_dirty(page);
+       unlock_page(page);
+       page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
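
A hedged sketch of the intended call pattern from a filesystem path that
extends i_size (function and variable names are assumptions; per the
kernel-doc above, i_mutex must be held and i_size must be updated first):

static void example_extend_isize(struct inode *inode, loff_t pos,
				 size_t copied)
{
	/* caller holds inode->i_mutex */
	if (pos + copied > inode->i_size) {
		loff_t old_size = inode->i_size;

		i_size_write(inode, pos + copied);
		/*
		 * Write-protect the page straddling old_size so the next
		 * mmap store faults into ->page_mkwrite() before it can
		 * touch the newly exposed tail of the file.
		 */
		pagecache_isize_extended(inode, old_size, pos + copied);
	}
}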
 /**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode