Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Jun 2012 17:12:15 +0000 (10:12 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Jun 2012 17:12:15 +0000 (10:12 -0700)
Pull Ext4 updates from Theodore Ts'o:
 "The major new feature added in this update is Darrick J Wong's
  metadata checksum feature, which adds crc32 checksums to ext4's
  metadata fields.

  There is also the usual set of cleanups and bug fixes."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (44 commits)
  ext4: hole-punch use truncate_pagecache_range
  jbd2: use kmem_cache_zalloc wrapper instead of flag
  ext4: remove mb_groups before tearing down the buddy_cache
  ext4: add ext4_mb_unload_buddy in the error path
  ext4: don't trash state flags in EXT4_IOC_SETFLAGS
  ext4: let getattr report the right blocks in delalloc+bigalloc
  ext4: add missing save_error_info() to ext4_error()
  ext4: add debugging trigger for ext4_error()
  ext4: protect group inode free counting with group lock
  ext4: use consistent ssize_t type in ext4_file_write()
  ext4: fix format flag in ext4_ext_binsearch_idx()
  ext4: cleanup in ext4_discard_allocated_blocks()
  ext4: return ENOMEM when mounts fail due to lack of memory
  ext4: remove redundundant "(char *) bh->b_data" casts
  ext4: disallow hard-linked directory in ext4_lookup
  ext4: fix potential integer overflow in alloc_flex_gd()
  ext4: remove needs_recovery in ext4_mb_init()
  ext4: force ro mount if ext4_setup_super() fails
  ext4: fix potential NULL dereference in ext4_free_inodes_counts()
  ext4/jbd2: add metadata checksumming to the list of supported features
  ...

28 files changed:
fs/ext4/Kconfig
fs/ext4/balloc.c
fs/ext4/bitmap.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ext4_extents.h
fs/ext4/ext4_jbd2.c
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/mmp.c
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/ext4/xattr.h
fs/jbd2/Kconfig
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
fs/jbd2/revoke.c
fs/jbd2/transaction.c
include/linux/jbd2.h
include/linux/jbd_common.h

index 9ed1bb1f319f381b700a6386a4d8d068d04e0fdf..c22f17021b6eee7ca942a3525eb9f4fd23de6011 100644 (file)
@@ -2,6 +2,8 @@ config EXT4_FS
        tristate "The Extended 4 (ext4) filesystem"
        select JBD2
        select CRC16
+       select CRYPTO
+       select CRYPTO_CRC32C
        help
          This is the next generation of the ext3 filesystem.
 
index c45c41129a35b7346463e0f18e847b78a52e0426..99b6324290db916466d8b5c0633e9fa216d21798 100644 (file)
@@ -168,12 +168,14 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 
        /* If checksum is bad mark all blocks used to prevent allocation
         * essentially implementing a per-group read-only flag. */
-       if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+       if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
                ext4_error(sb, "Checksum bad for group %u", block_group);
                ext4_free_group_clusters_set(sb, gdp, 0);
                ext4_free_inodes_set(sb, gdp, 0);
                ext4_itable_unused_set(sb, gdp, 0);
                memset(bh->b_data, 0xff, sb->s_blocksize);
+               ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
+                                          EXT4_BLOCKS_PER_GROUP(sb) / 8);
                return;
        }
        memset(bh->b_data, 0, sb->s_blocksize);
@@ -210,6 +212,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
         */
        ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
                             sb->s_blocksize * 8, bh->b_data);
+       ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
+                                  EXT4_BLOCKS_PER_GROUP(sb) / 8);
+       ext4_group_desc_csum_set(sb, block_group, gdp);
 }
 
 /* Return the number of free blocks in a block group.  It is used when
@@ -276,9 +281,9 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 }
 
 static int ext4_valid_block_bitmap(struct super_block *sb,
-                                       struct ext4_group_desc *desc,
-                                       unsigned int block_group,
-                                       struct buffer_head *bh)
+                                  struct ext4_group_desc *desc,
+                                  unsigned int block_group,
+                                  struct buffer_head *bh)
 {
        ext4_grpblk_t offset;
        ext4_grpblk_t next_zero_bit;
@@ -325,6 +330,23 @@ err_out:
                        block_group, bitmap_blk);
        return 0;
 }
+
+void ext4_validate_block_bitmap(struct super_block *sb,
+                              struct ext4_group_desc *desc,
+                              unsigned int block_group,
+                              struct buffer_head *bh)
+{
+       if (buffer_verified(bh))
+               return;
+
+       ext4_lock_group(sb, block_group);
+       if (ext4_valid_block_bitmap(sb, desc, block_group, bh) &&
+           ext4_block_bitmap_csum_verify(sb, block_group, desc, bh,
+                                         EXT4_BLOCKS_PER_GROUP(sb) / 8))
+               set_buffer_verified(bh);
+       ext4_unlock_group(sb, block_group);
+}
+
 /**
  * ext4_read_block_bitmap()
  * @sb:                        super block
@@ -355,12 +377,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
        }
 
        if (bitmap_uptodate(bh))
-               return bh;
+               goto verify;
 
        lock_buffer(bh);
        if (bitmap_uptodate(bh)) {
                unlock_buffer(bh);
-               return bh;
+               goto verify;
        }
        ext4_lock_group(sb, block_group);
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -379,7 +401,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
                 */
                set_bitmap_uptodate(bh);
                unlock_buffer(bh);
-               return bh;
+               goto verify;
        }
        /*
         * submit the buffer_head for reading
@@ -390,6 +412,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
        get_bh(bh);
        submit_bh(READ, bh);
        return bh;
+verify:
+       ext4_validate_block_bitmap(sb, desc, block_group, bh);
+       return bh;
 }
 
 /* Returns 0 on success, 1 on error */
@@ -412,7 +437,7 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
        }
        clear_buffer_new(bh);
        /* Panic or remount fs read-only if block bitmap is invalid */
-       ext4_valid_block_bitmap(sb, desc, block_group, bh);
+       ext4_validate_block_bitmap(sb, desc, block_group, bh);
        return 0;
 }
 
index fa3af81ac565c16dba6237edc89c3c7d70c5fc61..b319721da26ae32010adcd46db7e2d98ec50887a 100644 (file)
@@ -29,3 +29,86 @@ unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
 
 #endif  /*  EXT4FS_DEBUG  */
 
+int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+                                 struct ext4_group_desc *gdp,
+                                 struct buffer_head *bh, int sz)
+{
+       __u32 hi;
+       __u32 provided, calculated;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
+       calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
+       if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) {
+               hi = le16_to_cpu(gdp->bg_inode_bitmap_csum_hi);
+               provided |= (hi << 16);
+       } else
+               calculated &= 0xFFFF;
+
+       return provided == calculated;
+}
+
+void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+                               struct ext4_group_desc *gdp,
+                               struct buffer_head *bh, int sz)
+{
+       __u32 csum;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
+       gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
+       if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END)
+               gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16);
+}
+
+int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+                                 struct ext4_group_desc *gdp,
+                                 struct buffer_head *bh, int sz)
+{
+       __u32 hi;
+       __u32 provided, calculated;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
+       calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
+       if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) {
+               hi = le16_to_cpu(gdp->bg_block_bitmap_csum_hi);
+               provided |= (hi << 16);
+       } else
+               calculated &= 0xFFFF;
+
+       if (provided == calculated)
+               return 1;
+
+       ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group);
+       return 0;
+}
+
+void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+                               struct ext4_group_desc *gdp,
+                               struct buffer_head *bh, int sz)
+{
+       __u32 csum;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
+       gdp->bg_block_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
+       if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END)
+               gdp->bg_block_bitmap_csum_hi = cpu_to_le16(csum >> 16);
+}
index b86786202643bdd8044ee85fb72a0a21bc2c9bef..aa39e600d15954244aead38f7aed30513ce86d65 100644 (file)
@@ -179,6 +179,18 @@ static int ext4_readdir(struct file *filp,
                        continue;
                }
 
+               /* Check the checksum */
+               if (!buffer_verified(bh) &&
+                   !ext4_dirent_csum_verify(inode,
+                               (struct ext4_dir_entry *)bh->b_data)) {
+                       EXT4_ERROR_FILE(filp, 0, "directory fails checksum "
+                                       "at offset %llu",
+                                       (unsigned long long)filp->f_pos);
+                       filp->f_pos += sb->s_blocksize - offset;
+                       continue;
+               }
+               set_buffer_verified(bh);
+
 revalidate:
                /* If the dir block has changed since the last call to
                 * readdir(2), then we might be pointing to an invalid
index c21b1de51afbb42191adea4fc4a357e3906c8489..cfc4e01b3c8370c642681824ef55b13a66683c0d 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/wait.h>
 #include <linux/blockgroup_lock.h>
 #include <linux/percpu_counter.h>
+#include <crypto/hash.h>
 #ifdef __KERNEL__
 #include <linux/compat.h>
 #endif
@@ -298,7 +299,9 @@ struct ext4_group_desc
        __le16  bg_free_inodes_count_lo;/* Free inodes count */
        __le16  bg_used_dirs_count_lo;  /* Directories count */
        __le16  bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
-       __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
+       __le32  bg_exclude_bitmap_lo;   /* Exclude bitmap for snapshots */
+       __le16  bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */
+       __le16  bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */
        __le16  bg_itable_unused_lo;    /* Unused inodes count */
        __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */
        __le32  bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
@@ -308,9 +311,19 @@ struct ext4_group_desc
        __le16  bg_free_inodes_count_hi;/* Free inodes count MSB */
        __le16  bg_used_dirs_count_hi;  /* Directories count MSB */
        __le16  bg_itable_unused_hi;    /* Unused inodes count MSB */
-       __u32   bg_reserved2[3];
+       __le32  bg_exclude_bitmap_hi;   /* Exclude bitmap block MSB */
+       __le16  bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */
+       __le16  bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */
+       __u32   bg_reserved;
 };
 
+#define EXT4_BG_INODE_BITMAP_CSUM_HI_END       \
+       (offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \
+        sizeof(__le16))
+#define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END       \
+       (offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \
+        sizeof(__le16))
+
 /*
  * Structure of a flex block group info
  */
@@ -650,7 +663,8 @@ struct ext4_inode {
                        __le16  l_i_file_acl_high;
                        __le16  l_i_uid_high;   /* these 2 fields */
                        __le16  l_i_gid_high;   /* were reserved2[0] */
-                       __u32   l_i_reserved2;
+                       __le16  l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
+                       __le16  l_i_reserved;
                } linux2;
                struct {
                        __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
@@ -666,7 +680,7 @@ struct ext4_inode {
                } masix2;
        } osd2;                         /* OS dependent 2 */
        __le16  i_extra_isize;
-       __le16  i_pad1;
+       __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
        __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
        __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
        __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
@@ -768,7 +782,7 @@ do {                                                                               \
 #define i_gid_low      i_gid
 #define i_uid_high     osd2.linux2.l_i_uid_high
 #define i_gid_high     osd2.linux2.l_i_gid_high
-#define i_reserved2    osd2.linux2.l_i_reserved2
+#define i_checksum_lo  osd2.linux2.l_i_checksum_lo
 
 #elif defined(__GNU__)
 
@@ -908,6 +922,9 @@ struct ext4_inode_info {
         */
        tid_t i_sync_tid;
        tid_t i_datasync_tid;
+
+       /* Precomputed uuid+inum+igen checksum for seeding inode checksums */
+       __u32 i_csum_seed;
 };
 
 /*
@@ -1001,6 +1018,9 @@ extern void ext4_set_bits(void *bm, int cur, int len);
 #define EXT4_ERRORS_PANIC              3       /* Panic */
 #define EXT4_ERRORS_DEFAULT            EXT4_ERRORS_CONTINUE
 
+/* Metadata checksum algorithm codes */
+#define EXT4_CRC32C_CHKSUM             1
+
 /*
  * Structure of the super block
  */
@@ -1087,7 +1107,7 @@ struct ext4_super_block {
        __le64  s_mmp_block;            /* Block for multi-mount protection */
        __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
        __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
-       __u8    s_reserved_char_pad;
+       __u8    s_checksum_type;        /* metadata checksum algorithm used */
        __le16  s_reserved_pad;
        __le64  s_kbytes_written;       /* nr of lifetime kilobytes written */
        __le32  s_snapshot_inum;        /* Inode number of active snapshot */
@@ -1113,7 +1133,8 @@ struct ext4_super_block {
        __le32  s_usr_quota_inum;       /* inode for tracking user quota */
        __le32  s_grp_quota_inum;       /* inode for tracking group quota */
        __le32  s_overhead_clusters;    /* overhead blocks/clusters in fs */
-       __le32  s_reserved[109];        /* Padding to the end of the block */
+       __le32  s_reserved[108];        /* Padding to the end of the block */
+       __le32  s_checksum;             /* crc32c(superblock) */
 };
 
 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1176,6 +1197,7 @@ struct ext4_sb_info {
        struct proc_dir_entry *s_proc;
        struct kobject s_kobj;
        struct completion s_kobj_unregister;
+       struct super_block *s_sb;
 
        /* Journaling */
        struct journal_s *s_journal;
@@ -1266,6 +1288,12 @@ struct ext4_sb_info {
 
        /* record the last minlen when FITRIM is called. */
        atomic_t s_last_trim_minblks;
+
+       /* Reference to checksum algorithm driver via cryptoapi */
+       struct crypto_shash *s_chksum_driver;
+
+       /* Precomputed FS UUID checksum for seeding other checksums */
+       __u32 s_csum_seed;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1414,6 +1442,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE     0x0040
 #define EXT4_FEATURE_RO_COMPAT_QUOTA           0x0100
 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC                0x0200
+/*
+ * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM).  When
+ * METADATA_CSUM is set, group descriptor checksums use the same algorithm as
+ * all other data structures' checksums.  However, the METADATA_CSUM and
+ * GDT_CSUM bits are mutually exclusive.
+ */
 #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM   0x0400
 
 #define EXT4_FEATURE_INCOMPAT_COMPRESSION      0x0001
@@ -1461,7 +1495,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
                                         EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
                                         EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
                                         EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
-                                        EXT4_FEATURE_RO_COMPAT_BIGALLOC)
+                                        EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
+                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
 
 /*
  * Default values for user and/or group using reserved blocks
@@ -1526,6 +1561,18 @@ struct ext4_dir_entry_2 {
        char    name[EXT4_NAME_LEN];    /* File name */
 };
 
+/*
+ * This is a bogus directory entry at the end of each leaf block that
+ * records checksums.
+ */
+struct ext4_dir_entry_tail {
+       __le32  det_reserved_zero1;     /* Pretend to be unused */
+       __le16  det_rec_len;            /* 12 */
+       __u8    det_reserved_zero2;     /* Zero name length */
+       __u8    det_reserved_ft;        /* 0xDE, fake file type */
+       __le32  det_checksum;           /* crc32c(uuid+inum+dirblock) */
+};
+
 /*
  * Ext4 directory file types.  Only the low 3 bits are used.  The
  * other bits are reserved for now.
@@ -1541,6 +1588,8 @@ struct ext4_dir_entry_2 {
 
 #define EXT4_FT_MAX            8
 
+#define EXT4_FT_DIR_CSUM       0xDE
+
 /*
  * EXT4_DIR_PAD defines the directory entries boundaries
  *
@@ -1609,6 +1658,25 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
 #define DX_HASH_HALF_MD4_UNSIGNED      4
 #define DX_HASH_TEA_UNSIGNED           5
 
+static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
+                             const void *address, unsigned int length)
+{
+       struct {
+               struct shash_desc shash;
+               char ctx[crypto_shash_descsize(sbi->s_chksum_driver)];
+       } desc;
+       int err;
+
+       desc.shash.tfm = sbi->s_chksum_driver;
+       desc.shash.flags = 0;
+       *(u32 *)desc.ctx = crc;
+
+       err = crypto_shash_update(&desc.shash, address, length);
+       BUG_ON(err);
+
+       return *(u32 *)desc.ctx;
+}
+
 #ifdef __KERNEL__
 
 /* hash info structure used by the directory hash */
@@ -1741,7 +1809,8 @@ struct mmp_struct {
        __le16  mmp_check_interval;
 
        __le16  mmp_pad1;
-       __le32  mmp_pad2[227];
+       __le32  mmp_pad2[226];
+       __le32  mmp_checksum;           /* crc32c(uuid+mmp_block) */
 };
 
 /* arguments passed to the mmp thread */
@@ -1784,8 +1853,24 @@ struct mmpd_data {
 
 /* bitmap.c */
 extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
+void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+                               struct ext4_group_desc *gdp,
+                               struct buffer_head *bh, int sz);
+int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+                                 struct ext4_group_desc *gdp,
+                                 struct buffer_head *bh, int sz);
+void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+                               struct ext4_group_desc *gdp,
+                               struct buffer_head *bh, int sz);
+int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+                                 struct ext4_group_desc *gdp,
+                                 struct buffer_head *bh, int sz);
 
 /* balloc.c */
+extern void ext4_validate_block_bitmap(struct super_block *sb,
+                                      struct ext4_group_desc *desc,
+                                      unsigned int block_group,
+                                      struct buffer_head *bh);
 extern unsigned int ext4_block_group(struct super_block *sb,
                        ext4_fsblk_t blocknr);
 extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
@@ -1864,7 +1949,7 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
 /* mballoc.c */
 extern long ext4_mb_stats;
 extern long ext4_mb_max_to_scan;
-extern int ext4_mb_init(struct super_block *, int);
+extern int ext4_mb_init(struct super_block *);
 extern int ext4_mb_release(struct super_block *);
 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
                                struct ext4_allocation_request *, int *);
@@ -1936,6 +2021,8 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
 extern int ext4_ext_migrate(struct inode *);
 
 /* namei.c */
+extern int ext4_dirent_csum_verify(struct inode *inode,
+                                  struct ext4_dir_entry *dirent);
 extern int ext4_orphan_add(handle_t *, struct inode *);
 extern int ext4_orphan_del(handle_t *, struct inode *);
 extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@@ -1950,6 +2037,10 @@ extern int ext4_group_extend(struct super_block *sb,
 extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
 
 /* super.c */
+extern int ext4_superblock_csum_verify(struct super_block *sb,
+                                      struct ext4_super_block *es);
+extern void ext4_superblock_csum_set(struct super_block *sb,
+                                    struct ext4_super_block *es);
 extern void *ext4_kvmalloc(size_t size, gfp_t flags);
 extern void *ext4_kvzalloc(size_t size, gfp_t flags);
 extern void ext4_kvfree(void *ptr);
@@ -2025,10 +2116,17 @@ extern void ext4_used_dirs_set(struct super_block *sb,
                                struct ext4_group_desc *bg, __u32 count);
 extern void ext4_itable_unused_set(struct super_block *sb,
                                   struct ext4_group_desc *bg, __u32 count);
-extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
-                                  struct ext4_group_desc *gdp);
-extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group,
                                       struct ext4_group_desc *gdp);
+extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
+                                    struct ext4_group_desc *gdp);
+
+static inline int ext4_has_group_desc_csum(struct super_block *sb)
+{
+       return EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                         EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
+                                         EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
+}
 
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
@@ -2225,6 +2323,9 @@ static inline void ext4_unlock_group(struct super_block *sb,
 
 static inline void ext4_mark_super_dirty(struct super_block *sb)
 {
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+       ext4_superblock_csum_set(sb, es);
        if (EXT4_SB(sb)->s_journal == NULL)
                sb->s_dirt =1;
 }
@@ -2314,6 +2415,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp);
+extern int ext4_mmp_csum_verify(struct super_block *sb,
+                               struct mmp_struct *mmp);
 
 /* BH_Uninit flag: blocks are allocated but uninitialized on disk */
 enum ext4_state_bits {
index 0f58b86e3a0206e19626f361f453aa80b2838857..cb1b2c919963290fd10d09ba12f6d8c53ace9fa6 100644 (file)
  * ext4_inode has i_block array (60 bytes total).
  * The first 12 bytes store ext4_extent_header;
  * the remainder stores an array of ext4_extent.
+ * For non-inode extent blocks, ext4_extent_tail
+ * follows the array.
  */
 
+/*
+ * This is the extent tail on-disk structure.
+ * All other extent structures are 12 bytes long.  It turns out that
+ * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which
+ * covers all valid ext4 block sizes.  Therefore, this tail structure can be
+ * crammed into the end of the block without having to rebalance the tree.
+ */
+struct ext4_extent_tail {
+       __le32  et_checksum;    /* crc32c(uuid+inum+extent_block) */
+};
+
 /*
  * This is the extent on-disk structure.
  * It's used at the bottom of the tree.
@@ -101,6 +114,17 @@ struct ext4_extent_header {
 
 #define EXT4_EXT_MAGIC         cpu_to_le16(0xf30a)
 
+#define EXT4_EXTENT_TAIL_OFFSET(hdr) \
+       (sizeof(struct ext4_extent_header) + \
+        (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max)))
+
+static inline struct ext4_extent_tail *
+find_ext4_extent_tail(struct ext4_extent_header *eh)
+{
+       return (struct ext4_extent_tail *)(((void *)eh) +
+                                          EXT4_EXTENT_TAIL_OFFSET(eh));
+}
+
 /*
  * Array of ext4_ext_path contains path to some extent.
  * Creation/lookup routines use it for traversal/splitting/etc.
index aca17901758249d4329d780714e328ef42851e35..90f7c2e84db1bef3fdb90931f9124cfe052705dd 100644 (file)
@@ -138,16 +138,23 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 }
 
 int __ext4_handle_dirty_super(const char *where, unsigned int line,
-                             handle_t *handle, struct super_block *sb)
+                             handle_t *handle, struct super_block *sb,
+                             int now)
 {
        struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
        int err = 0;
 
        if (ext4_handle_valid(handle)) {
+               ext4_superblock_csum_set(sb,
+                               (struct ext4_super_block *)bh->b_data);
                err = jbd2_journal_dirty_metadata(handle, bh);
                if (err)
                        ext4_journal_abort_handle(where, line, __func__,
                                                  bh, handle, err);
+       } else if (now) {
+               ext4_superblock_csum_set(sb,
+                               (struct ext4_super_block *)bh->b_data);
+               mark_buffer_dirty(bh);
        } else
                sb->s_dirt = 1;
        return err;
index 83b20fcf9400b11b28185470f8feef309faa8252..f440e8f1841f4e2521486bd94ae19ed83aa896ab 100644 (file)
@@ -213,7 +213,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
                                 struct buffer_head *bh);
 
 int __ext4_handle_dirty_super(const char *where, unsigned int line,
-                             handle_t *handle, struct super_block *sb);
+                             handle_t *handle, struct super_block *sb,
+                             int now);
 
 #define ext4_journal_get_write_access(handle, bh) \
        __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
@@ -225,8 +226,10 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
 #define ext4_handle_dirty_metadata(handle, inode, bh) \
        __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
                                     (bh))
+#define ext4_handle_dirty_super_now(handle, sb) \
+       __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 1)
 #define ext4_handle_dirty_super(handle, sb) \
-       __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
+       __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 0)
 
 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
index abcdeab67f5232b66d4aa5a6cbb88838094f6247..91341ec6e06a94f2f400d10039a64585cd17ed2e 100644 (file)
 #define EXT4_EXT_MARK_UNINIT1  0x2  /* mark first half uninitialized */
 #define EXT4_EXT_MARK_UNINIT2  0x4  /* mark second half uninitialized */
 
+static __le32 ext4_extent_block_csum(struct inode *inode,
+                                    struct ext4_extent_header *eh)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       __u32 csum;
+
+       csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
+                          EXT4_EXTENT_TAIL_OFFSET(eh));
+       return cpu_to_le32(csum);
+}
+
+static int ext4_extent_block_csum_verify(struct inode *inode,
+                                        struct ext4_extent_header *eh)
+{
+       struct ext4_extent_tail *et;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       et = find_ext4_extent_tail(eh);
+       if (et->et_checksum != ext4_extent_block_csum(inode, eh))
+               return 0;
+       return 1;
+}
+
+static void ext4_extent_block_csum_set(struct inode *inode,
+                                      struct ext4_extent_header *eh)
+{
+       struct ext4_extent_tail *et;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       et = find_ext4_extent_tail(eh);
+       et->et_checksum = ext4_extent_block_csum(inode, eh);
+}
+
 static int ext4_split_extent(handle_t *handle,
                                struct inode *inode,
                                struct ext4_ext_path *path,
@@ -117,6 +157,7 @@ static int __ext4_ext_dirty(const char *where, unsigned int line,
 {
        int err;
        if (path->p_bh) {
+               ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
                /* path points to block */
                err = __ext4_handle_dirty_metadata(where, line, handle,
                                                   inode, path->p_bh);
@@ -391,6 +432,12 @@ static int __ext4_ext_check(const char *function, unsigned int line,
                error_msg = "invalid extent entries";
                goto corrupted;
        }
+       /* Verify checksum on non-root extent tree nodes */
+       if (ext_depth(inode) != depth &&
+           !ext4_extent_block_csum_verify(inode, eh)) {
+               error_msg = "extent tree corrupted";
+               goto corrupted;
+       }
        return 0;
 
 corrupted:
@@ -412,6 +459,26 @@ int ext4_ext_check_inode(struct inode *inode)
        return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
 }
 
+static int __ext4_ext_check_block(const char *function, unsigned int line,
+                                 struct inode *inode,
+                                 struct ext4_extent_header *eh,
+                                 int depth,
+                                 struct buffer_head *bh)
+{
+       int ret;
+
+       if (buffer_verified(bh))
+               return 0;
+       ret = ext4_ext_check(inode, eh, depth);
+       if (ret)
+               return ret;
+       set_buffer_verified(bh);
+       return ret;
+}
+
+#define ext4_ext_check_block(inode, eh, depth, bh)     \
+       __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh)
+
 #ifdef EXT_DEBUG
 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 {
@@ -536,7 +603,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
        }
 
        path->p_idx = l - 1;
-       ext_debug("  -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
+       ext_debug("  -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
                  ext4_idx_pblock(path->p_idx));
 
 #ifdef CHECK_BINSEARCH
@@ -668,8 +735,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
        i = depth;
        /* walk through the tree */
        while (i) {
-               int need_to_validate = 0;
-
                ext_debug("depth %d: num %d, max %d\n",
                          ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 
@@ -688,8 +753,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                                put_bh(bh);
                                goto err;
                        }
-                       /* validate the extent entries */
-                       need_to_validate = 1;
                }
                eh = ext_block_hdr(bh);
                ppos++;
@@ -703,7 +766,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                path[ppos].p_hdr = eh;
                i--;
 
-               if (need_to_validate && ext4_ext_check(inode, eh, i))
+               if (ext4_ext_check_block(inode, eh, i, bh))
                        goto err;
        }
 
@@ -914,6 +977,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                le16_add_cpu(&neh->eh_entries, m);
        }
 
+       ext4_extent_block_csum_set(inode, neh);
        set_buffer_uptodate(bh);
        unlock_buffer(bh);
 
@@ -992,6 +1056,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                                sizeof(struct ext4_extent_idx) * m);
                        le16_add_cpu(&neh->eh_entries, m);
                }
+               ext4_extent_block_csum_set(inode, neh);
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
 
@@ -1089,6 +1154,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
        else
                neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
        neh->eh_magic = EXT4_EXT_MAGIC;
+       ext4_extent_block_csum_set(inode, neh);
        set_buffer_uptodate(bh);
        unlock_buffer(bh);
 
@@ -1344,7 +1410,8 @@ got_index:
                        return -EIO;
                eh = ext_block_hdr(bh);
                /* subtract from p_depth to get proper eh_depth */
-               if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
+               if (ext4_ext_check_block(inode, eh,
+                                        path->p_depth - depth, bh)) {
                        put_bh(bh);
                        return -EIO;
                }
@@ -1357,7 +1424,7 @@ got_index:
        if (bh == NULL)
                return -EIO;
        eh = ext_block_hdr(bh);
-       if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
+       if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) {
                put_bh(bh);
                return -EIO;
        }
@@ -2644,8 +2711,8 @@ cont:
                                err = -EIO;
                                break;
                        }
-                       if (ext4_ext_check(inode, ext_block_hdr(bh),
-                                                       depth - i - 1)) {
+                       if (ext4_ext_check_block(inode, ext_block_hdr(bh),
+                                                       depth - i - 1, bh)) {
                                err = -EIO;
                                break;
                        }
@@ -4722,8 +4789,8 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 
        /* Now release the pages */
        if (last_page_offset > first_page_offset) {
-               truncate_inode_pages_range(mapping, first_page_offset,
-                                          last_page_offset-1);
+               truncate_pagecache_range(inode, first_page_offset,
+                                        last_page_offset - 1);
        }
 
        /* finish any pending end_io work */
index cb70f1812a70f5ca8452e98776cd309ad6638055..8c7642a00054fd1ddf649e733e4b6efb5a0eb14b 100644 (file)
@@ -95,7 +95,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 {
        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
        int unaligned_aio = 0;
-       int ret;
+       ssize_t ret;
 
        /*
         * If we have encountered a bitmap-format file, the size limit
index 9f9acac6c43f4ac8006e363b5567b4a88dd56615..d48e8b14928cf993c50c33fe9b18a90203c2c492 100644 (file)
@@ -70,24 +70,27 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
                                       ext4_group_t block_group,
                                       struct ext4_group_desc *gdp)
 {
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-
        J_ASSERT_BH(bh, buffer_locked(bh));
 
        /* If checksum is bad mark all blocks and inodes use to prevent
         * allocation, essentially implementing a per-group read-only flag. */
-       if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+       if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
                ext4_error(sb, "Checksum bad for group %u", block_group);
                ext4_free_group_clusters_set(sb, gdp, 0);
                ext4_free_inodes_set(sb, gdp, 0);
                ext4_itable_unused_set(sb, gdp, 0);
                memset(bh->b_data, 0xff, sb->s_blocksize);
+               ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
+                                          EXT4_INODES_PER_GROUP(sb) / 8);
                return 0;
        }
 
        memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
        ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
                        bh->b_data);
+       ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
+                                  EXT4_INODES_PER_GROUP(sb) / 8);
+       ext4_group_desc_csum_set(sb, block_group, gdp);
 
        return EXT4_INODES_PER_GROUP(sb);
 }
@@ -128,12 +131,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                return NULL;
        }
        if (bitmap_uptodate(bh))
-               return bh;
+               goto verify;
 
        lock_buffer(bh);
        if (bitmap_uptodate(bh)) {
                unlock_buffer(bh);
-               return bh;
+               goto verify;
        }
 
        ext4_lock_group(sb, block_group);
@@ -141,6 +144,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                ext4_init_inode_bitmap(sb, bh, block_group, desc);
                set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
+               set_buffer_verified(bh);
                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
                return bh;
@@ -154,7 +158,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                 */
                set_bitmap_uptodate(bh);
                unlock_buffer(bh);
-               return bh;
+               goto verify;
        }
        /*
         * submit the buffer_head for reading
@@ -171,6 +175,20 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                           block_group, bitmap_blk);
                return NULL;
        }
+
+verify:
+       ext4_lock_group(sb, block_group);
+       if (!buffer_verified(bh) &&
+           !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
+                                          EXT4_INODES_PER_GROUP(sb) / 8)) {
+               ext4_unlock_group(sb, block_group);
+               put_bh(bh);
+               ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
+                          "inode_bitmap = %llu", block_group, bitmap_blk);
+               return NULL;
+       }
+       ext4_unlock_group(sb, block_group);
+       set_buffer_verified(bh);
        return bh;
 }
 
@@ -276,7 +294,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
                ext4_used_dirs_set(sb, gdp, count);
                percpu_counter_dec(&sbi->s_dirs_counter);
        }
-       gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
+       ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
+                                  EXT4_INODES_PER_GROUP(sb) / 8);
+       ext4_group_desc_csum_set(sb, block_group, gdp);
        ext4_unlock_group(sb, block_group);
 
        percpu_counter_inc(&sbi->s_freeinodes_counter);
@@ -488,10 +508,12 @@ fallback_retry:
        for (i = 0; i < ngroups; i++) {
                grp = (parent_group + i) % ngroups;
                desc = ext4_get_group_desc(sb, grp, NULL);
-               grp_free = ext4_free_inodes_count(sb, desc);
-               if (desc && grp_free && grp_free >= avefreei) {
-                       *group = grp;
-                       return 0;
+               if (desc) {
+                       grp_free = ext4_free_inodes_count(sb, desc);
+                       if (grp_free && grp_free >= avefreei) {
+                               *group = grp;
+                               return 0;
+                       }
                }
        }
 
@@ -709,7 +731,7 @@ repeat_in_this_group:
 
 got:
        /* We may have to initialize the block bitmap if it isn't already */
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+       if (ext4_has_group_desc_csum(sb) &&
            gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                struct buffer_head *block_bitmap_bh;
 
@@ -731,8 +753,11 @@ got:
                        gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
                        ext4_free_group_clusters_set(sb, gdp,
                                ext4_free_clusters_after_init(sb, group, gdp));
-                       gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
-                                                               gdp);
+                       ext4_block_bitmap_csum_set(sb, group, gdp,
+                                                  block_bitmap_bh,
+                                                  EXT4_BLOCKS_PER_GROUP(sb) /
+                                                  8);
+                       ext4_group_desc_csum_set(sb, group, gdp);
                }
                ext4_unlock_group(sb, group);
 
@@ -751,7 +776,7 @@ got:
                goto fail;
 
        /* Update the relevant bg descriptor fields */
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+       if (ext4_has_group_desc_csum(sb)) {
                int free;
                struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 
@@ -772,7 +797,10 @@ got:
                        ext4_itable_unused_set(sb, gdp,
                                        (EXT4_INODES_PER_GROUP(sb) - ino));
                up_read(&grp->alloc_sem);
+       } else {
+               ext4_lock_group(sb, group);
        }
+
        ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
        if (S_ISDIR(mode)) {
                ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
@@ -782,10 +810,12 @@ got:
                        atomic_inc(&sbi->s_flex_groups[f].used_dirs);
                }
        }
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-               gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-               ext4_unlock_group(sb, group);
+       if (ext4_has_group_desc_csum(sb)) {
+               ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
+                                          EXT4_INODES_PER_GROUP(sb) / 8);
+               ext4_group_desc_csum_set(sb, group, gdp);
        }
+       ext4_unlock_group(sb, group);
 
        BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
        err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
@@ -850,6 +880,19 @@ got:
        inode->i_generation = sbi->s_next_generation++;
        spin_unlock(&sbi->s_next_gen_lock);
 
+       /* Precompute checksum seed for inode metadata */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+               __u32 csum;
+               struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+               __le32 inum = cpu_to_le32(inode->i_ino);
+               __le32 gen = cpu_to_le32(inode->i_generation);
+               csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
+                                  sizeof(inum));
+               ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
+                                             sizeof(gen));
+       }
+
        ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
        ext4_set_inode_state(inode, EXT4_STATE_NEW);
 
@@ -1140,7 +1183,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 skip_zeroout:
        ext4_lock_group(sb, group);
        gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
-       gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+       ext4_group_desc_csum_set(sb, group, gdp);
        ext4_unlock_group(sb, group);
 
        BUFFER_TRACE(group_desc_bh,
index 07eaf565fdcb2ad4fba4f92c6fe55a01b2fea17b..02bc8cbe7281b3d47c3449a1c4b8e4220685ba52 100644 (file)
 
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
+static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
+                             struct ext4_inode_info *ei)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       __u16 csum_lo;
+       __u16 csum_hi = 0;
+       __u32 csum;
+
+       csum_lo = raw->i_checksum_lo;
+       raw->i_checksum_lo = 0;
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
+               csum_hi = raw->i_checksum_hi;
+               raw->i_checksum_hi = 0;
+       }
+
+       csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
+                          EXT4_INODE_SIZE(inode->i_sb));
+
+       raw->i_checksum_lo = csum_lo;
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
+               raw->i_checksum_hi = csum_hi;
+
+       return csum;
+}
+
+static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
+                                 struct ext4_inode_info *ei)
+{
+       __u32 provided, calculated;
+
+       if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+           cpu_to_le32(EXT4_OS_LINUX) ||
+           !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       provided = le16_to_cpu(raw->i_checksum_lo);
+       calculated = ext4_inode_csum(inode, raw, ei);
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
+               provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16;
+       else
+               calculated &= 0xFFFF;
+
+       return provided == calculated;
+}
+
+static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
+                               struct ext4_inode_info *ei)
+{
+       __u32 csum;
+
+       if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+           cpu_to_le32(EXT4_OS_LINUX) ||
+           !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       csum = ext4_inode_csum(inode, raw, ei);
+       raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF);
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+           EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
+               raw->i_checksum_hi = cpu_to_le16(csum >> 16);
+}
+
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
                                              loff_t new_size)
 {
@@ -3517,8 +3584,7 @@ make_io:
                                b = table;
                        end = b + EXT4_SB(sb)->s_inode_readahead_blks;
                        num = EXT4_INODES_PER_GROUP(sb);
-                       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                      EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+                       if (ext4_has_group_desc_csum(sb))
                                num -= ext4_itable_unused_count(sb, gdp);
                        table += num / inodes_per_block;
                        if (end > table)
@@ -3646,6 +3712,39 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        if (ret < 0)
                goto bad_inode;
        raw_inode = ext4_raw_inode(&iloc);
+
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+                   EXT4_INODE_SIZE(inode->i_sb)) {
+                       EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
+                               EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
+                               EXT4_INODE_SIZE(inode->i_sb));
+                       ret = -EIO;
+                       goto bad_inode;
+               }
+       } else
+               ei->i_extra_isize = 0;
+
+       /* Precompute checksum seed for inode metadata */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+               struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+               __u32 csum;
+               __le32 inum = cpu_to_le32(inode->i_ino);
+               __le32 gen = raw_inode->i_generation;
+               csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
+                                  sizeof(inum));
+               ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
+                                             sizeof(gen));
+       }
+
+       if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
+               EXT4_ERROR_INODE(inode, "checksum invalid");
+               ret = -EIO;
+               goto bad_inode;
+       }
+
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
        i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
@@ -3725,12 +3824,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        }
 
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
-               ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
-               if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-                   EXT4_INODE_SIZE(inode->i_sb)) {
-                       ret = -EIO;
-                       goto bad_inode;
-               }
                if (ei->i_extra_isize == 0) {
                        /* The extra space is currently unused. Use it. */
                        ei->i_extra_isize = sizeof(struct ext4_inode) -
@@ -3742,8 +3835,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                        if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
                                ext4_set_inode_state(inode, EXT4_STATE_XATTR);
                }
-       } else
-               ei->i_extra_isize = 0;
+       }
 
        EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
        EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
@@ -3942,7 +4034,7 @@ static int ext4_do_update_inode(handle_t *handle,
                        EXT4_SET_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
                        ext4_handle_sync(handle);
-                       err = ext4_handle_dirty_super(handle, sb);
+                       err = ext4_handle_dirty_super_now(handle, sb);
                }
        }
        raw_inode->i_generation = cpu_to_le32(inode->i_generation);
@@ -3969,6 +4061,8 @@ static int ext4_do_update_inode(handle_t *handle,
                raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
        }
 
+       ext4_inode_csum_set(inode, raw_inode, ei);
+
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
        rc = ext4_handle_dirty_metadata(handle, NULL, bh);
        if (!err)
@@ -4213,7 +4307,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
         * will return the blocks that include the delayed allocation
         * blocks for this file.
         */
-       delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+       delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
+                               EXT4_I(inode)->i_reserved_data_blocks);
 
        stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
        return 0;
index 6eee25591b8159bc96d35a16f94f94c0855a35b9..8ad112ae0ade2f21a953ccc03b687939b0b81310 100644 (file)
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                handle_t *handle = NULL;
                int err, migrate = 0;
                struct ext4_iloc iloc;
-               unsigned int oldflags;
+               unsigned int oldflags, mask, i;
                unsigned int jflag;
 
                if (!inode_owner_or_capable(inode))
@@ -115,8 +115,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (err)
                        goto flags_err;
 
-               flags = flags & EXT4_FL_USER_MODIFIABLE;
-               flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE;
+               for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+                       if (!(mask & EXT4_FL_USER_MODIFIABLE))
+                               continue;
+                       if (mask & flags)
+                               ext4_set_inode_flag(inode, i);
+                       else
+                               ext4_clear_inode_flag(inode, i);
+               }
                ei->i_flags = flags;
 
                ext4_set_inode_flags(inode);
@@ -152,6 +158,13 @@ flags_out:
                if (!inode_owner_or_capable(inode))
                        return -EPERM;
 
+               if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+                       ext4_warning(sb, "Setting inode version is not "
+                                    "supported with metadata_csum enabled.");
+                       return -ENOTTY;
+               }
+
                err = mnt_want_write_file(filp);
                if (err)
                        return err;
index 99ab428bcfa089822e74b433aee7b1bf4076e34d..1cd6994fc446008b74dc9b77863edf0f24e14c33 100644 (file)
@@ -788,7 +788,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
        int first_block;
        struct super_block *sb;
        struct buffer_head *bhs;
-       struct buffer_head **bh;
+       struct buffer_head **bh = NULL;
        struct inode *inode;
        char *data;
        char *bitmap;
@@ -2375,7 +2375,7 @@ static int ext4_groupinfo_create_slab(size_t size)
        return 0;
 }
 
-int ext4_mb_init(struct super_block *sb, int needs_recovery)
+int ext4_mb_init(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        unsigned i, j;
@@ -2517,6 +2517,9 @@ int ext4_mb_release(struct super_block *sb)
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
 
+       if (sbi->s_proc)
+               remove_proc_entry("mb_groups", sbi->s_proc);
+
        if (sbi->s_group_info) {
                for (i = 0; i < ngroups; i++) {
                        grinfo = ext4_get_group_info(sb, i);
@@ -2564,8 +2567,6 @@ int ext4_mb_release(struct super_block *sb)
        }
 
        free_percpu(sbi->s_locality_groups);
-       if (sbi->s_proc)
-               remove_proc_entry("mb_groups", sbi->s_proc);
 
        return 0;
 }
@@ -2797,7 +2798,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        }
        len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
        ext4_free_group_clusters_set(sb, gdp, len);
-       gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
+       ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh,
+                                  EXT4_BLOCKS_PER_GROUP(sb) / 8);
+       ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
 
        ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
        percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
@@ -3071,13 +3074,9 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
 static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
 {
        struct ext4_prealloc_space *pa = ac->ac_pa;
-       int len;
-
-       if (pa && pa->pa_type == MB_INODE_PA) {
-               len = ac->ac_b_ex.fe_len;
-               pa->pa_free += len;
-       }
 
+       if (pa && pa->pa_type == MB_INODE_PA)
+               pa->pa_free += ac->ac_b_ex.fe_len;
 }
 
 /*
@@ -4636,6 +4635,7 @@ do_more:
                 */
                new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
                if (!new_entry) {
+                       ext4_mb_unload_buddy(&e4b);
                        err = -ENOMEM;
                        goto error_return;
                }
@@ -4659,7 +4659,9 @@ do_more:
 
        ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
        ext4_free_group_clusters_set(sb, gdp, ret);
-       gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
+       ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
+                                  EXT4_BLOCKS_PER_GROUP(sb) / 8);
+       ext4_group_desc_csum_set(sb, block_group, gdp);
        ext4_unlock_group(sb, block_group);
        percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
 
@@ -4803,7 +4805,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
        mb_free_blocks(NULL, &e4b, bit, count);
        blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
        ext4_free_group_clusters_set(sb, desc, blk_free_count);
-       desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
+       ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh,
+                                  EXT4_BLOCKS_PER_GROUP(sb) / 8);
+       ext4_group_desc_csum_set(sb, block_group, desc);
        ext4_unlock_group(sb, block_group);
        percpu_counter_add(&sbi->s_freeclusters_counter,
                           EXT4_B2C(sbi, blocks_freed));
index ed6548d89165e1d9c31118aca21d3e89a3772ab2..f99a1311e84765296b0a0a04534e0be0536915bc 100644 (file)
@@ -6,12 +6,45 @@
 
 #include "ext4.h"
 
+/* Checksumming functions */
+static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       int offset = offsetof(struct mmp_struct, mmp_checksum);
+       __u32 csum;
+
+       csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
+
+       return cpu_to_le32(csum);
+}
+
+int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
+}
+
+void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
+}
+
 /*
  * Write the MMP block using WRITE_SYNC to try to get the block on-disk
  * faster.
  */
-static int write_mmp_block(struct buffer_head *bh)
+static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
 {
+       struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
+
+       ext4_mmp_csum_set(sb, mmp);
        mark_buffer_dirty(bh);
        lock_buffer(bh);
        bh->b_end_io = end_buffer_write_sync;
@@ -59,7 +92,8 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
        }
 
        mmp = (struct mmp_struct *)((*bh)->b_data);
-       if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
+       if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
+           !ext4_mmp_csum_verify(sb, mmp))
                return -EINVAL;
 
        return 0;
@@ -120,7 +154,7 @@ static int kmmpd(void *data)
                mmp->mmp_time = cpu_to_le64(get_seconds());
                last_update_time = jiffies;
 
-               retval = write_mmp_block(bh);
+               retval = write_mmp_block(sb, bh);
                /*
                 * Don't spew too many error messages. Print one every
                 * (s_mmp_update_interval * 60) seconds.
@@ -200,7 +234,7 @@ static int kmmpd(void *data)
        mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
        mmp->mmp_time = cpu_to_le64(get_seconds());
 
-       retval = write_mmp_block(bh);
+       retval = write_mmp_block(sb, bh);
 
 failed:
        kfree(data);
@@ -299,7 +333,7 @@ skip:
        seq = mmp_new_seq();
        mmp->mmp_seq = cpu_to_le32(seq);
 
-       retval = write_mmp_block(bh);
+       retval = write_mmp_block(sb, bh);
        if (retval)
                goto failed;
 
index e2a3f4b0ff78d6f81fbf2228f12f201e6ab1a024..5845cd97bf8b094b0fc01082279e8d65ee73f241 100644 (file)
@@ -145,6 +145,14 @@ struct dx_map_entry
        u16 size;
 };
 
+/*
+ * This goes at the end of each htree block.
+ */
+struct dx_tail {
+       u32 dt_reserved;
+       __le32 dt_checksum;     /* crc32c(uuid+inum+dirblock) */
+};
+
 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
 static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
 static inline unsigned dx_get_hash(struct dx_entry *entry);
@@ -180,6 +188,230 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                             struct inode *inode);
 
+/* checksumming functions */
+#define EXT4_DIRENT_TAIL(block, blocksize) \
+       ((struct ext4_dir_entry_tail *)(((void *)(block)) + \
+                                       ((blocksize) - \
+                                        sizeof(struct ext4_dir_entry_tail))))
+
+static void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
+                                  unsigned int blocksize)
+{
+       memset(t, 0, sizeof(struct ext4_dir_entry_tail));
+       t->det_rec_len = ext4_rec_len_to_disk(
+                       sizeof(struct ext4_dir_entry_tail), blocksize);
+       t->det_reserved_ft = EXT4_FT_DIR_CSUM;
+}
+
+/* Walk through a dirent block to find a checksum "dirent" at the tail */
+static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
+                                                  struct ext4_dir_entry *de)
+{
+       struct ext4_dir_entry_tail *t;
+
+#ifdef PARANOID
+       struct ext4_dir_entry *d, *top;
+
+       d = de;
+       top = (struct ext4_dir_entry *)(((void *)de) +
+               (EXT4_BLOCK_SIZE(inode->i_sb) -
+               sizeof(struct ext4_dir_entry_tail)));
+       while (d < top && d->rec_len)
+               d = (struct ext4_dir_entry *)(((void *)d) +
+                   le16_to_cpu(d->rec_len));
+
+       if (d != top)
+               return NULL;
+
+       t = (struct ext4_dir_entry_tail *)d;
+#else
+       t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
+#endif
+
+       if (t->det_reserved_zero1 ||
+           le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
+           t->det_reserved_zero2 ||
+           t->det_reserved_ft != EXT4_FT_DIR_CSUM)
+               return NULL;
+
+       return t;
+}
+
+static __le32 ext4_dirent_csum(struct inode *inode,
+                              struct ext4_dir_entry *dirent, int size)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       __u32 csum;
+
+       csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
+       return cpu_to_le32(csum);
+}
+
+int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
+{
+       struct ext4_dir_entry_tail *t;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       t = get_dirent_tail(inode, dirent);
+       if (!t) {
+               EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir "
+                                "leaf for checksum.  Please run e2fsck -D.");
+               return 0;
+       }
+
+       if (t->det_checksum != ext4_dirent_csum(inode, dirent,
+                                               (void *)t - (void *)dirent))
+               return 0;
+
+       return 1;
+}
+
+static void ext4_dirent_csum_set(struct inode *inode,
+                                struct ext4_dir_entry *dirent)
+{
+       struct ext4_dir_entry_tail *t;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       t = get_dirent_tail(inode, dirent);
+       if (!t) {
+               EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir "
+                                "leaf for checksum.  Please run e2fsck -D.");
+               return;
+       }
+
+       t->det_checksum = ext4_dirent_csum(inode, dirent,
+                                          (void *)t - (void *)dirent);
+}
+
+static inline int ext4_handle_dirty_dirent_node(handle_t *handle,
+                                               struct inode *inode,
+                                               struct buffer_head *bh)
+{
+       ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
+       return ext4_handle_dirty_metadata(handle, inode, bh);
+}
+
+static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
+                                              struct ext4_dir_entry *dirent,
+                                              int *offset)
+{
+       struct ext4_dir_entry *dp;
+       struct dx_root_info *root;
+       int count_offset;
+
+       if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
+               count_offset = 8;
+       else if (le16_to_cpu(dirent->rec_len) == 12) {
+               dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
+               if (le16_to_cpu(dp->rec_len) !=
+                   EXT4_BLOCK_SIZE(inode->i_sb) - 12)
+                       return NULL;
+               root = (struct dx_root_info *)(((void *)dp + 12));
+               if (root->reserved_zero ||
+                   root->info_length != sizeof(struct dx_root_info))
+                       return NULL;
+               count_offset = 32;
+       } else
+               return NULL;
+
+       if (offset)
+               *offset = count_offset;
+       return (struct dx_countlimit *)(((void *)dirent) + count_offset);
+}
+
+static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
+                          int count_offset, int count, struct dx_tail *t)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       __u32 csum, old_csum;
+       int size;
+
+       size = count_offset + (count * sizeof(struct dx_entry));
+       old_csum = t->dt_checksum;
+       t->dt_checksum = 0;
+       csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
+       csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
+       t->dt_checksum = old_csum;
+
+       return cpu_to_le32(csum);
+}
+
+static int ext4_dx_csum_verify(struct inode *inode,
+                              struct ext4_dir_entry *dirent)
+{
+       struct dx_countlimit *c;
+       struct dx_tail *t;
+       int count_offset, limit, count;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       c = get_dx_countlimit(inode, dirent, &count_offset);
+       if (!c) {
+               EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
+               return 1;
+       }
+       limit = le16_to_cpu(c->limit);
+       count = le16_to_cpu(c->count);
+       if (count_offset + (limit * sizeof(struct dx_entry)) >
+           EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
+               EXT4_ERROR_INODE(inode, "metadata_csum set but no space for "
+                                "tree checksum found.  Run e2fsck -D.");
+               return 1;
+       }
+       t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
+
+       if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
+                                           count, t))
+               return 0;
+       return 1;
+}
+
+static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
+{
+       struct dx_countlimit *c;
+       struct dx_tail *t;
+       int count_offset, limit, count;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       c = get_dx_countlimit(inode, dirent, &count_offset);
+       if (!c) {
+               EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
+               return;
+       }
+       limit = le16_to_cpu(c->limit);
+       count = le16_to_cpu(c->count);
+       if (count_offset + (limit * sizeof(struct dx_entry)) >
+           EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
+               EXT4_ERROR_INODE(inode, "metadata_csum set but no space for "
+                                "tree checksum.  Run e2fsck -D.");
+               return;
+       }
+       t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
+
+       t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
+}
+
+static inline int ext4_handle_dirty_dx_node(handle_t *handle,
+                                           struct inode *inode,
+                                           struct buffer_head *bh)
+{
+       ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
+       return ext4_handle_dirty_metadata(handle, inode, bh);
+}
+
 /*
  * p is at least 6 bytes before the end of page
  */
@@ -239,12 +471,20 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
 {
        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
                EXT4_DIR_REC_LEN(2) - infosize;
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               entry_space -= sizeof(struct dx_tail);
        return entry_space / sizeof(struct dx_entry);
 }
 
 static inline unsigned dx_node_limit(struct inode *dir)
 {
        unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               entry_space -= sizeof(struct dx_tail);
        return entry_space / sizeof(struct dx_entry);
 }
 
@@ -390,6 +630,15 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
                goto fail;
        }
 
+       if (!buffer_verified(bh) &&
+           !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) {
+               ext4_warning(dir->i_sb, "Root failed checksum");
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
+       }
+       set_buffer_verified(bh);
+
        entries = (struct dx_entry *) (((char *)&root->info) +
                                       root->info.info_length);
 
@@ -450,6 +699,17 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
                if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
                        goto fail2;
                at = entries = ((struct dx_node *) bh->b_data)->entries;
+
+               if (!buffer_verified(bh) &&
+                   !ext4_dx_csum_verify(dir,
+                                        (struct ext4_dir_entry *)bh->b_data)) {
+                       ext4_warning(dir->i_sb, "Node failed checksum");
+                       brelse(bh);
+                       *err = ERR_BAD_DX_DIR;
+                       goto fail;
+               }
+               set_buffer_verified(bh);
+
                if (dx_get_limit(entries) != dx_node_limit (dir)) {
                        ext4_warning(dir->i_sb,
                                     "dx entry: limit != node limit");
@@ -549,6 +809,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
                if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
                                      0, &err)))
                        return err; /* Failure */
+
+               if (!buffer_verified(bh) &&
+                   !ext4_dx_csum_verify(dir,
+                                        (struct ext4_dir_entry *)bh->b_data)) {
+                       ext4_warning(dir->i_sb, "Node failed checksum");
+                       return -EIO;
+               }
+               set_buffer_verified(bh);
+
                p++;
                brelse(p->bh);
                p->bh = bh;
@@ -577,6 +846,11 @@ static int htree_dirblock_to_tree(struct file *dir_file,
        if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
                return err;
 
+       if (!buffer_verified(bh) &&
+           !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
+               return -EIO;
+       set_buffer_verified(bh);
+
        de = (struct ext4_dir_entry_2 *) bh->b_data;
        top = (struct ext4_dir_entry_2 *) ((char *) de +
                                           dir->i_sb->s_blocksize -
@@ -936,6 +1210,15 @@ restart:
                        brelse(bh);
                        goto next;
                }
+               if (!buffer_verified(bh) &&
+                   !ext4_dirent_csum_verify(dir,
+                               (struct ext4_dir_entry *)bh->b_data)) {
+                       EXT4_ERROR_INODE(dir, "checksumming directory "
+                                        "block %lu", (unsigned long)block);
+                       brelse(bh);
+                       goto next;
+               }
+               set_buffer_verified(bh);
                i = search_dirblock(bh, dir, d_name,
                            block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
                if (i == 1) {
@@ -987,6 +1270,16 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
                if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
                        goto errout;
 
+               if (!buffer_verified(bh) &&
+                   !ext4_dirent_csum_verify(dir,
+                               (struct ext4_dir_entry *)bh->b_data)) {
+                       EXT4_ERROR_INODE(dir, "checksumming directory "
+                                        "block %lu", (unsigned long)block);
+                       brelse(bh);
+                       *err = -EIO;
+                       goto errout;
+               }
+               set_buffer_verified(bh);
                retval = search_dirblock(bh, dir, d_name,
                                         block << EXT4_BLOCK_SIZE_BITS(sb),
                                         res_dir);
@@ -1037,6 +1330,12 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
                        EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
                        return ERR_PTR(-EIO);
                }
+               if (unlikely(ino == dir->i_ino)) {
+                       EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir",
+                                        dentry->d_name.len,
+                                        dentry->d_name.name);
+                       return ERR_PTR(-EIO);
+               }
                inode = ext4_iget(dir->i_sb, ino);
                if (inode == ERR_PTR(-ESTALE)) {
                        EXT4_ERROR_INODE(dir,
@@ -1156,8 +1455,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        char *data1 = (*bh)->b_data, *data2;
        unsigned split, move, size;
        struct ext4_dir_entry_2 *de = NULL, *de2;
+       struct ext4_dir_entry_tail *t;
+       int     csum_size = 0;
        int     err = 0, i;
 
+       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               csum_size = sizeof(struct ext4_dir_entry_tail);
+
        bh2 = ext4_append (handle, dir, &newblock, &err);
        if (!(bh2)) {
                brelse(*bh);
@@ -1204,10 +1509,20 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
        /* Fancy dance to stay within two buffers */
        de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
        de = dx_pack_dirents(data1, blocksize);
-       de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
+       de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
+                                          (char *) de,
                                           blocksize);
-       de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2,
+       de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
+                                           (char *) de2,
                                            blocksize);
+       if (csum_size) {
+               t = EXT4_DIRENT_TAIL(data2, blocksize);
+               initialize_dirent_tail(t, blocksize);
+
+               t = EXT4_DIRENT_TAIL(data1, blocksize);
+               initialize_dirent_tail(t, blocksize);
+       }
+
        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
        dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
 
@@ -1218,10 +1533,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
                de = de2;
        }
        dx_insert_block(frame, hash2 + continued, newblock);
-       err = ext4_handle_dirty_metadata(handle, dir, bh2);
+       err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
        if (err)
                goto journal_error;
-       err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
+       err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
        if (err)
                goto journal_error;
        brelse(bh2);
@@ -1258,11 +1573,16 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        unsigned short  reclen;
        int             nlen, rlen, err;
        char            *top;
+       int             csum_size = 0;
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               csum_size = sizeof(struct ext4_dir_entry_tail);
 
        reclen = EXT4_DIR_REC_LEN(namelen);
        if (!de) {
                de = (struct ext4_dir_entry_2 *)bh->b_data;
-               top = bh->b_data + blocksize - reclen;
+               top = bh->b_data + (blocksize - csum_size) - reclen;
                while ((char *) de <= top) {
                        if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
                                return -EIO;
@@ -1295,11 +1615,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                de = de1;
        }
        de->file_type = EXT4_FT_UNKNOWN;
-       if (inode) {
-               de->inode = cpu_to_le32(inode->i_ino);
-               ext4_set_de_type(dir->i_sb, de, inode->i_mode);
-       } else
-               de->inode = 0;
+       de->inode = cpu_to_le32(inode->i_ino);
+       ext4_set_de_type(dir->i_sb, de, inode->i_mode);
        de->name_len = namelen;
        memcpy(de->name, name, namelen);
        /*
@@ -1318,7 +1635,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        dir->i_version++;
        ext4_mark_inode_dirty(handle, dir);
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, dir, bh);
+       err = ext4_handle_dirty_dirent_node(handle, dir, bh);
        if (err)
                ext4_std_error(dir->i_sb, err);
        return 0;
@@ -1339,6 +1656,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        struct dx_frame frames[2], *frame;
        struct dx_entry *entries;
        struct ext4_dir_entry_2 *de, *de2;
+       struct ext4_dir_entry_tail *t;
        char            *data1, *top;
        unsigned        len;
        int             retval;
@@ -1346,6 +1664,11 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        struct dx_hash_info hinfo;
        ext4_lblk_t  block;
        struct fake_dirent *fde;
+       int             csum_size = 0;
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               csum_size = sizeof(struct ext4_dir_entry_tail);
 
        blocksize =  dir->i_sb->s_blocksize;
        dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
@@ -1366,7 +1689,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
                brelse(bh);
                return -EIO;
        }
-       len = ((char *) root) + blocksize - (char *) de;
+       len = ((char *) root) + (blocksize - csum_size) - (char *) de;
 
        /* Allocate new block for the 0th block's dirents */
        bh2 = ext4_append(handle, dir, &block, &retval);
@@ -1382,8 +1705,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        top = data1 + len;
        while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
                de = de2;
-       de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
+       de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
+                                          (char *) de,
                                           blocksize);
+
+       if (csum_size) {
+               t = EXT4_DIRENT_TAIL(data1, blocksize);
+               initialize_dirent_tail(t, blocksize);
+       }
+
        /* Initialize the root; the dot dirents already exist */
        de = (struct ext4_dir_entry_2 *) (&root->dotdot);
        de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
@@ -1408,8 +1738,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        frame->bh = bh;
        bh = bh2;
 
-       ext4_handle_dirty_metadata(handle, dir, frame->bh);
-       ext4_handle_dirty_metadata(handle, dir, bh);
+       ext4_handle_dirty_dx_node(handle, dir, frame->bh);
+       ext4_handle_dirty_dirent_node(handle, dir, bh);
 
        de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
        if (!de) {
@@ -1445,11 +1775,17 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        struct inode *dir = dentry->d_parent->d_inode;
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de;
+       struct ext4_dir_entry_tail *t;
        struct super_block *sb;
        int     retval;
        int     dx_fallback=0;
        unsigned blocksize;
        ext4_lblk_t block, blocks;
+       int     csum_size = 0;
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               csum_size = sizeof(struct ext4_dir_entry_tail);
 
        sb = dir->i_sb;
        blocksize = sb->s_blocksize;
@@ -1468,6 +1804,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                bh = ext4_bread(handle, dir, block, 0, &retval);
                if(!bh)
                        return retval;
+               if (!buffer_verified(bh) &&
+                   !ext4_dirent_csum_verify(dir,
+                               (struct ext4_dir_entry *)bh->b_data))
+                       return -EIO;
+               set_buffer_verified(bh);
                retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
                if (retval != -ENOSPC) {
                        brelse(bh);
@@ -1484,7 +1825,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                return retval;
        de = (struct ext4_dir_entry_2 *) bh->b_data;
        de->inode = 0;
-       de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
+       de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
+
+       if (csum_size) {
+               t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
+               initialize_dirent_tail(t, blocksize);
+       }
+
        retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
        brelse(bh);
        if (retval == 0)
@@ -1516,6 +1863,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
        if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
                goto cleanup;
 
+       if (!buffer_verified(bh) &&
+           !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
+               goto journal_error;
+       set_buffer_verified(bh);
+
        BUFFER_TRACE(bh, "get_write_access");
        err = ext4_journal_get_write_access(handle, bh);
        if (err)
@@ -1583,7 +1935,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        dxtrace(dx_show_index("node", frames[1].entries));
                        dxtrace(dx_show_index("node",
                               ((struct dx_node *) bh2->b_data)->entries));
-                       err = ext4_handle_dirty_metadata(handle, dir, bh2);
+                       err = ext4_handle_dirty_dx_node(handle, dir, bh2);
                        if (err)
                                goto journal_error;
                        brelse (bh2);
@@ -1609,7 +1961,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                        if (err)
                                goto journal_error;
                }
-               err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
+               err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
                if (err) {
                        ext4_std_error(inode->i_sb, err);
                        goto cleanup;
@@ -1641,12 +1993,17 @@ static int ext4_delete_entry(handle_t *handle,
 {
        struct ext4_dir_entry_2 *de, *pde;
        unsigned int blocksize = dir->i_sb->s_blocksize;
+       int csum_size = 0;
        int i, err;
 
+       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               csum_size = sizeof(struct ext4_dir_entry_tail);
+
        i = 0;
        pde = NULL;
        de = (struct ext4_dir_entry_2 *) bh->b_data;
-       while (i < bh->b_size) {
+       while (i < bh->b_size - csum_size) {
                if (ext4_check_dir_entry(dir, NULL, de, bh, i))
                        return -EIO;
                if (de == de_del)  {
@@ -1667,7 +2024,7 @@ static int ext4_delete_entry(handle_t *handle,
                                de->inode = 0;
                        dir->i_version++;
                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-                       err = ext4_handle_dirty_metadata(handle, dir, bh);
+                       err = ext4_handle_dirty_dirent_node(handle, dir, bh);
                        if (unlikely(err)) {
                                ext4_std_error(dir->i_sb, err);
                                return err;
@@ -1809,9 +2166,15 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        struct inode *inode;
        struct buffer_head *dir_block = NULL;
        struct ext4_dir_entry_2 *de;
+       struct ext4_dir_entry_tail *t;
        unsigned int blocksize = dir->i_sb->s_blocksize;
+       int csum_size = 0;
        int err, retries = 0;
 
+       if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               csum_size = sizeof(struct ext4_dir_entry_tail);
+
        if (EXT4_DIR_LINK_MAX(dir))
                return -EMLINK;
 
@@ -1852,16 +2215,24 @@ retry:
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
        de = ext4_next_entry(de, blocksize);
        de->inode = cpu_to_le32(dir->i_ino);
-       de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
+       de->rec_len = ext4_rec_len_to_disk(blocksize -
+                                          (csum_size + EXT4_DIR_REC_LEN(1)),
                                           blocksize);
        de->name_len = 2;
        strcpy(de->name, "..");
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
        set_nlink(inode, 2);
+
+       if (csum_size) {
+               t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
+               initialize_dirent_tail(t, blocksize);
+       }
+
        BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, inode, dir_block);
+       err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
        if (err)
                goto out_clear_inode;
+       set_buffer_verified(dir_block);
        err = ext4_mark_inode_dirty(handle, inode);
        if (!err)
                err = ext4_add_entry(handle, dentry, inode);
@@ -1911,6 +2282,14 @@ static int empty_dir(struct inode *inode)
                                     inode->i_ino);
                return 1;
        }
+       if (!buffer_verified(bh) &&
+           !ext4_dirent_csum_verify(inode,
+                       (struct ext4_dir_entry *)bh->b_data)) {
+               EXT4_ERROR_INODE(inode, "checksum error reading directory "
+                                "lblock 0");
+               return -EIO;
+       }
+       set_buffer_verified(bh);
        de = (struct ext4_dir_entry_2 *) bh->b_data;
        de1 = ext4_next_entry(de, sb->s_blocksize);
        if (le32_to_cpu(de->inode) != inode->i_ino ||
@@ -1942,6 +2321,14 @@ static int empty_dir(struct inode *inode)
                                offset += sb->s_blocksize;
                                continue;
                        }
+                       if (!buffer_verified(bh) &&
+                           !ext4_dirent_csum_verify(inode,
+                                       (struct ext4_dir_entry *)bh->b_data)) {
+                               EXT4_ERROR_INODE(inode, "checksum error "
+                                                "reading directory lblock 0");
+                               return -EIO;
+                       }
+                       set_buffer_verified(bh);
                        de = (struct ext4_dir_entry_2 *) bh->b_data;
                }
                if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) {
@@ -2010,7 +2397,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        /* Insert this inode at the head of the on-disk orphan list... */
        NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
        EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-       err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+       err = ext4_handle_dirty_super_now(handle, sb);
        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
        if (!err)
                err = rc;
@@ -2083,7 +2470,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
                if (err)
                        goto out_brelse;
                sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-               err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
+               err = ext4_handle_dirty_super_now(handle, inode->i_sb);
        } else {
                struct ext4_iloc iloc2;
                struct inode *i_prev =
@@ -2442,6 +2829,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
                if (!dir_bh)
                        goto end_rename;
+               if (!buffer_verified(dir_bh) &&
+                   !ext4_dirent_csum_verify(old_inode,
+                               (struct ext4_dir_entry *)dir_bh->b_data))
+                       goto end_rename;
+               set_buffer_verified(dir_bh);
                if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
                                old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
                        goto end_rename;
@@ -2472,7 +2864,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                                        ext4_current_time(new_dir);
                ext4_mark_inode_dirty(handle, new_dir);
                BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
-               retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh);
+               retval = ext4_handle_dirty_dirent_node(handle, new_dir, new_bh);
                if (unlikely(retval)) {
                        ext4_std_error(new_dir->i_sb, retval);
                        goto end_rename;
@@ -2526,7 +2918,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
                                                cpu_to_le32(new_dir->i_ino);
                BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
-               retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
+               retval = ext4_handle_dirty_dirent_node(handle, old_inode,
+                                                      dir_bh);
                if (retval) {
                        ext4_std_error(old_dir->i_sb, retval);
                        goto end_rename;
index 59fa0be272516adf6cbbc94384106690bf710c65..7ea6cbb44121952bf0d4f81f914950ab284dba6b 100644 (file)
@@ -161,6 +161,8 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
        if (flex_gd == NULL)
                goto out3;
 
+       if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data))
+               goto out2;
        flex_gd->count = flexbg_size;
 
        flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
@@ -796,7 +798,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        ext4_kvfree(o_group_desc);
 
        le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
-       err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
+       err = ext4_handle_dirty_super_now(handle, sb);
        if (err)
                ext4_std_error(sb, err);
 
@@ -968,6 +970,8 @@ static void update_backups(struct super_block *sb,
                goto exit_err;
        }
 
+       ext4_superblock_csum_set(sb, (struct ext4_super_block *)data);
+
        while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) {
                struct buffer_head *bh;
 
@@ -1067,6 +1071,54 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
        return err;
 }
 
+static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
+{
+       struct buffer_head *bh = sb_getblk(sb, block);
+       if (!bh)
+               return NULL;
+
+       if (bitmap_uptodate(bh))
+               return bh;
+
+       lock_buffer(bh);
+       if (bh_submit_read(bh) < 0) {
+               unlock_buffer(bh);
+               brelse(bh);
+               return NULL;
+       }
+       unlock_buffer(bh);
+
+       return bh;
+}
+
+static int ext4_set_bitmap_checksums(struct super_block *sb,
+                                    ext4_group_t group,
+                                    struct ext4_group_desc *gdp,
+                                    struct ext4_new_group_data *group_data)
+{
+       struct buffer_head *bh;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 0;
+
+       bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
+       if (!bh)
+               return -EIO;
+       ext4_inode_bitmap_csum_set(sb, group, gdp, bh,
+                                  EXT4_INODES_PER_GROUP(sb) / 8);
+       brelse(bh);
+
+       bh = ext4_get_bitmap(sb, group_data->block_bitmap);
+       if (!bh)
+               return -EIO;
+       ext4_block_bitmap_csum_set(sb, group, gdp, bh,
+                                  EXT4_BLOCKS_PER_GROUP(sb) / 8);
+       brelse(bh);
+
+       return 0;
+}
+
 /*
  * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg
  */
@@ -1093,18 +1145,24 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
                 */
                gdb_bh = sbi->s_group_desc[gdb_num];
                /* Update group descriptor block for new group */
-               gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
+               gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
                                                 gdb_off * EXT4_DESC_SIZE(sb));
 
                memset(gdp, 0, EXT4_DESC_SIZE(sb));
                ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
                ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
+               err = ext4_set_bitmap_checksums(sb, group, gdp, group_data);
+               if (err) {
+                       ext4_std_error(sb, err);
+                       break;
+               }
+
                ext4_inode_table_set(sb, gdp, group_data->inode_table);
                ext4_free_group_clusters_set(sb, gdp,
                                             EXT4_B2C(sbi, group_data->free_blocks_count));
                ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
                gdp->bg_flags = cpu_to_le16(*bg_flags);
-               gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+               ext4_group_desc_csum_set(sb, group, gdp);
 
                err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
                if (unlikely(err)) {
@@ -1343,17 +1401,14 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
                           (1 + ext4_bg_num_gdb(sb, group + i) +
                            le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
                group_data[i].free_blocks_count = blocks_per_group - overhead;
-               if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                              EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+               if (ext4_has_group_desc_csum(sb))
                        flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
                                               EXT4_BG_INODE_UNINIT;
                else
                        flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
        }
 
-       if (last_group == n_group &&
-           EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                      EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+       if (last_group == n_group && ext4_has_group_desc_csum(sb))
                /* We need to initialize block bitmap of last group. */
                flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
 
index 35b5954489eeb88c6c5a29fd76fced5c3472e6f5..eb7aa3e4ef05caf136f24e0565a28e6d1e0a1539 100644 (file)
@@ -112,6 +112,48 @@ static struct file_system_type ext3_fs_type = {
 #define IS_EXT3_SB(sb) (0)
 #endif
 
+static int ext4_verify_csum_type(struct super_block *sb,
+                                struct ext4_super_block *es)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
+}
+
+static __le32 ext4_superblock_csum(struct super_block *sb,
+                                  struct ext4_super_block *es)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       int offset = offsetof(struct ext4_super_block, s_checksum);
+       __u32 csum;
+
+       csum = ext4_chksum(sbi, ~0, (char *)es, offset);
+
+       return cpu_to_le32(csum);
+}
+
+int ext4_superblock_csum_verify(struct super_block *sb,
+                               struct ext4_super_block *es)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return 1;
+
+       return es->s_checksum == ext4_superblock_csum(sb, es);
+}
+
+void ext4_superblock_csum_set(struct super_block *sb,
+                             struct ext4_super_block *es)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       es->s_checksum = ext4_superblock_csum(sb, es);
+}
+
 void *ext4_kvmalloc(size_t size, gfp_t flags)
 {
        void *ret;
@@ -497,6 +539,7 @@ void __ext4_error(struct super_block *sb, const char *function,
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
               sb->s_id, function, line, current->comm, &vaf);
        va_end(args);
+       save_error_info(sb, function, line);
 
        ext4_handle_error(sb);
 }
@@ -905,6 +948,8 @@ static void ext4_put_super(struct super_block *sb)
        unlock_super(sb);
        kobject_put(&sbi->s_kobj);
        wait_for_completion(&sbi->s_kobj_unregister);
+       if (sbi->s_chksum_driver)
+               crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
 }
@@ -1922,43 +1967,69 @@ failed:
        return 0;
 }
 
-__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
-                           struct ext4_group_desc *gdp)
+static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+                                  struct ext4_group_desc *gdp)
 {
+       int offset;
        __u16 crc = 0;
+       __le32 le_group = cpu_to_le32(block_group);
 
-       if (sbi->s_es->s_feature_ro_compat &
-           cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-               int offset = offsetof(struct ext4_group_desc, bg_checksum);
-               __le32 le_group = cpu_to_le32(block_group);
-
-               crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
-               crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
-               crc = crc16(crc, (__u8 *)gdp, offset);
-               offset += sizeof(gdp->bg_checksum); /* skip checksum */
-               /* for checksum of struct ext4_group_desc do the rest...*/
-               if ((sbi->s_es->s_feature_incompat &
-                    cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
-                   offset < le16_to_cpu(sbi->s_es->s_desc_size))
-                       crc = crc16(crc, (__u8 *)gdp + offset,
-                                   le16_to_cpu(sbi->s_es->s_desc_size) -
-                                       offset);
+       if ((sbi->s_es->s_feature_ro_compat &
+            cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
+               /* Use new metadata_csum algorithm */
+               __u16 old_csum;
+               __u32 csum32;
+
+               old_csum = gdp->bg_checksum;
+               gdp->bg_checksum = 0;
+               csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
+                                    sizeof(le_group));
+               csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
+                                    sbi->s_desc_size);
+               gdp->bg_checksum = old_csum;
+
+               crc = csum32 & 0xFFFF;
+               goto out;
        }
 
+       /* old crc16 code */
+       offset = offsetof(struct ext4_group_desc, bg_checksum);
+
+       crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+       crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+       crc = crc16(crc, (__u8 *)gdp, offset);
+       offset += sizeof(gdp->bg_checksum); /* skip checksum */
+       /* for checksum of struct ext4_group_desc do the rest...*/
+       if ((sbi->s_es->s_feature_incompat &
+            cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
+           offset < le16_to_cpu(sbi->s_es->s_desc_size))
+               crc = crc16(crc, (__u8 *)gdp + offset,
+                           le16_to_cpu(sbi->s_es->s_desc_size) -
+                               offset);
+
+out:
        return cpu_to_le16(crc);
 }
 
-int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
+int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
                                struct ext4_group_desc *gdp)
 {
-       if ((sbi->s_es->s_feature_ro_compat &
-            cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
-           (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
+       if (ext4_has_group_desc_csum(sb) &&
+           (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
+                                                     block_group, gdp)))
                return 0;
 
        return 1;
 }
 
+void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
+                             struct ext4_group_desc *gdp)
+{
+       if (!ext4_has_group_desc_csum(sb))
+               return;
+       gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
+}
+
 /* Called at mount-time, super-block is locked */
 static int ext4_check_descriptors(struct super_block *sb,
                                  ext4_group_t *first_not_zeroed)
@@ -2013,7 +2084,7 @@ static int ext4_check_descriptors(struct super_block *sb,
                        return 0;
                }
                ext4_lock_group(sb, i);
-               if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
+               if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                                 "Checksum for group %u failed (%u!=%u)",
                                 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
@@ -2417,6 +2488,23 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
        return count;
 }
 
+static ssize_t trigger_test_error(struct ext4_attr *a,
+                                 struct ext4_sb_info *sbi,
+                                 const char *buf, size_t count)
+{
+       int len = count;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (len && buf[len-1] == '\n')
+               len--;
+
+       if (len)
+               ext4_error(sbi->s_sb, "%.*s", len, buf);
+       return count;
+}
+
 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
 static struct ext4_attr ext4_attr_##_name = {                  \
        .attr = {.name = __stringify(_name), .mode = _mode },   \
@@ -2447,6 +2535,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
 EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
+EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
 
 static struct attribute *ext4_attrs[] = {
        ATTR_LIST(delayed_allocation_blocks),
@@ -2461,6 +2550,7 @@ static struct attribute *ext4_attrs[] = {
        ATTR_LIST(mb_stream_req),
        ATTR_LIST(mb_group_prealloc),
        ATTR_LIST(max_writeback_mb_bump),
+       ATTR_LIST(trigger_fs_error),
        NULL,
 };
 
@@ -2957,6 +3047,44 @@ static void ext4_destroy_lazyinit_thread(void)
        kthread_stop(ext4_lazyinit_task);
 }
 
+static int set_journal_csum_feature_set(struct super_block *sb)
+{
+       int ret = 1;
+       int compat, incompat;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+               /* journal checksum v2 */
+               compat = 0;
+               incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
+       } else {
+               /* journal checksum v1 */
+               compat = JBD2_FEATURE_COMPAT_CHECKSUM;
+               incompat = 0;
+       }
+
+       if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+               ret = jbd2_journal_set_features(sbi->s_journal,
+                               compat, 0,
+                               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
+                               incompat);
+       } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
+               ret = jbd2_journal_set_features(sbi->s_journal,
+                               compat, 0,
+                               incompat);
+               jbd2_journal_clear_features(sbi->s_journal, 0, 0,
+                               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
+       } else {
+               jbd2_journal_clear_features(sbi->s_journal,
+                               JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+                               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
+                               JBD2_FEATURE_INCOMPAT_CSUM_V2);
+       }
+
+       return ret;
+}
+
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 {
        char *orig_data = kstrdup(data, GFP_KERNEL);
@@ -2993,6 +3121,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                goto out_free_orig;
        }
        sb->s_fs_info = sbi;
+       sbi->s_sb = sb;
        sbi->s_mount_opt = 0;
        sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID);
        sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID);
@@ -3032,13 +3161,54 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         * Note: s_es must be initialized as soon as possible because
         *       some ext4 macro-instructions depend on its value
         */
-       es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
+       es = (struct ext4_super_block *) (bh->b_data + offset);
        sbi->s_es = es;
        sb->s_magic = le16_to_cpu(es->s_magic);
        if (sb->s_magic != EXT4_SUPER_MAGIC)
                goto cantfind_ext4;
        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
 
+       /* Warn if metadata_csum and gdt_csum are both set. */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+           EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+               ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
+                            "redundant flags; please run fsck.");
+
+       /* Check for a known checksum algorithm */
+       if (!ext4_verify_csum_type(sb, es)) {
+               ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
+                        "unknown checksum algorithm.");
+               silent = 1;
+               goto cantfind_ext4;
+       }
+
+       /* Load the checksum driver */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                      EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+               sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+               if (IS_ERR(sbi->s_chksum_driver)) {
+                       ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
+                       ret = PTR_ERR(sbi->s_chksum_driver);
+                       sbi->s_chksum_driver = NULL;
+                       goto failed_mount;
+               }
+       }
+
+       /* Check superblock checksum */
+       if (!ext4_superblock_csum_verify(sb, es)) {
+               ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
+                        "invalid superblock checksum.  Run e2fsck?");
+               silent = 1;
+               goto cantfind_ext4;
+       }
+
+       /* Precompute checksum seed for all metadata */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
+                                              sizeof(es->s_uuid));
+
        /* Set defaults before we parse the mount options */
        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
        set_opt(sb, INIT_INODE_TABLE);
@@ -3200,7 +3370,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                               "Can't read superblock on 2nd try");
                        goto failed_mount;
                }
-               es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
+               es = (struct ext4_super_block *)(bh->b_data + offset);
                sbi->s_es = es;
                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
                        ext4_msg(sb, KERN_ERR,
@@ -3392,6 +3562,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                          GFP_KERNEL);
        if (sbi->s_group_desc == NULL) {
                ext4_msg(sb, KERN_ERR, "not enough memory");
+               ret = -ENOMEM;
                goto failed_mount;
        }
 
@@ -3449,6 +3620,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
        if (err) {
                ext4_msg(sb, KERN_ERR, "insufficient memory");
+               ret = err;
                goto failed_mount3;
        }
 
@@ -3506,26 +3678,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                goto no_journal;
        }
 
-       if (ext4_blocks_count(es) > 0xffffffffULL &&
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
                goto failed_mount_wq;
        }
 
-       if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
-               jbd2_journal_set_features(sbi->s_journal,
-                               JBD2_FEATURE_COMPAT_CHECKSUM, 0,
-                               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
-       } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
-               jbd2_journal_set_features(sbi->s_journal,
-                               JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
-               jbd2_journal_clear_features(sbi->s_journal, 0, 0,
-                               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
-       } else {
-               jbd2_journal_clear_features(sbi->s_journal,
-                               JBD2_FEATURE_COMPAT_CHECKSUM, 0,
-                               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
+       if (!set_journal_csum_feature_set(sb)) {
+               ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
+                        "feature set");
+               goto failed_mount_wq;
        }
 
        /* We have now updated the journal if required, so we can
@@ -3606,7 +3769,8 @@ no_journal:
                goto failed_mount4;
        }
 
-       ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
+       if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
+               sb->s_flags |= MS_RDONLY;
 
        /* determine the minimum size of new large inodes, if present */
        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -3641,7 +3805,7 @@ no_journal:
        }
 
        ext4_ext_init(sb);
-       err = ext4_mb_init(sb, needs_recovery);
+       err = ext4_mb_init(sb);
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
                         err);
@@ -3724,6 +3888,8 @@ failed_mount2:
                brelse(sbi->s_group_desc[i]);
        ext4_kvfree(sbi->s_group_desc);
 failed_mount:
+       if (sbi->s_chksum_driver)
+               crypto_free_shash(sbi->s_chksum_driver);
        if (sbi->s_proc) {
                remove_proc_entry("options", sbi->s_proc);
                remove_proc_entry(sb->s_id, ext4_proc_root);
@@ -3847,7 +4013,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
                goto out_bdev;
        }
 
-       es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
+       es = (struct ext4_super_block *) (bh->b_data + offset);
        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
            !(le32_to_cpu(es->s_feature_incompat) &
              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
@@ -4039,6 +4205,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
                                &EXT4_SB(sb)->s_freeinodes_counter));
        sb->s_dirt = 0;
        BUFFER_TRACE(sbh, "marking dirty");
+       ext4_superblock_csum_set(sb, es);
        mark_buffer_dirty(sbh);
        if (sync) {
                error = sync_dirty_buffer(sbh);
@@ -4333,7 +4500,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                                struct ext4_group_desc *gdp =
                                        ext4_get_group_desc(sb, g, NULL);
 
-                               if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
+                               if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
                                        ext4_msg(sb, KERN_ERR,
               "ext4_remount: Checksum for group %u failed (%u!=%u)",
                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
index e88748e55c0f246e90ca21c2094303719f83df07..e56c9ed7d6e30d523b7f8e4b638f9190427cf50d 100644 (file)
@@ -122,6 +122,58 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
        NULL
 };
 
+static __le32 ext4_xattr_block_csum(struct inode *inode,
+                                   sector_t block_nr,
+                                   struct ext4_xattr_header *hdr)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       __u32 csum, old;
+
+       old = hdr->h_checksum;
+       hdr->h_checksum = 0;
+       if (le32_to_cpu(hdr->h_refcount) != 1) {
+               block_nr = cpu_to_le64(block_nr);
+               csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr,
+                                  sizeof(block_nr));
+       } else
+               csum = ei->i_csum_seed;
+       csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
+                          EXT4_BLOCK_SIZE(inode->i_sb));
+       hdr->h_checksum = old;
+       return cpu_to_le32(csum);
+}
+
+static int ext4_xattr_block_csum_verify(struct inode *inode,
+                                       sector_t block_nr,
+                                       struct ext4_xattr_header *hdr)
+{
+       if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+           (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
+               return 0;
+       return 1;
+}
+
+static void ext4_xattr_block_csum_set(struct inode *inode,
+                                     sector_t block_nr,
+                                     struct ext4_xattr_header *hdr)
+{
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+               EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+               return;
+
+       hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
+}
+
+static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
+                                               struct inode *inode,
+                                               struct buffer_head *bh)
+{
+       ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
+       return ext4_handle_dirty_metadata(handle, inode, bh);
+}
+
 static inline const struct xattr_handler *
 ext4_xattr_handler(int name_index)
 {
@@ -156,12 +208,22 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
 }
 
 static inline int
-ext4_xattr_check_block(struct buffer_head *bh)
+ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
 {
+       int error;
+
+       if (buffer_verified(bh))
+               return 0;
+
        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
            BHDR(bh)->h_blocks != cpu_to_le32(1))
                return -EIO;
-       return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+       if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
+               return -EIO;
+       error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+       if (!error)
+               set_buffer_verified(bh);
+       return error;
 }
 
 static inline int
@@ -224,7 +286,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
                goto cleanup;
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
-       if (ext4_xattr_check_block(bh)) {
+       if (ext4_xattr_check_block(inode, bh)) {
 bad_block:
                EXT4_ERROR_INODE(inode, "bad block %llu",
                                 EXT4_I(inode)->i_file_acl);
@@ -369,7 +431,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                goto cleanup;
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
-       if (ext4_xattr_check_block(bh)) {
+       if (ext4_xattr_check_block(inode, bh)) {
                EXT4_ERROR_INODE(inode, "bad block %llu",
                                 EXT4_I(inode)->i_file_acl);
                error = -EIO;
@@ -492,7 +554,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                if (ce)
                        mb_cache_entry_release(ce);
                unlock_buffer(bh);
-               error = ext4_handle_dirty_metadata(handle, inode, bh);
+               error = ext4_handle_dirty_xattr_block(handle, inode, bh);
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
                dquot_free_block(inode, 1);
@@ -662,7 +724,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
                ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
                        atomic_read(&(bs->bh->b_count)),
                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
-               if (ext4_xattr_check_block(bs->bh)) {
+               if (ext4_xattr_check_block(inode, bs->bh)) {
                        EXT4_ERROR_INODE(inode, "bad block %llu",
                                         EXT4_I(inode)->i_file_acl);
                        error = -EIO;
@@ -725,9 +787,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                        if (error == -EIO)
                                goto bad_block;
                        if (!error)
-                               error = ext4_handle_dirty_metadata(handle,
-                                                                  inode,
-                                                                  bs->bh);
+                               error = ext4_handle_dirty_xattr_block(handle,
+                                                                     inode,
+                                                                     bs->bh);
                        if (error)
                                goto cleanup;
                        goto inserted;
@@ -796,9 +858,9 @@ inserted:
                                ea_bdebug(new_bh, "reusing; refcount now=%d",
                                        le32_to_cpu(BHDR(new_bh)->h_refcount));
                                unlock_buffer(new_bh);
-                               error = ext4_handle_dirty_metadata(handle,
-                                                                  inode,
-                                                                  new_bh);
+                               error = ext4_handle_dirty_xattr_block(handle,
+                                                                     inode,
+                                                                     new_bh);
                                if (error)
                                        goto cleanup_dquot;
                        }
@@ -855,8 +917,8 @@ getblk_failed:
                        set_buffer_uptodate(new_bh);
                        unlock_buffer(new_bh);
                        ext4_xattr_cache_insert(new_bh);
-                       error = ext4_handle_dirty_metadata(handle,
-                                                          inode, new_bh);
+                       error = ext4_handle_dirty_xattr_block(handle,
+                                                             inode, new_bh);
                        if (error)
                                goto cleanup;
                }
@@ -1193,7 +1255,7 @@ retry:
                error = -EIO;
                if (!bh)
                        goto cleanup;
-               if (ext4_xattr_check_block(bh)) {
+               if (ext4_xattr_check_block(inode, bh)) {
                        EXT4_ERROR_INODE(inode, "bad block %llu",
                                         EXT4_I(inode)->i_file_acl);
                        error = -EIO;
index 25b7387ff183f880cdb9ccaf2529ca8c0f218a7b..91f31ca7d9af9df24a965c64bb0271c43a4d4b09 100644 (file)
@@ -27,7 +27,9 @@ struct ext4_xattr_header {
        __le32  h_refcount;     /* reference count */
        __le32  h_blocks;       /* number of disk blocks used */
        __le32  h_hash;         /* hash value of all attributes */
-       __u32   h_reserved[4];  /* zero right now */
+       __le32  h_checksum;     /* crc32c(uuid+id+xattrblock) */
+                               /* id = inum if refcount=1, blknum otherwise */
+       __u32   h_reserved[3];  /* zero right now */
 };
 
 struct ext4_xattr_ibody_header {
index f32f346f4b0a521a5b6bbaedc7b0a5a7750c4b1e..69a48c2944da682c8a133fe75183c086ef08813b 100644 (file)
@@ -1,6 +1,8 @@
 config JBD2
        tristate
        select CRC32
+       select CRYPTO
+       select CRYPTO_CRC32C
        help
          This is a generic journaling layer for block devices that support
          both 32-bit and 64-bit block numbers.  It is currently used by
index 840f70f507924a0ac4db70a9d729f715783b49be..216f4299f65e7e2f1e26859c8e1247cdf71c55df 100644 (file)
@@ -85,6 +85,24 @@ nope:
        __brelse(bh);
 }
 
+static void jbd2_commit_block_csum_set(journal_t *j,
+                                      struct journal_head *descriptor)
+{
+       struct commit_header *h;
+       __u32 csum;
+
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return;
+
+       h = (struct commit_header *)(jh2bh(descriptor)->b_data);
+       h->h_chksum_type = 0;
+       h->h_chksum_size = 0;
+       h->h_chksum[0] = 0;
+       csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
+                          j->j_blocksize);
+       h->h_chksum[0] = cpu_to_be32(csum);
+}
+
 /*
  * Done it all: now submit the commit record.  We should have
  * cleaned up our previous buffers by now, so if we are in abort
@@ -128,6 +146,7 @@ static int journal_submit_commit_record(journal_t *journal,
                tmp->h_chksum_size      = JBD2_CRC32_CHKSUM_SIZE;
                tmp->h_chksum[0]        = cpu_to_be32(crc32_sum);
        }
+       jbd2_commit_block_csum_set(journal, descriptor);
 
        JBUFFER_TRACE(descriptor, "submit commit block");
        lock_buffer(bh);
@@ -301,6 +320,44 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
                tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
 }
 
+static void jbd2_descr_block_csum_set(journal_t *j,
+                                     struct journal_head *descriptor)
+{
+       struct jbd2_journal_block_tail *tail;
+       __u32 csum;
+
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return;
+
+       tail = (struct jbd2_journal_block_tail *)
+                       (jh2bh(descriptor)->b_data + j->j_blocksize -
+                       sizeof(struct jbd2_journal_block_tail));
+       tail->t_checksum = 0;
+       csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
+                          j->j_blocksize);
+       tail->t_checksum = cpu_to_be32(csum);
+}
+
+static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
+                                   struct buffer_head *bh, __u32 sequence)
+{
+       struct page *page = bh->b_page;
+       __u8 *addr;
+       __u32 csum;
+
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return;
+
+       sequence = cpu_to_be32(sequence);
+       addr = kmap_atomic(page, KM_USER0);
+       csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
+                         sizeof(sequence));
+       csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data),
+                         bh->b_size);
+       kunmap_atomic(addr, KM_USER0);
+
+       tag->t_checksum = cpu_to_be32(csum);
+}
 /*
  * jbd2_journal_commit_transaction
  *
@@ -334,6 +391,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        unsigned long first_block;
        tid_t first_tid;
        int update_tail;
+       int csum_size = 0;
+
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               csum_size = sizeof(struct jbd2_journal_block_tail);
 
        /*
         * First job: lock down the current transaction and wait for
@@ -627,7 +688,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
                tag = (journal_block_tag_t *) tagp;
                write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
-               tag->t_flags = cpu_to_be32(tag_flag);
+               tag->t_flags = cpu_to_be16(tag_flag);
+               jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh),
+                                       commit_transaction->t_tid);
                tagp += tag_bytes;
                space_left -= tag_bytes;
 
@@ -643,7 +706,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
                if (bufs == journal->j_wbufsize ||
                    commit_transaction->t_buffers == NULL ||
-                   space_left < tag_bytes + 16) {
+                   space_left < tag_bytes + 16 + csum_size) {
 
                        jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
 
@@ -651,8 +714,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                            submitting the IOs.  "tag" still points to
                            the last tag we set up. */
 
-                       tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG);
+                       tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
 
+                       jbd2_descr_block_csum_set(journal, descriptor);
 start_journal_io:
                        for (i = 0; i < bufs; i++) {
                                struct buffer_head *bh = wbuf[i];
index 1afb701622b0b17748b4cd7f7d171df139bcc9cc..e9a3c4c85594e30aca1ed1f14d5667ba0595160a 100644 (file)
@@ -97,6 +97,43 @@ EXPORT_SYMBOL(jbd2_inode_cache);
 static void __journal_abort_soft (journal_t *journal, int errno);
 static int jbd2_journal_create_slab(size_t slab_size);
 
+/* Checksumming functions */
+int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
+{
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return 1;
+
+       return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
+}
+
+static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
+{
+       __u32 csum, old_csum;
+
+       old_csum = sb->s_checksum;
+       sb->s_checksum = 0;
+       csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
+       sb->s_checksum = old_csum;
+
+       return cpu_to_be32(csum);
+}
+
+int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
+{
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return 1;
+
+       return sb->s_checksum == jbd2_superblock_csum(j, sb);
+}
+
+void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
+{
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return;
+
+       sb->s_checksum = jbd2_superblock_csum(j, sb);
+}
+
 /*
  * Helper function used to manage commit timeouts
  */
@@ -1348,6 +1385,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
        jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
                  journal->j_errno);
        sb->s_errno    = cpu_to_be32(journal->j_errno);
+       jbd2_superblock_csum_set(journal, sb);
        read_unlock(&journal->j_state_lock);
 
        jbd2_write_superblock(journal, WRITE_SYNC);
@@ -1376,6 +1414,9 @@ static int journal_get_superblock(journal_t *journal)
                }
        }
 
+       if (buffer_verified(bh))
+               return 0;
+
        sb = journal->j_superblock;
 
        err = -EINVAL;
@@ -1413,6 +1454,43 @@ static int journal_get_superblock(journal_t *journal)
                goto out;
        }
 
+       if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
+           JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+               /* Can't have checksum v1 and v2 on at the same time! */
+               printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 "
+                      "at the same time!\n");
+               goto out;
+       }
+
+       if (!jbd2_verify_csum_type(journal, sb)) {
+               printk(KERN_ERR "JBD: Unknown checksum type\n");
+               goto out;
+       }
+
+       /* Load the checksum driver */
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+               journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+               if (IS_ERR(journal->j_chksum_driver)) {
+                       printk(KERN_ERR "JBD: Cannot load crc32c driver.\n");
+                       err = PTR_ERR(journal->j_chksum_driver);
+                       journal->j_chksum_driver = NULL;
+                       goto out;
+               }
+       }
+
+       /* Check superblock checksum */
+       if (!jbd2_superblock_csum_verify(journal, sb)) {
+               printk(KERN_ERR "JBD: journal checksum error\n");
+               goto out;
+       }
+
+       /* Precompute checksum seed for all metadata */
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
+                                                  sizeof(sb->s_uuid));
+
+       set_buffer_verified(bh);
+
        return 0;
 
 out:
@@ -1564,6 +1642,8 @@ int jbd2_journal_destroy(journal_t *journal)
                iput(journal->j_inode);
        if (journal->j_revoke)
                jbd2_journal_destroy_revoke(journal);
+       if (journal->j_chksum_driver)
+               crypto_free_shash(journal->j_chksum_driver);
        kfree(journal->j_wbuf);
        kfree(journal);
 
@@ -1653,6 +1733,10 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com
 int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
                          unsigned long ro, unsigned long incompat)
 {
+#define INCOMPAT_FEATURE_ON(f) \
+               ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f)))
+#define COMPAT_FEATURE_ON(f) \
+               ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f)))
        journal_superblock_t *sb;
 
        if (jbd2_journal_check_used_features(journal, compat, ro, incompat))
@@ -1661,16 +1745,54 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
        if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
                return 0;
 
+       /* Asking for checksumming v2 and v1?  Only give them v2. */
+       if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 &&
+           compat & JBD2_FEATURE_COMPAT_CHECKSUM)
+               compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
+
        jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
                  compat, ro, incompat);
 
        sb = journal->j_superblock;
 
+       /* If enabling v2 checksums, update superblock */
+       if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+               sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
+               sb->s_feature_compat &=
+                       ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
+
+               /* Load the checksum driver */
+               if (journal->j_chksum_driver == NULL) {
+                       journal->j_chksum_driver = crypto_alloc_shash("crc32c",
+                                                                     0, 0);
+                       if (IS_ERR(journal->j_chksum_driver)) {
+                               printk(KERN_ERR "JBD: Cannot load crc32c "
+                                      "driver.\n");
+                               journal->j_chksum_driver = NULL;
+                               return 0;
+                       }
+               }
+
+               /* Precompute checksum seed for all metadata */
+               if (JBD2_HAS_INCOMPAT_FEATURE(journal,
+                                             JBD2_FEATURE_INCOMPAT_CSUM_V2))
+                       journal->j_csum_seed = jbd2_chksum(journal, ~0,
+                                                          sb->s_uuid,
+                                                          sizeof(sb->s_uuid));
+       }
+
+       /* If enabling v1 checksums, downgrade superblock */
+       if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
+               sb->s_feature_incompat &=
+                       ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2);
+
        sb->s_feature_compat    |= cpu_to_be32(compat);
        sb->s_feature_ro_compat |= cpu_to_be32(ro);
        sb->s_feature_incompat  |= cpu_to_be32(incompat);
 
        return 1;
+#undef COMPAT_FEATURE_ON
+#undef INCOMPAT_FEATURE_ON
 }
 
 /*
@@ -1975,10 +2097,16 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
  */
 size_t journal_tag_bytes(journal_t *journal)
 {
+       journal_block_tag_t tag;
+       size_t x = 0;
+
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               x += sizeof(tag.t_checksum);
+
        if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
-               return JBD2_TAG_SIZE64;
+               return x + JBD2_TAG_SIZE64;
        else
-               return JBD2_TAG_SIZE32;
+               return x + JBD2_TAG_SIZE32;
 }
 
 /*
index c1a03354a22ff1b5a787251b422afcb5225ca2c9..0131e4362534c4d5b83273130ee292463ec49f07 100644 (file)
@@ -174,6 +174,25 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
        return 0;
 }
 
+static int jbd2_descr_block_csum_verify(journal_t *j,
+                                       void *buf)
+{
+       struct jbd2_journal_block_tail *tail;
+       __u32 provided, calculated;
+
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return 1;
+
+       tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
+                       sizeof(struct jbd2_journal_block_tail));
+       provided = tail->t_checksum;
+       tail->t_checksum = 0;
+       calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
+       tail->t_checksum = provided;
+
+       provided = be32_to_cpu(provided);
+       return provided == calculated;
+}
 
 /*
  * Count the number of in-use tags in a journal descriptor block.
@@ -186,6 +205,9 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
        int                     nr = 0, size = journal->j_blocksize;
        int                     tag_bytes = journal_tag_bytes(journal);
 
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               size -= sizeof(struct jbd2_journal_block_tail);
+
        tagp = &bh->b_data[sizeof(journal_header_t)];
 
        while ((tagp - bh->b_data + tag_bytes) <= size) {
@@ -193,10 +215,10 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
 
                nr++;
                tagp += tag_bytes;
-               if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
+               if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
                        tagp += 16;
 
-               if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
+               if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
                        break;
        }
 
@@ -353,6 +375,41 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
        return 0;
 }
 
+static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
+{
+       struct commit_header *h;
+       __u32 provided, calculated;
+
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return 1;
+
+       h = buf;
+       provided = h->h_chksum[0];
+       h->h_chksum[0] = 0;
+       calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
+       h->h_chksum[0] = provided;
+
+       provided = be32_to_cpu(provided);
+       return provided == calculated;
+}
+
+static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
+                                     void *buf, __u32 sequence)
+{
+       __u32 provided, calculated;
+
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return 1;
+
+       sequence = cpu_to_be32(sequence);
+       calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
+                                sizeof(sequence));
+       calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize);
+       provided = be32_to_cpu(tag->t_checksum);
+
+       return provided == cpu_to_be32(calculated);
+}
+
 static int do_one_pass(journal_t *journal,
                        struct recovery_info *info, enum passtype pass)
 {
@@ -366,6 +423,7 @@ static int do_one_pass(journal_t *journal,
        int                     blocktype;
        int                     tag_bytes = journal_tag_bytes(journal);
        __u32                   crc32_sum = ~0; /* Transactional Checksums */
+       int                     descr_csum_size = 0;
 
        /*
         * First thing is to establish what we expect to find in the log
@@ -451,6 +509,18 @@ static int do_one_pass(journal_t *journal,
 
                switch(blocktype) {
                case JBD2_DESCRIPTOR_BLOCK:
+                       /* Verify checksum first */
+                       if (JBD2_HAS_INCOMPAT_FEATURE(journal,
+                                       JBD2_FEATURE_INCOMPAT_CSUM_V2))
+                               descr_csum_size =
+                                       sizeof(struct jbd2_journal_block_tail);
+                       if (descr_csum_size > 0 &&
+                           !jbd2_descr_block_csum_verify(journal,
+                                                         bh->b_data)) {
+                               err = -EIO;
+                               goto failed;
+                       }
+
                        /* If it is a valid descriptor block, replay it
                         * in pass REPLAY; if journal_checksums enabled, then
                         * calculate checksums in PASS_SCAN, otherwise,
@@ -481,11 +551,11 @@ static int do_one_pass(journal_t *journal,
 
                        tagp = &bh->b_data[sizeof(journal_header_t)];
                        while ((tagp - bh->b_data + tag_bytes)
-                              <= journal->j_blocksize) {
+                              <= journal->j_blocksize - descr_csum_size) {
                                unsigned long io_block;
 
                                tag = (journal_block_tag_t *) tagp;
-                               flags = be32_to_cpu(tag->t_flags);
+                               flags = be16_to_cpu(tag->t_flags);
 
                                io_block = next_log_block++;
                                wrap(journal, next_log_block);
@@ -516,6 +586,19 @@ static int do_one_pass(journal_t *journal,
                                                goto skip_write;
                                        }
 
+                                       /* Look for block corruption */
+                                       if (!jbd2_block_tag_csum_verify(
+                                               journal, tag, obh->b_data,
+                                               be32_to_cpu(tmp->h_sequence))) {
+                                               brelse(obh);
+                                               success = -EIO;
+                                               printk(KERN_ERR "JBD: Invalid "
+                                                      "checksum recovering "
+                                                      "block %llu in log\n",
+                                                      blocknr);
+                                               continue;
+                                       }
+
                                        /* Find a buffer for the new
                                         * data being restored */
                                        nbh = __getblk(journal->j_fs_dev,
@@ -650,6 +733,19 @@ static int do_one_pass(journal_t *journal,
                                }
                                crc32_sum = ~0;
                        }
+                       if (pass == PASS_SCAN &&
+                           !jbd2_commit_block_csum_verify(journal,
+                                                          bh->b_data)) {
+                               info->end_transaction = next_commit_ID;
+
+                               if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
+                                    JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
+                                       journal->j_failed_commit =
+                                               next_commit_ID;
+                                       brelse(bh);
+                                       break;
+                               }
+                       }
                        brelse(bh);
                        next_commit_ID++;
                        continue;
@@ -706,6 +802,25 @@ static int do_one_pass(journal_t *journal,
        return err;
 }
 
+static int jbd2_revoke_block_csum_verify(journal_t *j,
+                                        void *buf)
+{
+       struct jbd2_journal_revoke_tail *tail;
+       __u32 provided, calculated;
+
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return 1;
+
+       tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
+                       sizeof(struct jbd2_journal_revoke_tail));
+       provided = tail->r_checksum;
+       tail->r_checksum = 0;
+       calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
+       tail->r_checksum = provided;
+
+       provided = be32_to_cpu(provided);
+       return provided == calculated;
+}
 
 /* Scan a revoke record, marking all blocks mentioned as revoked. */
 
@@ -720,6 +835,9 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
        offset = sizeof(jbd2_journal_revoke_header_t);
        max = be32_to_cpu(header->r_count);
 
+       if (!jbd2_revoke_block_csum_verify(journal, header))
+               return -EINVAL;
+
        if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
                record_len = 8;
 
index 6973705d6a3d9db1c96ed67f55c97c8a13ee2f6d..f30b80b4ce8bef98cab621bf731e13682661ca6d 100644 (file)
@@ -578,6 +578,7 @@ static void write_one_revoke_record(journal_t *journal,
                                    struct jbd2_revoke_record_s *record,
                                    int write_op)
 {
+       int csum_size = 0;
        struct journal_head *descriptor;
        int offset;
        journal_header_t *header;
@@ -592,9 +593,13 @@ static void write_one_revoke_record(journal_t *journal,
        descriptor = *descriptorp;
        offset = *offsetp;
 
+       /* Do we need to leave space at the end for a checksum? */
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               csum_size = sizeof(struct jbd2_journal_revoke_tail);
+
        /* Make sure we have a descriptor with space left for the record */
        if (descriptor) {
-               if (offset == journal->j_blocksize) {
+               if (offset >= journal->j_blocksize - csum_size) {
                        flush_descriptor(journal, descriptor, offset, write_op);
                        descriptor = NULL;
                }
@@ -631,6 +636,24 @@ static void write_one_revoke_record(journal_t *journal,
        *offsetp = offset;
 }
 
+static void jbd2_revoke_csum_set(journal_t *j,
+                                struct journal_head *descriptor)
+{
+       struct jbd2_journal_revoke_tail *tail;
+       __u32 csum;
+
+       if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+               return;
+
+       tail = (struct jbd2_journal_revoke_tail *)
+                       (jh2bh(descriptor)->b_data + j->j_blocksize -
+                       sizeof(struct jbd2_journal_revoke_tail));
+       tail->r_checksum = 0;
+       csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data,
+                          j->j_blocksize);
+       tail->r_checksum = cpu_to_be32(csum);
+}
+
 /*
  * Flush a revoke descriptor out to the journal.  If we are aborting,
  * this is a noop; otherwise we are generating a buffer which needs to
@@ -652,6 +675,8 @@ static void flush_descriptor(journal_t *journal,
 
        header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data;
        header->r_count = cpu_to_be32(offset);
+       jbd2_revoke_csum_set(journal, descriptor);
+
        set_buffer_jwrite(bh);
        BUFFER_TRACE(bh, "write");
        set_buffer_dirty(bh);
index ddcd3549c6c26cbc9cb9dd46831b189ed3c0441e..fb1ab9533b67277a557cd5f8ea9f7216b8284d4e 100644 (file)
@@ -162,8 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 
 alloc_transaction:
        if (!journal->j_running_transaction) {
-               new_transaction = kmem_cache_alloc(transaction_cache,
-                                                  gfp_mask | __GFP_ZERO);
+               new_transaction = kmem_cache_zalloc(transaction_cache,
+                                                   gfp_mask);
                if (!new_transaction) {
                        /*
                         * If __GFP_FS is not present, then we may be
index 912c30a8ddb1e47cd732fbd95f281238ca601ae0..f334c7fab96762ab4131c9886df87d4d6d4dde9d 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/timer.h>
 #include <linux/slab.h>
+#include <crypto/hash.h>
 #endif
 
 #define journal_oom_retry 1
@@ -147,12 +148,24 @@ typedef struct journal_header_s
 #define JBD2_CRC32_CHKSUM   1
 #define JBD2_MD5_CHKSUM     2
 #define JBD2_SHA1_CHKSUM    3
+#define JBD2_CRC32C_CHKSUM  4
 
 #define JBD2_CRC32_CHKSUM_SIZE 4
 
 #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
 /*
  * Commit block header for storing transactional checksums:
+ *
+ * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum*
+ * fields are used to store a checksum of the descriptor and data blocks.
+ *
+ * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum
+ * field is used to store crc32c(uuid+commit_block).  Each journal metadata
+ * block gets its own checksum, and data block checksums are stored in
+ * journal_block_tag (in the descriptor).  The other h_chksum* fields are
+ * not used.
+ *
+ * Checksum v1 and v2 are mutually exclusive features.
  */
 struct commit_header {
        __be32          h_magic;
@@ -175,13 +188,19 @@ struct commit_header {
 typedef struct journal_block_tag_s
 {
        __be32          t_blocknr;      /* The on-disk block number */
-       __be32          t_flags;        /* See below */
+       __be16          t_checksum;     /* truncated crc32c(uuid+seq+block) */
+       __be16          t_flags;        /* See below */
        __be32          t_blocknr_high; /* most-significant high 32bits. */
 } journal_block_tag_t;
 
 #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
 #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t))
 
+/* Tail of descriptor block, for checksumming */
+struct jbd2_journal_block_tail {
+       __be32          t_checksum;     /* crc32c(uuid+descr_block) */
+};
+
 /*
  * The revoke descriptor: used on disk to describe a series of blocks to
  * be revoked from the log
@@ -192,6 +211,10 @@ typedef struct jbd2_journal_revoke_header_s
        __be32           r_count;       /* Count of bytes used in the block */
 } jbd2_journal_revoke_header_t;
 
+/* Tail of revoke block, for checksumming */
+struct jbd2_journal_revoke_tail {
+       __be32          r_checksum;     /* crc32c(uuid+revoke_block) */
+};
 
 /* Definitions for the journal tag flags word: */
 #define JBD2_FLAG_ESCAPE               1       /* on-disk block is escaped */
@@ -241,7 +264,10 @@ typedef struct journal_superblock_s
        __be32  s_max_trans_data;       /* Limit of data blocks per trans. */
 
 /* 0x0050 */
-       __u32   s_padding[44];
+       __u8    s_checksum_type;        /* checksum type */
+       __u8    s_padding2[3];
+       __u32   s_padding[42];
+       __be32  s_checksum;             /* crc32c(superblock) */
 
 /* 0x0100 */
        __u8    s_users[16*48];         /* ids of all fs'es sharing the log */
@@ -263,13 +289,15 @@ typedef struct journal_superblock_s
 #define JBD2_FEATURE_INCOMPAT_REVOKE           0x00000001
 #define JBD2_FEATURE_INCOMPAT_64BIT            0x00000002
 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT     0x00000004
+#define JBD2_FEATURE_INCOMPAT_CSUM_V2          0x00000008
 
 /* Features known to this kernel version: */
 #define JBD2_KNOWN_COMPAT_FEATURES     JBD2_FEATURE_COMPAT_CHECKSUM
 #define JBD2_KNOWN_ROCOMPAT_FEATURES   0
 #define JBD2_KNOWN_INCOMPAT_FEATURES   (JBD2_FEATURE_INCOMPAT_REVOKE | \
                                        JBD2_FEATURE_INCOMPAT_64BIT | \
-                                       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
+                                       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
+                                       JBD2_FEATURE_INCOMPAT_CSUM_V2)
 
 #ifdef __KERNEL__
 
@@ -939,6 +967,12 @@ struct journal_s
         * superblock pointer here
         */
        void *j_private;
+
+       /* Reference to checksum algorithm driver via cryptoapi */
+       struct crypto_shash *j_chksum_driver;
+
+       /* Precomputed journal UUID checksum for seeding other checksums */
+       __u32 j_csum_seed;
 };
 
 /*
@@ -1268,6 +1302,25 @@ static inline int jbd_space_needed(journal_t *journal)
 
 extern int jbd_blocks_per_page(struct inode *inode);
 
+static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
+                             const void *address, unsigned int length)
+{
+       struct {
+               struct shash_desc shash;
+               char ctx[crypto_shash_descsize(journal->j_chksum_driver)];
+       } desc;
+       int err;
+
+       desc.shash.tfm = journal->j_chksum_driver;
+       desc.shash.flags = 0;
+       *(u32 *)desc.ctx = crc;
+
+       err = crypto_shash_update(&desc.shash, address, length);
+       BUG_ON(err);
+
+       return *(u32 *)desc.ctx;
+}
+
 #ifdef __KERNEL__
 
 #define buffer_trace_init(bh)  do {} while (0)
index 6230f8556a4eeac37bcaaa83a4cc913177e09c56..6133679bc4c01ace20a0114fd50ff7c3481c7eb9 100644 (file)
@@ -12,6 +12,7 @@ enum jbd_state_bits {
        BH_State,               /* Pins most journal_head state */
        BH_JournalHead,         /* Pins bh->b_private and jh->b_bh */
        BH_Unshadow,            /* Dummy bit, for BJ_Shadow wakeup filtering */
+       BH_Verified,            /* Metadata block has been verified ok */
        BH_JBDPrivateStart,     /* First bit available for private use by FS */
 };
 
@@ -24,6 +25,7 @@ TAS_BUFFER_FNS(Revoked, revoked)
 BUFFER_FNS(RevokeValid, revokevalid)
 TAS_BUFFER_FNS(RevokeValid, revokevalid)
 BUFFER_FNS(Freed, freed)
+BUFFER_FNS(Verified, verified)
 
 static inline struct buffer_head *jh2bh(struct journal_head *jh)
 {