Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 18:34:40 +0000 (11:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 18:34:40 +0000 (11:34 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  jbd: change the field "b_cow_tid" of struct journal_head from type unsigned to tid_t
  ext3.txt: update the links in the section "useful links" to the latest ones
  ext3: Fix data corruption in inodes with journalled data
  ext2: check xattr name_len before acquiring xattr_sem in ext2_xattr_get
  ext3: Fix compilation with -DDX_DEBUG
  quota: Remove unused declaration
  jbd: Use WRITE_SYNC in journal checkpoint.
  jbd: Fix oops in journal_remove_journal_head()
  ext3: Return -EINVAL when start is beyond the end of fs in ext3_trim_fs()
  ext3/ioctl.c: silence sparse warnings about different address spaces
  ext3/ext4 Documentation: remove bh/nobh since it has been deprecated
  ext3: Improve truncate error handling
  ext3: use proper little-endian bitops
  ext2: include fs.h into ext2_fs.h
  ext3: Fix oops in ext3_try_to_allocate_with_rsv()
  jbd: fix a bug of leaking jh->b_jcount
  jbd: remove dependency on __GFP_NOFAIL
  ext3: Convert ext3 to new truncate calling convention
  jbd: Add fixed tracepoints
  ext3: Add fixed tracepoints

Resolve conflicts in fs/ext3/fsync.c due to fsync locking push-down and
new fixed tracepoints.

23 files changed:
Documentation/filesystems/ext3.txt
Documentation/filesystems/ext4.txt
fs/ext2/xattr.c
fs/ext3/balloc.c
fs/ext3/file.c
fs/ext3/fsync.c
fs/ext3/ialloc.c
fs/ext3/inode.c
fs/ext3/ioctl.c
fs/ext3/namei.c
fs/ext3/super.c
fs/ext3/xattr.c
fs/jbd/checkpoint.c
fs/jbd/commit.c
fs/jbd/journal.c
fs/jbd/transaction.c
include/linux/ext2_fs.h
include/linux/ext3_fs.h
include/linux/jbd.h
include/linux/journal-head.h
include/linux/quota.h
include/trace/events/ext3.h [new file with mode: 0644]
include/trace/events/jbd.h [new file with mode: 0644]

index 272f80d5f966741c567b3259ac6755a9055373c4..22f3a0eda1d22e430ebe350d7e952099b9a9e880 100644 (file)
@@ -147,15 +147,6 @@ grpjquota=<file>   during journal replay. They replace the above
                        package for more details
                        (http://sourceforge.net/projects/linuxquota).
 
-bh             (*)     ext3 associates buffer heads to data pages to
-nobh                   (a) cache disk block mapping information
-                       (b) link pages into transaction to provide
-                           ordering guarantees.
-                       "bh" option forces use of buffer heads.
-                       "nobh" option tries to avoid associating buffer
-                       heads (supported only for "writeback" mode).
-
-
 Specification
 =============
 Ext3 shares all disk implementation with the ext2 filesystem, and adds
@@ -227,5 +218,5 @@ kernel source:      <file:fs/ext3/>
 programs:      http://e2fsprogs.sourceforge.net/
                http://ext2resize.sourceforge.net
 
-useful links:  http://www.ibm.com/developerworks/library/l-fs7.html
-               http://www.ibm.com/developerworks/library/l-fs8.html
+useful links:  http://www.ibm.com/developerworks/library/l-fs7/index.html
+        http://www.ibm.com/developerworks/library/l-fs8/index.html
index 3ae9bc94352a660f2d3ed9feccc0b3aa8955ffcc..232a575a0c4857249edc5aa76a235ce4e258082f 100644 (file)
@@ -68,12 +68,12 @@ Note: More extensive information for getting started with ext4 can be
     '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems
     for a fair comparison.  When tuning ext3 for best benchmark numbers,
     it is often worthwhile to try changing the data journaling mode; '-o
-    data=writeback,nobh' can be faster for some workloads.  (Note
-    however that running mounted with data=writeback can potentially
-    leave stale data exposed in recently written files in case of an
-    unclean shutdown, which could be a security exposure in some
-    situations.)  Configuring the filesystem with a large journal can
-    also be helpful for metadata-intensive workloads.
+    data=writeback' can be faster for some workloads.  (Note however that
+    running mounted with data=writeback can potentially leave stale data
+    exposed in recently written files in case of an unclean shutdown,
+    which could be a security exposure in some situations.)  Configuring
+    the filesystem with a large journal can also be helpful for
+    metadata-intensive workloads.
 
 2. Features
 ===========
@@ -272,14 +272,6 @@ grpjquota=<file>   during journal replay. They replace the above
                        package for more details
                        (http://sourceforge.net/projects/linuxquota).
 
-bh             (*)     ext4 associates buffer heads to data pages to
-nobh                   (a) cache disk block mapping information
-                       (b) link pages into transaction to provide
-                           ordering guarantees.
-                       "bh" option forces use of buffer heads.
-                       "nobh" option tries to avoid associating buffer
-                       heads (supported only for "writeback" mode).
-
 stripe=n               Number of filesystem blocks that mballoc will try
                        to use for allocation size and alignment. For RAID5/6
                        systems this should be the number of data
@@ -393,8 +385,7 @@ dioread_nolock              locking. If the dioread_nolock option is specified
                        write and convert the extent to initialized after IO
                        completes. This approach allows ext4 code to avoid
                        using inode mutex, which improves scalability on high
-                       speed storages. However this does not work with nobh
-                       option and the mount will fail. Nor does it work with
+                       speed storages. However this does not work with
                        data journaling and dioread_nolock option will be
                        ignored with kernel warning. Note that dioread_nolock
                        code path is only used for extent-based files.
index 529970617a21c5c45cd1cffa1b39c14f318d72aa..d27b71f1d1832da7aa2b2967a4b853c829dfb721 100644 (file)
@@ -161,6 +161,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
 
        if (name == NULL)
                return -EINVAL;
+       name_len = strlen(name);
+       if (name_len > 255)
+               return -ERANGE;
+
        down_read(&EXT2_I(inode)->xattr_sem);
        error = -ENODATA;
        if (!EXT2_I(inode)->i_file_acl)
@@ -181,12 +185,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
                error = -EIO;
                goto cleanup;
        }
-       /* find named attribute */
-       name_len = strlen(name);
 
-       error = -ERANGE;
-       if (name_len > 255)
-               goto cleanup;
+       /* find named attribute */
        entry = FIRST_ENTRY(bh);
        while (!IS_LAST_ENTRY(entry)) {
                struct ext2_xattr_entry *next =
index fe52297e31ad751abc46f5c6695d0e0b029ab3ed..6386d76f44a7d1d9939508e1e8e8be66b394033b 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
+#include <trace/events/ext3.h>
 
 /*
  * balloc.c contains the blocks allocation and deallocation routines
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
        desc = ext3_get_group_desc(sb, block_group, NULL);
        if (!desc)
                return NULL;
+       trace_ext3_read_block_bitmap(sb, block_group);
        bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
        bh = sb_getblk(sb, bitmap_blk);
        if (unlikely(!bh)) {
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
        struct rb_node * parent = NULL;
        struct ext3_reserve_window_node *this;
 
+       trace_ext3_rsv_window_add(sb, rsv);
        while (*p)
        {
                parent = *p;
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
        rsv = &block_i->rsv_window_node;
        if (!rsv_is_empty(&rsv->rsv_window)) {
                spin_lock(rsv_lock);
-               if (!rsv_is_empty(&rsv->rsv_window))
+               if (!rsv_is_empty(&rsv->rsv_window)) {
+                       trace_ext3_discard_reservation(inode, rsv);
                        rsv_window_remove(inode->i_sb, rsv);
+               }
                spin_unlock(rsv_lock);
        }
 }
@@ -683,14 +688,10 @@ error_return:
 void ext3_free_blocks(handle_t *handle, struct inode *inode,
                        ext3_fsblk_t block, unsigned long count)
 {
-       struct super_block * sb;
+       struct super_block *sb = inode->i_sb;
        unsigned long dquot_freed_blocks;
 
-       sb = inode->i_sb;
-       if (!sb) {
-               printk ("ext3_free_blocks: nonexistent device");
-               return;
-       }
+       trace_ext3_free_blocks(inode, block, count);
        ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
        if (dquot_freed_blocks)
                dquot_free_block(inode, dquot_freed_blocks);
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
        else
                start_block = grp_goal + group_first_block;
 
+       trace_ext3_alloc_new_reservation(sb, start_block);
        size = my_rsv->rsv_goal_size;
 
        if (!rsv_is_empty(&my_rsv->rsv_window)) {
@@ -1230,8 +1232,11 @@ retry:
         * check if the first free block is within the
         * free space we just reserved
         */
-       if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
+       if (start_block >= my_rsv->rsv_start &&
+           start_block <= my_rsv->rsv_end) {
+               trace_ext3_reserved(sb, start_block, my_rsv);
                return 0;               /* success */
+       }
        /*
         * if the first free bit we found is out of the reservable space
         * continue search for next reservable space,
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
 
        *errp = -ENOSPC;
        sb = inode->i_sb;
-       if (!sb) {
-               printk("ext3_new_block: nonexistent device");
-               return 0;
-       }
 
        /*
         * Check quota for allocation of this block.
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
                return 0;
        }
 
+       trace_ext3_request_blocks(inode, goal, num);
+
        sbi = EXT3_SB(sb);
-       es = EXT3_SB(sb)->s_es;
+       es = sbi->s_es;
        ext3_debug("goal=%lu.\n", goal);
        /*
         * Allocate a block from reservation only when
@@ -1742,6 +1745,10 @@ allocated:
        brelse(bitmap_bh);
        dquot_free_block(inode, *count-num);
        *count = num;
+
+       trace_ext3_allocate_blocks(inode, goal, num,
+                                  (unsigned long long)ret_block);
+
        return ret_block;
 
 io_error:
@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
                if ((next - start) < minblocks)
                        goto free_extent;
 
+               trace_ext3_discard_blocks(sb, discard_block, next - start);
                 /* Send the TRIM command down to the device */
                err = sb_issue_discard(sb, discard_block, next - start,
                                       GFP_NOFS, 0);
@@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
        if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
                return -EINVAL;
        if (start >= max_blks)
-               goto out;
+               return -EINVAL;
        if (start + len > max_blks)
                len = max_blks - start;
 
@@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
        if (ret >= 0)
                ret = 0;
-
-out:
        range->len = trimmed * sb->s_blocksize;
 
        return ret;
index 2be5b99097f13ad44f2db41ca680bfc82a3fa741..724df69847dca1ef2b22ee4827fb6f5c5003ef14 100644 (file)
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
 };
 
 const struct inode_operations ext3_file_inode_operations = {
-       .truncate       = ext3_truncate,
        .setattr        = ext3_setattr,
 #ifdef CONFIG_EXT3_FS_XATTR
        .setxattr       = generic_setxattr,
index 0bcf63adb80a9290866c42153bfe6af37777b885..d494c554c6e69e436e4dbccee6c9e5e3d6ab8060 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/jbd.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
+#include <trace/events/ext3.h>
 
 /*
  * akpm: A new design for ext3_sync_file().
@@ -51,12 +52,14 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        int ret, needs_barrier = 0;
        tid_t commit_tid;
 
+       trace_ext3_sync_file_enter(file, datasync);
+
        if (inode->i_sb->s_flags & MS_RDONLY)
                return 0;
 
        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret)
-               return ret;
+               goto out;
 
        /*
         * Taking the mutex here just to keep consistent with how fsync was
@@ -83,7 +86,8 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         */
        if (ext3_should_journal_data(inode)) {
                mutex_unlock(&inode->i_mutex);
-               return ext3_force_commit(inode->i_sb);
+               ret = ext3_force_commit(inode->i_sb);
+               goto out;
        }
 
        if (datasync)
@@ -104,6 +108,9 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         */
        if (needs_barrier)
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+
        mutex_unlock(&inode->i_mutex);
+out:
+       trace_ext3_sync_file_exit(inode, ret);
        return ret;
 }
index bfc2dc43681d41c6b54fe405174cc480de1663e9..bf09cbf938cc155c3328b6cc6d6a8a6f1bed637c 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/buffer_head.h>
 #include <linux/random.h>
 #include <linux/bitops.h>
+#include <trace/events/ext3.h>
 
 #include <asm/byteorder.h>
 
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 
        ino = inode->i_ino;
        ext3_debug ("freeing inode %lu\n", ino);
+       trace_ext3_free_inode(inode);
 
        is_directory = S_ISDIR(inode->i_mode);
 
@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
                return ERR_PTR(-EPERM);
 
        sb = dir->i_sb;
+       trace_ext3_request_inode(dir, mode);
        inode = new_inode(sb);
        if (!inode)
                return ERR_PTR(-ENOMEM);
@@ -601,6 +604,7 @@ got:
        }
 
        ext3_debug("allocating inode %lu\n", inode->i_ino);
+       trace_ext3_allocate_inode(inode, dir, mode);
        goto really_out;
 fail:
        ext3_std_error(sb, err);
index 2978a2a17a59b8c0e34a381f3bba4ab1b059d396..04da6acde85dfbcfb50d89302c3f975baab3c76d 100644 (file)
 #include <linux/bio.h>
 #include <linux/fiemap.h>
 #include <linux/namei.h>
+#include <trace/events/ext3.h>
 #include "xattr.h"
 #include "acl.h"
 
 static int ext3_writepage_trans_blocks(struct inode *inode);
+static int ext3_block_truncate_page(struct inode *inode, loff_t from);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
 
        might_sleep();
 
+       trace_ext3_forget(inode, is_metadata, blocknr);
        BUFFER_TRACE(bh, "enter");
 
        jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
  */
 void ext3_evict_inode (struct inode *inode)
 {
+       struct ext3_inode_info *ei = EXT3_I(inode);
        struct ext3_block_alloc_info *rsv;
        handle_t *handle;
        int want_delete = 0;
 
+       trace_ext3_evict_inode(inode);
        if (!inode->i_nlink && !is_bad_inode(inode)) {
                dquot_initialize(inode);
                want_delete = 1;
        }
 
+       /*
+        * When journalling data dirty buffers are tracked only in the journal.
+        * So although mm thinks everything is clean and ready for reaping the
+        * inode might still have some pages to write in the running
+        * transaction or waiting to be checkpointed. Thus calling
+        * journal_invalidatepage() (via truncate_inode_pages()) to discard
+        * these buffers can cause data loss. Also even if we did not discard
+        * these buffers, we would have no way to find them after the inode
+        * is reaped and thus user could see stale data if he tries to read
+        * them before the transaction is checkpointed. So be careful and
+        * force everything to disk here... We use ei->i_datasync_tid to
+        * store the newest transaction containing inode's data.
+        *
+        * Note that directories do not have this problem because they don't
+        * use page cache.
+        */
+       if (inode->i_nlink && ext3_should_journal_data(inode) &&
+           (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+               tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
+               journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
+
+               log_start_commit(journal, commit_tid);
+               log_wait_commit(journal, commit_tid);
+               filemap_write_and_wait(&inode->i_data);
+       }
        truncate_inode_pages(&inode->i_data, 0);
 
        ext3_discard_reservation(inode);
-       rsv = EXT3_I(inode)->i_block_alloc_info;
-       EXT3_I(inode)->i_block_alloc_info = NULL;
+       rsv = ei->i_block_alloc_info;
+       ei->i_block_alloc_info = NULL;
        if (unlikely(rsv))
                kfree(rsv);
 
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
        if (inode->i_blocks)
                ext3_truncate(inode);
        /*
-        * Kill off the orphan record which ext3_truncate created.
-        * AKPM: I think this can be inside the above `if'.
-        * Note that ext3_orphan_del() has to be able to cope with the
-        * deletion of a non-existent orphan - this is because we don't
-        * know if ext3_truncate() actually created an orphan record.
-        * (Well, we could do this if we need to, but heck - it works)
+        * Kill off the orphan record created when the inode lost the last
+        * link.  Note that ext3_orphan_del() has to be able to cope with the
+        * deletion of a non-existent orphan - ext3_truncate() could
+        * have removed the record.
         */
        ext3_orphan_del(handle, inode);
-       EXT3_I(inode)->i_dtime  = get_seconds();
+       ei->i_dtime = get_seconds();
 
        /*
         * One subtle ordering requirement: if anything has gone wrong
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
        ext3_fsblk_t first_block = 0;
 
 
+       trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
        J_ASSERT(handle != NULL || create == 0);
        depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
 
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
        if (!create || err == -EIO)
                goto cleanup;
 
+       /*
+        * Block out ext3_truncate while we alter the tree
+        */
        mutex_lock(&ei->truncate_mutex);
 
        /*
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
         */
        count = ext3_blks_to_allocate(partial, indirect_blks,
                                        maxblocks, blocks_to_boundary);
-       /*
-        * Block out ext3_truncate while we alter the tree
-        */
        err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
                                offsets + (partial - chain), partial);
 
@@ -970,6 +999,9 @@ cleanup:
        }
        BUFFER_TRACE(bh_result, "returned");
 out:
+       trace_ext3_get_blocks_exit(inode, iblock,
+                                  depth ? le32_to_cpu(chain[depth-1].key) : 0,
+                                  count, err);
        return err;
 }
 
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
        ext3_truncate(inode);
 }
 
+/*
+ * Truncate blocks that were not used by direct IO write. We have to zero out
+ * the last file block as well because direct IO might have written to it.
+ */
+static void ext3_truncate_failed_direct_write(struct inode *inode)
+{
+       ext3_block_truncate_page(inode, inode->i_size);
+       ext3_truncate(inode);
+}
+
 static int ext3_write_begin(struct file *file, struct address_space *mapping,
                                loff_t pos, unsigned len, unsigned flags,
                                struct page **pagep, void **fsdata)
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
         * we allocate blocks but write fails for some reason */
        int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
 
+       trace_ext3_write_begin(inode, pos, len, flags);
+
        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
        unsigned from, to;
        int ret = 0, ret2;
 
+       trace_ext3_ordered_write_end(inode, pos, len, copied);
        copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
 
        from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
        struct inode *inode = file->f_mapping->host;
        int ret;
 
+       trace_ext3_writeback_write_end(inode, pos, len, copied);
        copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
        update_file_sizes(inode, pos, copied);
        /*
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
 {
        handle_t *handle = ext3_journal_current_handle();
        struct inode *inode = mapping->host;
+       struct ext3_inode_info *ei = EXT3_I(inode);
        int ret = 0, ret2;
        int partial = 0;
        unsigned from, to;
 
+       trace_ext3_journalled_write_end(inode, pos, len, copied);
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
 
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
        if (pos + len > inode->i_size && ext3_can_truncate(inode))
                ext3_orphan_add(handle, inode);
        ext3_set_inode_state(inode, EXT3_STATE_JDATA);
-       if (inode->i_size > EXT3_I(inode)->i_disksize) {
-               EXT3_I(inode)->i_disksize = inode->i_size;
+       atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
+       if (inode->i_size > ei->i_disksize) {
+               ei->i_disksize = inode->i_size;
                ret2 = ext3_mark_inode_dirty(handle, inode);
                if (!ret)
                        ret = ret2;
@@ -1577,6 +1626,7 @@ static int ext3_ordered_writepage(struct page *page,
        if (ext3_journal_current_handle())
                goto out_fail;
 
+       trace_ext3_ordered_writepage(page);
        if (!page_has_buffers(page)) {
                create_empty_buffers(page, inode->i_sb->s_blocksize,
                                (1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1647,6 +1697,7 @@ static int ext3_writeback_writepage(struct page *page,
        if (ext3_journal_current_handle())
                goto out_fail;
 
+       trace_ext3_writeback_writepage(page);
        if (page_has_buffers(page)) {
                if (!walk_page_buffers(NULL, page_buffers(page), 0,
                                      PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1689,6 +1740,7 @@ static int ext3_journalled_writepage(struct page *page,
        if (ext3_journal_current_handle())
                goto no_write;
 
+       trace_ext3_journalled_writepage(page);
        handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
@@ -1715,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page,
                if (ret == 0)
                        ret = err;
                ext3_set_inode_state(inode, EXT3_STATE_JDATA);
+               atomic_set(&EXT3_I(inode)->i_datasync_tid,
+                          handle->h_transaction->t_tid);
                unlock_page(page);
        } else {
                /*
@@ -1739,6 +1793,7 @@ out_unlock:
 
 static int ext3_readpage(struct file *file, struct page *page)
 {
+       trace_ext3_readpage(page);
        return mpage_readpage(page, ext3_get_block);
 }
 
@@ -1753,6 +1808,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
 {
        journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
+       trace_ext3_invalidatepage(page, offset);
+
        /*
         * If it's a full truncate we just forget about the pending dirtying
         */
@@ -1766,6 +1823,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
 {
        journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
+       trace_ext3_releasepage(page);
        WARN_ON(PageChecked(page));
        if (!page_has_buffers(page))
                return 0;
@@ -1794,6 +1852,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
        size_t count = iov_length(iov, nr_segs);
        int retries = 0;
 
+       trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+
        if (rw == WRITE) {
                loff_t final_size = offset + count;
 
@@ -1827,7 +1887,7 @@ retry:
                loff_t end = offset + iov_length(iov, nr_segs);
 
                if (end > isize)
-                       vmtruncate(inode, isize);
+                       ext3_truncate_failed_direct_write(inode);
        }
        if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
@@ -1841,7 +1901,7 @@ retry:
                        /* This is really bad luck. We've written the data
                         * but cannot extend i_size. Truncate allocated blocks
                         * and pretend the write failed... */
-                       ext3_truncate(inode);
+                       ext3_truncate_failed_direct_write(inode);
                        ret = PTR_ERR(handle);
                        goto out;
                }
@@ -1867,6 +1927,8 @@ retry:
                        ret = err;
        }
 out:
+       trace_ext3_direct_IO_exit(inode, offset,
+                               iov_length(iov, nr_segs), rw, ret);
        return ret;
 }
 
@@ -1949,17 +2011,24 @@ void ext3_set_aops(struct inode *inode)
  * This required during truncate. We need to physically zero the tail end
  * of that block so it doesn't yield old data if the file is later grown.
  */
-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
-               struct address_space *mapping, loff_t from)
+static int ext3_block_truncate_page(struct inode *inode, loff_t from)
 {
        ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
-       unsigned offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned offset = from & (PAGE_CACHE_SIZE - 1);
        unsigned blocksize, iblock, length, pos;
-       struct inode *inode = mapping->host;
+       struct page *page;
+       handle_t *handle = NULL;
        struct buffer_head *bh;
        int err = 0;
 
+       /* Truncated on block boundary - nothing to do */
        blocksize = inode->i_sb->s_blocksize;
+       if ((from & (blocksize - 1)) == 0)
+               return 0;
+
+       page = grab_cache_page(inode->i_mapping, index);
+       if (!page)
+               return -ENOMEM;
        length = blocksize - (offset & (blocksize - 1));
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
@@ -2004,11 +2073,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
                        goto unlock;
        }
 
+       /* data=writeback mode doesn't need transaction to zero-out data */
+       if (!ext3_should_writeback_data(inode)) {
+               /* We journal at most one block */
+               handle = ext3_journal_start(inode, 1);
+               if (IS_ERR(handle)) {
+                       clear_highpage(page);
+                       flush_dcache_page(page);
+                       err = PTR_ERR(handle);
+                       goto unlock;
+               }
+       }
+
        if (ext3_should_journal_data(inode)) {
                BUFFER_TRACE(bh, "get write access");
                err = ext3_journal_get_write_access(handle, bh);
                if (err)
-                       goto unlock;
+                       goto stop;
        }
 
        zero_user(page, offset, length);
@@ -2022,6 +2103,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
                        err = ext3_journal_dirty_data(handle, bh);
                mark_buffer_dirty(bh);
        }
+stop:
+       if (handle)
+               ext3_journal_stop(handle);
 
 unlock:
        unlock_page(page);
@@ -2390,8 +2474,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
 
 int ext3_can_truncate(struct inode *inode)
 {
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return 0;
        if (S_ISREG(inode->i_mode))
                return 1;
        if (S_ISDIR(inode->i_mode))
@@ -2435,7 +2517,6 @@ void ext3_truncate(struct inode *inode)
        struct ext3_inode_info *ei = EXT3_I(inode);
        __le32 *i_data = ei->i_data;
        int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
-       struct address_space *mapping = inode->i_mapping;
        int offsets[4];
        Indirect chain[4];
        Indirect *partial;
@@ -2443,7 +2524,8 @@ void ext3_truncate(struct inode *inode)
        int n;
        long last_block;
        unsigned blocksize = inode->i_sb->s_blocksize;
-       struct page *page;
+
+       trace_ext3_truncate_enter(inode);
 
        if (!ext3_can_truncate(inode))
                goto out_notrans;
@@ -2451,37 +2533,12 @@ void ext3_truncate(struct inode *inode)
        if (inode->i_size == 0 && ext3_should_writeback_data(inode))
                ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
 
-       /*
-        * We have to lock the EOF page here, because lock_page() nests
-        * outside journal_start().
-        */
-       if ((inode->i_size & (blocksize - 1)) == 0) {
-               /* Block boundary? Nothing to do */
-               page = NULL;
-       } else {
-               page = grab_cache_page(mapping,
-                               inode->i_size >> PAGE_CACHE_SHIFT);
-               if (!page)
-                       goto out_notrans;
-       }
-
        handle = start_transaction(inode);
-       if (IS_ERR(handle)) {
-               if (page) {
-                       clear_highpage(page);
-                       flush_dcache_page(page);
-                       unlock_page(page);
-                       page_cache_release(page);
-               }
+       if (IS_ERR(handle))
                goto out_notrans;
-       }
 
        last_block = (inode->i_size + blocksize-1)
                                        >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
-
-       if (page)
-               ext3_block_truncate_page(handle, page, mapping, inode->i_size);
-
        n = ext3_block_to_path(inode, last_block, offsets, NULL);
        if (n == 0)
                goto out_stop;  /* error */
@@ -2596,6 +2653,7 @@ out_stop:
                ext3_orphan_del(handle, inode);
 
        ext3_journal_stop(handle);
+       trace_ext3_truncate_exit(inode);
        return;
 out_notrans:
        /*
@@ -2604,6 +2662,7 @@ out_notrans:
         */
        if (inode->i_nlink)
                ext3_orphan_del(NULL, inode);
+       trace_ext3_truncate_exit(inode);
 }
 
 static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2745,6 +2804,7 @@ make_io:
                 * has in-inode xattrs, or we don't have this inode in memory.
                 * Read the block from disk.
                 */
+               trace_ext3_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
                submit_bh(READ_META, bh);
@@ -3229,18 +3289,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
                }
 
                error = ext3_orphan_add(handle, inode);
+               if (error) {
+                       ext3_journal_stop(handle);
+                       goto err_out;
+               }
                EXT3_I(inode)->i_disksize = attr->ia_size;
-               rc = ext3_mark_inode_dirty(handle, inode);
-               if (!error)
-                       error = rc;
+               error = ext3_mark_inode_dirty(handle, inode);
                ext3_journal_stop(handle);
+               if (error) {
+                       /* Some hard fs error must have happened. Bail out. */
+                       ext3_orphan_del(NULL, inode);
+                       goto err_out;
+               }
+               rc = ext3_block_truncate_page(inode, attr->ia_size);
+               if (rc) {
+                       /* Cleanup orphan list and exit */
+                       handle = ext3_journal_start(inode, 3);
+                       if (IS_ERR(handle)) {
+                               ext3_orphan_del(NULL, inode);
+                               goto err_out;
+                       }
+                       ext3_orphan_del(handle, inode);
+                       ext3_journal_stop(handle);
+                       goto err_out;
+               }
        }
 
        if ((attr->ia_valid & ATTR_SIZE) &&
            attr->ia_size != i_size_read(inode)) {
-               rc = vmtruncate(inode, attr->ia_size);
-               if (rc)
-                       goto err_out;
+               truncate_setsize(inode, attr->ia_size);
+               ext3_truncate(inode);
        }
 
        setattr_copy(inode, attr);
@@ -3374,6 +3452,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
        int err;
 
        might_sleep();
+       trace_ext3_mark_inode_dirty(inode, _RET_IP_);
        err = ext3_reserve_inode_write(handle, inode, &iloc);
        if (!err)
                err = ext3_mark_iloc_dirty(handle, inode, &iloc);
index f4090bd2f345218df867b618806569415c1de688..c7f43944f160e080973ce449a6c97a4bc2a4d2bc 100644 (file)
@@ -285,7 +285,7 @@ group_add_out:
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
-               if (copy_from_user(&range, (struct fstrim_range *)arg,
+               if (copy_from_user(&range, (struct fstrim_range __user *)arg,
                                   sizeof(range)))
                        return -EFAULT;
 
@@ -293,7 +293,7 @@ group_add_out:
                if (ret < 0)
                        return ret;
 
-               if (copy_to_user((struct fstrim_range *)arg, &range,
+               if (copy_to_user((struct fstrim_range __user *)arg, &range,
                                 sizeof(range)))
                        return -EFAULT;
 
index 3b57230a17bbf9adad3748d790cc3376e7b3f9c2..6e18a0b7750db81d5900ad813614e8455bf4b82d 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 #include <linux/bio.h>
+#include <trace/events/ext3.h>
 
 #include "namei.h"
 #include "xattr.h"
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
                                while (len--) printk("%c", *name++);
                                ext3fs_dirhash(de->name, de->name_len, &h);
                                printk(":%x.%u ", h.hash,
-                                      ((char *) de - base));
+                                      (unsigned) ((char *) de - base));
                        }
                        space += EXT3_DIR_REC_LEN(de->name_len);
                        names++;
@@ -1013,7 +1014,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
 
        *err = -ENOENT;
 errout:
-       dxtrace(printk("%s not found\n", name));
+       dxtrace(printk("%s not found\n", entry->name));
        dx_release (frames);
        return NULL;
 }
@@ -2140,6 +2141,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
        struct ext3_dir_entry_2 * de;
        handle_t *handle;
 
+       trace_ext3_unlink_enter(dir, dentry);
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        dquot_initialize(dir);
@@ -2185,6 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
 end_unlink:
        ext3_journal_stop(handle);
        brelse (bh);
+       trace_ext3_unlink_exit(dentry, retval);
        return retval;
 }
 
index b57ea2f912693e8f36ab0c1bd74e3e53205689e8..7beb69ae0015996140fe916712f4b1e13860faa0 100644 (file)
@@ -44,6 +44,9 @@
 #include "acl.h"
 #include "namei.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext3.h>
+
 #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
   #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
 #else
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
        return &ei->vfs_inode;
 }
 
+static int ext3_drop_inode(struct inode *inode)
+{
+       int drop = generic_drop_inode(inode);
+
+       trace_ext3_drop_inode(inode, drop);
+       return drop;
+}
+
 static void ext3_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = {
        .destroy_inode  = ext3_destroy_inode,
        .write_inode    = ext3_write_inode,
        .dirty_inode    = ext3_dirty_inode,
+       .drop_inode     = ext3_drop_inode,
        .evict_inode    = ext3_evict_inode,
        .put_super      = ext3_put_super,
        .sync_fs        = ext3_sync_fs,
@@ -2509,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
 {
        tid_t target;
 
+       trace_ext3_sync_fs(sb, wait);
        if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
                if (wait)
                        log_wait_commit(EXT3_SB(sb)->s_journal, target);
index 32e6cc23bd9ad69f1f280f5148f2a5c41ffd16ea..d565759d82eee0c06b10fa44e77abc51f29827d7 100644 (file)
@@ -803,8 +803,16 @@ inserted:
                        /* We need to allocate a new block */
                        ext3_fsblk_t goal = ext3_group_first_block_no(sb,
                                                EXT3_I(inode)->i_block_group);
-                       ext3_fsblk_t block = ext3_new_block(handle, inode,
-                                                       goal, &error);
+                       ext3_fsblk_t block;
+
+                       /*
+                        * Protect us against concurrent allocations to the
+                        * same inode from ext3_..._writepage(). Reservation
+                        * code does not expect racing allocations.
+                        */
+                       mutex_lock(&EXT3_I(inode)->truncate_mutex);
+                       block = ext3_new_block(handle, inode, goal, &error);
+                       mutex_unlock(&EXT3_I(inode)->truncate_mutex);
                        if (error)
                                goto cleanup;
                        ea_idebug(inode, "creating block %d", block);
index e4b87bc1fa56e0dd2ff8c77ae853367f3a529a81..f94fc48ff3a0c2676156c4a3545f011f9e340a7e 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/jbd.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <trace/events/jbd.h>
 
 /*
  * Unlink a buffer from a transaction checkpoint list.
@@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
        if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
            !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+               /*
+                * Get our reference so that bh cannot be freed before
+                * we unlock it
+                */
+               get_bh(bh);
                JBUFFER_TRACE(jh, "remove from checkpoint list");
                ret = __journal_remove_checkpoint(jh) + 1;
                jbd_unlock_bh_state(bh);
-               journal_remove_journal_head(bh);
                BUFFER_TRACE(bh, "release");
                __brelse(bh);
        } else {
@@ -220,8 +226,8 @@ restart:
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
+               get_bh(bh);
                if (buffer_locked(bh)) {
-                       get_bh(bh);
                        spin_unlock(&journal->j_list_lock);
                        jbd_unlock_bh_state(bh);
                        wait_on_buffer(bh);
@@ -240,7 +246,6 @@ restart:
                 */
                released = __journal_remove_checkpoint(jh);
                jbd_unlock_bh_state(bh);
-               journal_remove_journal_head(bh);
                __brelse(bh);
        }
 
@@ -253,9 +258,12 @@ static void
 __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
        int i;
+       struct blk_plug plug;
 
+       blk_start_plug(&plug);
        for (i = 0; i < *batch_count; i++)
-               write_dirty_buffer(bhs[i], WRITE);
+               write_dirty_buffer(bhs[i], WRITE_SYNC);
+       blk_finish_plug(&plug);
 
        for (i = 0; i < *batch_count; i++) {
                struct buffer_head *bh = bhs[i];
@@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
                ret = 1;
                if (unlikely(buffer_write_io_error(bh)))
                        ret = -EIO;
+               get_bh(bh);
                J_ASSERT_JH(jh, !buffer_jbddirty(bh));
                BUFFER_TRACE(bh, "remove from checkpoint");
                __journal_remove_checkpoint(jh);
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
-               journal_remove_journal_head(bh);
                __brelse(bh);
        } else {
                /*
@@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal)
         * journal straight away.
         */
        result = cleanup_journal_tail(journal);
+       trace_jbd_checkpoint(journal, result);
        jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
        if (result <= 0)
                return result;
@@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal)
        if (blocknr < journal->j_tail)
                freed = freed + journal->j_last - journal->j_first;
 
+       trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
        jbd_debug(1,
                  "Cleaning journal tail from %d to %d (offset %u), "
                  "freeing %u\n",
@@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal)
 /*
  * journal_clean_one_cp_list
  *
- * Find all the written-back checkpoint buffers in the given list and release them.
+ * Find all the written-back checkpoint buffers in the given list and release
+ * them.
  *
- * Called with the journal locked.
  * Called with j_list_lock held.
  * Returns number of bufers reaped (for debug)
  */
@@ -632,8 +642,8 @@ out:
  * checkpoint lists.
  *
  * The function returns 1 if it frees the transaction, 0 otherwise.
+ * The function can free jh and bh.
  *
- * This function is called with the journal locked.
  * This function is called with j_list_lock held.
  * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
@@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
        }
        journal = transaction->t_journal;
 
+       JBUFFER_TRACE(jh, "removing from transaction");
        __buffer_unlink(jh);
        jh->b_cp_transaction = NULL;
+       journal_put_journal_head(jh);
 
        if (transaction->t_checkpoint_list != NULL ||
            transaction->t_checkpoint_io_list != NULL)
                goto out;
-       JBUFFER_TRACE(jh, "transaction has no more buffers");
 
        /*
         * There is one special case to worry about: if we have just pulled the
@@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
         * The locking here around t_state is a bit sleazy.
         * See the comment at the end of journal_commit_transaction().
         */
-       if (transaction->t_state != T_FINISHED) {
-               JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+       if (transaction->t_state != T_FINISHED)
                goto out;
-       }
 
        /* OK, that was the last buffer for the transaction: we can now
           safely remove this transaction from the log */
@@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
        wake_up(&journal->j_wait_logspace);
        ret = 1;
 out:
-       JBUFFER_TRACE(jh, "exit");
        return ret;
 }
 
@@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
        J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
        J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
 
+       /* Get reference for checkpointing transaction */
+       journal_grab_journal_head(jh2bh(jh));
        jh->b_cp_transaction = transaction;
 
        if (!transaction->t_checkpoint_list) {
@@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
        J_ASSERT(journal->j_committing_transaction != transaction);
        J_ASSERT(journal->j_running_transaction != transaction);
 
+       trace_jbd_drop_transaction(journal, transaction);
        jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
        kfree(transaction);
 }
index 72ffa974b0b8d52852e25d7e016f7b4f1fc50b4a..8799207df058bbe24fa2f852fcc78dbba14828ca 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/pagemap.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <trace/events/jbd.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -204,6 +205,8 @@ write_out_data:
                        if (!trylock_buffer(bh)) {
                                BUFFER_TRACE(bh, "needs blocking lock");
                                spin_unlock(&journal->j_list_lock);
+                               trace_jbd_do_submit_data(journal,
+                                                    commit_transaction);
                                /* Write out all data to prevent deadlocks */
                                journal_do_submit_data(wbuf, bufs, write_op);
                                bufs = 0;
@@ -236,6 +239,8 @@ write_out_data:
                        jbd_unlock_bh_state(bh);
                        if (bufs == journal->j_wbufsize) {
                                spin_unlock(&journal->j_list_lock);
+                               trace_jbd_do_submit_data(journal,
+                                                    commit_transaction);
                                journal_do_submit_data(wbuf, bufs, write_op);
                                bufs = 0;
                                goto write_out_data;
@@ -253,10 +258,6 @@ write_out_data:
                        jbd_unlock_bh_state(bh);
                        if (locked)
                                unlock_buffer(bh);
-                       journal_remove_journal_head(bh);
-                       /* One for our safety reference, other for
-                        * journal_remove_journal_head() */
-                       put_bh(bh);
                        release_data_buffer(bh);
                }
 
@@ -266,6 +267,7 @@ write_out_data:
                }
        }
        spin_unlock(&journal->j_list_lock);
+       trace_jbd_do_submit_data(journal, commit_transaction);
        journal_do_submit_data(wbuf, bufs, write_op);
 
        return err;
@@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal)
        commit_transaction = journal->j_running_transaction;
        J_ASSERT(commit_transaction->t_state == T_RUNNING);
 
+       trace_jbd_start_commit(journal, commit_transaction);
        jbd_debug(1, "JBD: starting commit of transaction %d\n",
                        commit_transaction->t_tid);
 
        spin_lock(&journal->j_state_lock);
        commit_transaction->t_state = T_LOCKED;
 
+       trace_jbd_commit_locking(journal, commit_transaction);
        spin_lock(&commit_transaction->t_handle_lock);
        while (commit_transaction->t_updates) {
                DEFINE_WAIT(wait);
@@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal)
         */
        journal_switch_revoke_table(journal);
 
+       trace_jbd_commit_flushing(journal, commit_transaction);
        commit_transaction->t_state = T_FLUSH;
        journal->j_committing_transaction = commit_transaction;
        journal->j_running_transaction = NULL;
@@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
                }
                if (buffer_jbd(bh) && bh2jh(bh) == jh &&
                    jh->b_transaction == commit_transaction &&
-                   jh->b_jlist == BJ_Locked) {
+                   jh->b_jlist == BJ_Locked)
                        __journal_unfile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
-                       journal_remove_journal_head(bh);
-                       put_bh(bh);
-               } else {
-                       jbd_unlock_bh_state(bh);
-               }
+               jbd_unlock_bh_state(bh);
                release_data_buffer(bh);
                cond_resched_lock(&journal->j_list_lock);
        }
@@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal)
        commit_transaction->t_state = T_COMMIT;
        spin_unlock(&journal->j_state_lock);
 
+       trace_jbd_commit_logging(journal, commit_transaction);
        J_ASSERT(commit_transaction->t_nr_buffers <=
                 commit_transaction->t_outstanding_credits);
 
@@ -797,10 +798,16 @@ restart_loop:
        while (commit_transaction->t_forget) {
                transaction_t *cp_transaction;
                struct buffer_head *bh;
+               int try_to_free = 0;
 
                jh = commit_transaction->t_forget;
                spin_unlock(&journal->j_list_lock);
                bh = jh2bh(jh);
+               /*
+                * Get a reference so that bh cannot be freed before we are
+                * done with it.
+                */
+               get_bh(bh);
                jbd_lock_bh_state(bh);
                J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
                        jh->b_transaction == journal->j_running_transaction);
@@ -858,28 +865,27 @@ restart_loop:
                        __journal_insert_checkpoint(jh, commit_transaction);
                        if (is_journal_aborted(journal))
                                clear_buffer_jbddirty(bh);
-                       JBUFFER_TRACE(jh, "refile for checkpoint writeback");
-                       __journal_refile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
                } else {
                        J_ASSERT_BH(bh, !buffer_dirty(bh));
-                       /* The buffer on BJ_Forget list and not jbddirty means
+                       /*
+                        * The buffer on BJ_Forget list and not jbddirty means
                         * it has been freed by this transaction and hence it
                         * could not have been reallocated until this
                         * transaction has committed. *BUT* it could be
                         * reallocated once we have written all the data to
                         * disk and before we process the buffer on BJ_Forget
-                        * list. */
-                       JBUFFER_TRACE(jh, "refile or unfile freed buffer");
-                       __journal_refile_buffer(jh);
-                       if (!jh->b_transaction) {
-                               jbd_unlock_bh_state(bh);
-                                /* needs a brelse */
-                               journal_remove_journal_head(bh);
-                               release_buffer_page(bh);
-                       } else
-                               jbd_unlock_bh_state(bh);
+                        * list.
+                        */
+                       if (!jh->b_next_transaction)
+                               try_to_free = 1;
                }
+               JBUFFER_TRACE(jh, "refile or unfile freed buffer");
+               __journal_refile_buffer(jh);
+               jbd_unlock_bh_state(bh);
+               if (try_to_free)
+                       release_buffer_page(bh);
+               else
+                       __brelse(bh);
                cond_resched_lock(&journal->j_list_lock);
        }
        spin_unlock(&journal->j_list_lock);
@@ -946,6 +952,7 @@ restart_loop:
        }
        spin_unlock(&journal->j_list_lock);
 
+       trace_jbd_end_commit(journal, commit_transaction);
        jbd_debug(1, "JBD: commit %d complete, head %d\n",
                  journal->j_commit_sequence, journal->j_tail_sequence);
 
index e2d4285fbe90ebcc511a96574283418bc933634d..9fe061fb8779be389155a05672b267c8071623e7 100644 (file)
@@ -38,6 +38,9 @@
 #include <linux/debugfs.h>
 #include <linux/ratelimit.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/jbd.h>
+
 #include <asm/uaccess.h>
 #include <asm/page.h>
 
@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait)
        } else
                write_dirty_buffer(bh, WRITE);
 
+       trace_jbd_update_superblock_end(journal, wait);
 out:
        /* If we have just flushed the log (by marking s_start==0), then
         * any future commit will have to be careful to update the
@@ -1799,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
  * When a buffer has its BH_JBD bit set it is immune from being released by
  * core kernel code, mainly via ->b_count.
  *
- * A journal_head may be detached from its buffer_head when the journal_head's
- * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call journal_remove_journal_head() to indicate that the
- * journal_head can be dropped if needed.
+ * A journal_head is detached from its buffer_head when the journal_head's
+ * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
+ * transaction (b_cp_transaction) hold their references to b_jcount.
  *
  * Various places in the kernel want to attach a journal_head to a buffer_head
  * _before_ attaching the journal_head to a transaction.  To protect the
@@ -1815,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
  *     (Attach a journal_head if needed.  Increments b_jcount)
  *     struct journal_head *jh = journal_add_journal_head(bh);
  *     ...
- *     jh->b_transaction = xxx;
- *     journal_put_journal_head(jh);
- *
- * Now, the journal_head's b_jcount is zero, but it is safe from being released
- * because it has a non-zero b_transaction.
+ *      (Get another reference for transaction)
+ *      journal_grab_journal_head(bh);
+ *      jh->b_transaction = xxx;
+ *      (Put original reference)
+ *      journal_put_journal_head(jh);
  */
 
 /*
  * Give a buffer_head a journal_head.
  *
- * Doesn't need the journal lock.
  * May sleep.
  */
 struct journal_head *journal_add_journal_head(struct buffer_head *bh)
@@ -1889,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
        struct journal_head *jh = bh2jh(bh);
 
        J_ASSERT_JH(jh, jh->b_jcount >= 0);
-
-       get_bh(bh);
-       if (jh->b_jcount == 0) {
-               if (jh->b_transaction == NULL &&
-                               jh->b_next_transaction == NULL &&
-                               jh->b_cp_transaction == NULL) {
-                       J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
-                       J_ASSERT_BH(bh, buffer_jbd(bh));
-                       J_ASSERT_BH(bh, jh2bh(jh) == bh);
-                       BUFFER_TRACE(bh, "remove journal_head");
-                       if (jh->b_frozen_data) {
-                               printk(KERN_WARNING "%s: freeing "
-                                               "b_frozen_data\n",
-                                               __func__);
-                               jbd_free(jh->b_frozen_data, bh->b_size);
-                       }
-                       if (jh->b_committed_data) {
-                               printk(KERN_WARNING "%s: freeing "
-                                               "b_committed_data\n",
-                                               __func__);
-                               jbd_free(jh->b_committed_data, bh->b_size);
-                       }
-                       bh->b_private = NULL;
-                       jh->b_bh = NULL;        /* debug, really */
-                       clear_buffer_jbd(bh);
-                       __brelse(bh);
-                       journal_free_journal_head(jh);
-               } else {
-                       BUFFER_TRACE(bh, "journal_head was locked");
-               }
+       J_ASSERT_JH(jh, jh->b_transaction == NULL);
+       J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+       J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+       J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+       J_ASSERT_BH(bh, buffer_jbd(bh));
+       J_ASSERT_BH(bh, jh2bh(jh) == bh);
+       BUFFER_TRACE(bh, "remove journal_head");
+       if (jh->b_frozen_data) {
+               printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+               jbd_free(jh->b_frozen_data, bh->b_size);
        }
+       if (jh->b_committed_data) {
+               printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+               jbd_free(jh->b_committed_data, bh->b_size);
+       }
+       bh->b_private = NULL;
+       jh->b_bh = NULL;        /* debug, really */
+       clear_buffer_jbd(bh);
+       journal_free_journal_head(jh);
 }
 
 /*
- * journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head.   If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
- * time.  Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- */
-void journal_remove_journal_head(struct buffer_head *bh)
-{
-       jbd_lock_bh_journal_head(bh);
-       __journal_remove_journal_head(bh);
-       jbd_unlock_bh_journal_head(bh);
-}
-
-/*
- * Drop a reference on the passed journal_head.  If it fell to zero then try to
+ * Drop a reference on the passed journal_head.  If it fell to zero then
  * release the journal_head from the buffer_head.
  */
 void journal_put_journal_head(struct journal_head *jh)
@@ -1953,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh)
        jbd_lock_bh_journal_head(bh);
        J_ASSERT_JH(jh, jh->b_jcount > 0);
        --jh->b_jcount;
-       if (!jh->b_jcount && !jh->b_transaction) {
+       if (!jh->b_jcount) {
                __journal_remove_journal_head(bh);
+               jbd_unlock_bh_journal_head(bh);
                __brelse(bh);
-       }
-       jbd_unlock_bh_journal_head(bh);
+       } else
+               jbd_unlock_bh_journal_head(bh);
 }
 
 /*
index f7ee81a065dabae13e29501603a8c1726132d05c..7e59c6e66f9b79a9fd57d3d450e51ee3de79b722 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
 
 static void __journal_temp_unlink_buffer(struct journal_head *jh);
 
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 
 alloc_transaction:
        if (!journal->j_running_transaction) {
-               new_transaction = kzalloc(sizeof(*new_transaction),
-                                               GFP_NOFS|__GFP_NOFAIL);
+               new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
                if (!new_transaction) {
-                       ret = -ENOMEM;
-                       goto out;
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto alloc_transaction;
                }
        }
 
@@ -696,7 +696,6 @@ repeat:
        if (!jh->b_transaction) {
                JBUFFER_TRACE(jh, "no transaction");
                J_ASSERT_JH(jh, !jh->b_next_transaction);
-               jh->b_transaction = transaction;
                JBUFFER_TRACE(jh, "file as BJ_Reserved");
                spin_lock(&journal->j_list_lock);
                __journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
                 * committed and so it's safe to clear the dirty bit.
                 */
                clear_buffer_dirty(jh2bh(jh));
-               jh->b_transaction = transaction;
 
                /* first access by this transaction */
                jh->b_modified = 0;
@@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
         */
        JBUFFER_TRACE(jh, "cancelling revoke");
        journal_cancel_revoke(handle, jh);
-       journal_put_journal_head(jh);
 out:
+       journal_put_journal_head(jh);
        return err;
 }
 
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                                ret = -EIO;
                                goto no_journal;
                        }
-
-                       if (jh->b_transaction != NULL) {
+                       /* We might have slept so buffer could be refiled now */
+                       if (jh->b_transaction != NULL &&
+                           jh->b_transaction != handle->h_transaction) {
                                JBUFFER_TRACE(jh, "unfile from commit");
                                __journal_temp_unlink_buffer(jh);
                                /* It still points to the committing
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
                        JBUFFER_TRACE(jh, "not on correct data list: unfile");
                        J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
-                       __journal_temp_unlink_buffer(jh);
-                       jh->b_transaction = handle->h_transaction;
                        JBUFFER_TRACE(jh, "file as data");
                        __journal_file_buffer(jh, handle->h_transaction,
                                                BJ_SyncData);
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
                        __journal_file_buffer(jh, transaction, BJ_Forget);
                } else {
                        __journal_unfile_buffer(jh);
-                       journal_remove_journal_head(bh);
-                       __brelse(bh);
                        if (!buffer_jbd(bh)) {
                                spin_unlock(&journal->j_list_lock);
                                jbd_unlock_bh_state(bh);
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
                mark_buffer_dirty(bh);  /* Expose it to the VM */
 }
 
+/*
+ * Remove buffer from all transactions.
+ *
+ * Called with bh_state lock and j_list_lock
+ *
+ * jh and bh may be already freed when this function returns.
+ */
 void __journal_unfile_buffer(struct journal_head *jh)
 {
        __journal_temp_unlink_buffer(jh);
        jh->b_transaction = NULL;
+       journal_put_journal_head(jh);
 }
 
 void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
 {
-       jbd_lock_bh_state(jh2bh(jh));
+       struct buffer_head *bh = jh2bh(jh);
+
+       /* Get reference so that buffer cannot be freed before we unlock it */
+       get_bh(bh);
+       jbd_lock_bh_state(bh);
        spin_lock(&journal->j_list_lock);
        __journal_unfile_buffer(jh);
        spin_unlock(&journal->j_list_lock);
-       jbd_unlock_bh_state(jh2bh(jh));
+       jbd_unlock_bh_state(bh);
+       __brelse(bh);
 }
 
 /*
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
                        /* A written-back ordered data buffer */
                        JBUFFER_TRACE(jh, "release data");
                        __journal_unfile_buffer(jh);
-                       journal_remove_journal_head(bh);
-                       __brelse(bh);
                }
        } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
                /* written-back checkpointed metadata buffer */
                if (jh->b_jlist == BJ_None) {
                        JBUFFER_TRACE(jh, "remove from checkpoint list");
                        __journal_remove_checkpoint(jh);
-                       journal_remove_journal_head(bh);
-                       __brelse(bh);
                }
        }
        spin_unlock(&journal->j_list_lock);
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
                /*
                 * We take our own ref against the journal_head here to avoid
                 * having to add tons of locking around each instance of
-                * journal_remove_journal_head() and journal_put_journal_head().
+                * journal_put_journal_head().
                 */
                jh = journal_grab_journal_head(bh);
                if (!jh)
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
        int may_free = 1;
        struct buffer_head *bh = jh2bh(jh);
 
-       __journal_unfile_buffer(jh);
-
        if (jh->b_cp_transaction) {
                JBUFFER_TRACE(jh, "on running+cp transaction");
+               __journal_temp_unlink_buffer(jh);
                /*
                 * We don't want to write the buffer anymore, clear the
                 * bit so that we don't confuse checks in
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
                may_free = 0;
        } else {
                JBUFFER_TRACE(jh, "on running transaction");
-               journal_remove_journal_head(bh);
-               __brelse(bh);
+               __journal_unfile_buffer(jh);
        }
        return may_free;
 }
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
 
        if (jh->b_transaction)
                __journal_temp_unlink_buffer(jh);
+       else
+               journal_grab_journal_head(bh);
        jh->b_transaction = transaction;
 
        switch (jlist) {
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
  * already started to be used by a subsequent transaction, refile the
  * buffer on that transaction's metadata list.
  *
- * Called under journal->j_list_lock
- *
+ * Called under j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh))
+ *
+ * jh and bh may be already freed when this function returns
  */
 void __journal_refile_buffer(struct journal_head *jh)
 {
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
 
        was_dirty = test_clear_buffer_jbddirty(bh);
        __journal_temp_unlink_buffer(jh);
+       /*
+        * We set b_transaction here because b_next_transaction will inherit
+        * our jh reference and thus __journal_file_buffer() must not take a
+        * new one.
+        */
        jh->b_transaction = jh->b_next_transaction;
        jh->b_next_transaction = NULL;
        if (buffer_freed(bh))
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
 }
 
 /*
- * For the unlocked version of this call, also make sure that any
- * hanging journal_head is cleaned up if necessary.
- *
- * __journal_refile_buffer is usually called as part of a single locked
- * operation on a buffer_head, in which the caller is probably going to
- * be hooking the journal_head onto other lists.  In that case it is up
- * to the caller to remove the journal_head if necessary.  For the
- * unlocked journal_refile_buffer call, the caller isn't going to be
- * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
- *
- * *** The journal_head may be freed by this call! ***
+ * __journal_refile_buffer() with necessary locking added. We take our bh
+ * reference so that we can safely unlock bh.
+ *
+ * The jh and bh may be freed by this call.
  */
 void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
 {
        struct buffer_head *bh = jh2bh(jh);
 
+       /* Get reference so that buffer cannot be freed before we unlock it */
+       get_bh(bh);
        jbd_lock_bh_state(bh);
        spin_lock(&journal->j_list_lock);
-
        __journal_refile_buffer(jh);
        jbd_unlock_bh_state(bh);
-       journal_remove_journal_head(bh);
-
        spin_unlock(&journal->j_list_lock);
        __brelse(bh);
 }
index 2dfa7076e8b601f5197420c6bdbab1780ca77d7f..53792bf36c715d4c7f16c08a4a71a02ebda0eefd 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <linux/types.h>
 #include <linux/magic.h>
+#include <linux/fs.h>
 
 /*
  * The second extended filesystem constants/structures
index 0c473fd79acb4f0ae9ca4b41637fc1e8133b21de..67a803aee619c0b75593e4c740abc35087f28d49 100644 (file)
@@ -418,12 +418,11 @@ struct ext3_inode {
 #define EXT2_MOUNT_DATA_FLAGS          EXT3_MOUNT_DATA_FLAGS
 #endif
 
-#define ext3_set_bit                   __test_and_set_bit_le
+#define ext3_set_bit                   __set_bit_le
 #define ext3_set_bit_atomic            ext2_set_bit_atomic
-#define ext3_clear_bit                 __test_and_clear_bit_le
+#define ext3_clear_bit                 __clear_bit_le
 #define ext3_clear_bit_atomic          ext2_clear_bit_atomic
 #define ext3_test_bit                  test_bit_le
-#define ext3_find_first_zero_bit       find_first_zero_bit_le
 #define ext3_find_next_zero_bit                find_next_zero_bit_le
 
 /*
@@ -913,7 +912,7 @@ extern void ext3_dirty_inode(struct inode *, int);
 extern int ext3_change_inode_journal_flag(struct inode *, int);
 extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
 extern int ext3_can_truncate(struct inode *inode);
-extern void ext3_truncate (struct inode *);
+extern void ext3_truncate(struct inode *inode);
 extern void ext3_set_inode_flags(struct inode *);
 extern void ext3_get_inode_flags(struct ext3_inode_info *);
 extern void ext3_set_aops(struct inode *inode);
index e06965081ba5548f74db935543af84334f58259e..e6a5e34bed4fe64df66592371848e9c66270c569 100644 (file)
@@ -940,7 +940,6 @@ extern int     journal_force_commit(journal_t *);
  */
 struct journal_head *journal_add_journal_head(struct buffer_head *bh);
 struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
-void journal_remove_journal_head(struct buffer_head *bh);
 void journal_put_journal_head(struct journal_head *jh);
 
 /*
index 44e95d0a721f1eb50db7e41df52a15403ddf814d..423cb6d78ee0bc9958d63a592f53c7d5a17fa874 100644 (file)
@@ -45,7 +45,7 @@ struct journal_head {
         * has been cowed
         * [jbd_lock_bh_state()]
         */
-       unsigned b_cow_tid;
+       tid_t b_cow_tid;
 
        /*
         * Copy of the buffer data frozen for writing to the log.
index 9a85412e0db6a4015388bc4ebb2c5466e6b55db0..313b7defc08861e17075b56bdc686f287e67307f 100644 (file)
@@ -415,13 +415,5 @@ struct quota_module_name {
        {QFMT_VFS_V0, "quota_v2"},\
        {0, NULL}}
 
-#else
-
-# /* nodep */ include <sys/cdefs.h>
-
-__BEGIN_DECLS
-long quotactl __P ((unsigned int, const char *, int, caddr_t));
-__END_DECLS
-
 #endif /* __KERNEL__ */
 #endif /* _QUOTA_ */
diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h
new file mode 100644 (file)
index 0000000..7b53c05
--- /dev/null
@@ -0,0 +1,864 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ext3
+
+#if !defined(_TRACE_EXT3_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EXT3_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(ext3_free_inode,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        umode_t, mode                   )
+               __field(        uid_t,  uid                     )
+               __field(        gid_t,  gid                     )
+               __field(        blkcnt_t, blocks                )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->mode   = inode->i_mode;
+               __entry->uid    = inode->i_uid;
+               __entry->gid    = inode->i_gid;
+               __entry->blocks = inode->i_blocks;
+       ),
+
+       TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, __entry->uid, __entry->gid,
+                 (unsigned long) __entry->blocks)
+);
+
+TRACE_EVENT(ext3_request_inode,
+       TP_PROTO(struct inode *dir, int mode),
+
+       TP_ARGS(dir, mode),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  dir                     )
+               __field(        umode_t, mode                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = dir->i_sb->s_dev;
+               __entry->dir    = dir->i_ino;
+               __entry->mode   = mode;
+       ),
+
+       TP_printk("dev %d,%d dir %lu mode 0%o",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->dir, __entry->mode)
+);
+
+TRACE_EVENT(ext3_allocate_inode,
+       TP_PROTO(struct inode *inode, struct inode *dir, int mode),
+
+       TP_ARGS(inode, dir, mode),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        ino_t,  dir                     )
+               __field(        umode_t, mode                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->dir    = dir->i_ino;
+               __entry->mode   = mode;
+       ),
+
+       TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long) __entry->dir, __entry->mode)
+);
+
+TRACE_EVENT(ext3_evict_inode,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        int,    nlink                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->nlink  = inode->i_nlink;
+       ),
+
+       TP_printk("dev %d,%d ino %lu nlink %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->nlink)
+);
+
+TRACE_EVENT(ext3_drop_inode,
+       TP_PROTO(struct inode *inode, int drop),
+
+       TP_ARGS(inode, drop),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        int,    drop                    )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->drop   = drop;
+       ),
+
+       TP_printk("dev %d,%d ino %lu drop %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->drop)
+);
+
+TRACE_EVENT(ext3_mark_inode_dirty,
+       TP_PROTO(struct inode *inode, unsigned long IP),
+
+       TP_ARGS(inode, IP),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(unsigned long,  ip                      )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->ip     = IP;
+       ),
+
+       TP_printk("dev %d,%d ino %lu caller %pF",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, (void *)__entry->ip)
+);
+
+TRACE_EVENT(ext3_write_begin,
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int flags),
+
+       TP_ARGS(inode, pos, len, flags),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned int, len               )
+               __field(        unsigned int, flags             )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->pos    = pos;
+               __entry->len    = len;
+               __entry->flags  = flags;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->flags)
+);
+
+DECLARE_EVENT_CLASS(ext3__write_end,
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                       unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned int, len               )
+               __field(        unsigned int, copied            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->pos    = pos;
+               __entry->len    = len;
+               __entry->copied = copied;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_ordered_write_end,
+
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_writeback_write_end,
+
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_journalled_write_end,
+
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied)
+);
+
+DECLARE_EVENT_CLASS(ext3__page_op,
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        pgoff_t, index                  )
+
+       ),
+
+       TP_fast_assign(
+               __entry->index  = page->index;
+               __entry->ino    = page->mapping->host->i_ino;
+               __entry->dev    = page->mapping->host->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu page_index %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->index)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_ordered_writepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_writeback_writepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_journalled_writepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_readpage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_releasepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+TRACE_EVENT(ext3_invalidatepage,
+       TP_PROTO(struct page *page, unsigned long offset),
+
+       TP_ARGS(page, offset),
+
+       TP_STRUCT__entry(
+               __field(        pgoff_t, index                  )
+               __field(        unsigned long, offset           )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+
+       ),
+
+       TP_fast_assign(
+               __entry->index  = page->index;
+               __entry->offset = offset;
+               __entry->ino    = page->mapping->host->i_ino;
+               __entry->dev    = page->mapping->host->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->index, __entry->offset)
+);
+
+TRACE_EVENT(ext3_discard_blocks,
+       TP_PROTO(struct super_block *sb, unsigned long blk,
+                       unsigned long count),
+
+       TP_ARGS(sb, blk, count),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,          dev             )
+               __field(        unsigned long,  blk             )
+               __field(        unsigned long,  count           )
+
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->blk    = blk;
+               __entry->count  = count;
+       ),
+
+       TP_printk("dev %d,%d blk %lu count %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->blk, __entry->count)
+);
+
+TRACE_EVENT(ext3_request_blocks,
+       TP_PROTO(struct inode *inode, unsigned long goal,
+                unsigned long count),
+
+       TP_ARGS(inode, goal, count),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        unsigned long, count            )
+               __field(        unsigned long,  goal            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->count  = count;
+               __entry->goal   = goal;
+       ),
+
+       TP_printk("dev %d,%d ino %lu count %lu goal %lu ",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->count, __entry->goal)
+);
+
+TRACE_EVENT(ext3_allocate_blocks,
+       TP_PROTO(struct inode *inode, unsigned long goal,
+                unsigned long count, unsigned long block),
+
+       TP_ARGS(inode, goal, count, block),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        unsigned long,  block           )
+               __field(        unsigned long, count            )
+               __field(        unsigned long,  goal            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->block  = block;
+               __entry->count  = count;
+               __entry->goal   = goal;
+       ),
+
+       TP_printk("dev %d,%d ino %lu count %lu block %lu goal %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                  __entry->count, __entry->block,
+                 __entry->goal)
+);
+
+TRACE_EVENT(ext3_free_blocks,
+       TP_PROTO(struct inode *inode, unsigned long block,
+                unsigned long count),
+
+       TP_ARGS(inode, block, count),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        umode_t, mode                   )
+               __field(        unsigned long,  block           )
+               __field(        unsigned long,  count           )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->ino            = inode->i_ino;
+               __entry->mode           = inode->i_mode;
+               __entry->block          = block;
+               __entry->count          = count;
+       ),
+
+       TP_printk("dev %d,%d ino %lu mode 0%o block %lu count %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, __entry->block, __entry->count)
+);
+
+TRACE_EVENT(ext3_sync_file_enter,
+       TP_PROTO(struct file *file, int datasync),
+       /* Logs dev, ino, the parent dentry's inode number and datasync. */
+       TP_ARGS(file, datasync),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        ino_t,  parent                  )
+               __field(        int,    datasync                )
+       ),
+
+       TP_fast_assign(
+               struct dentry *dentry = file->f_path.dentry;
+
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->datasync       = datasync;
+               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+       ),
+       /* ino and parent are both cast to unsigned long, hence %lu. */
+       TP_printk("dev %d,%d ino %lu parent %lu datasync %d ",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long) __entry->parent, __entry->datasync)
+);
+
+TRACE_EVENT(ext3_sync_file_exit,
+       TP_PROTO(struct inode *inode, int ret),
+
+       TP_ARGS(inode, ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    ret                     )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->ret            = ret;
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->ret)
+);
+
+TRACE_EVENT(ext3_sync_fs,
+       TP_PROTO(struct super_block *sb, int wait),
+
+       TP_ARGS(sb, wait),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        int,    wait                    )
+
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->wait   = wait;
+       ),
+
+       TP_printk("dev %d,%d wait %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->wait)
+);
+
+TRACE_EVENT(ext3_rsv_window_add,
+       TP_PROTO(struct super_block *sb,
+                struct ext3_reserve_window_node *rsv_node),
+
+       TP_ARGS(sb, rsv_node),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  start           )
+               __field(        unsigned long,  end             )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->start  = rsv_node->rsv_window._rsv_start;
+               __entry->end    = rsv_node->rsv_window._rsv_end;
+       ),
+
+       TP_printk("dev %d,%d start %lu end %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->start, __entry->end)
+);
+
+TRACE_EVENT(ext3_discard_reservation,
+       TP_PROTO(struct inode *inode,
+                struct ext3_reserve_window_node *rsv_node),
+
+       TP_ARGS(inode, rsv_node),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  start           )
+               __field(        unsigned long,  end             )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->start  = rsv_node->rsv_window._rsv_start;
+               __entry->end    = rsv_node->rsv_window._rsv_end;
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu start %lu end %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long)__entry->ino, __entry->start,
+                 __entry->end)
+);
+
+TRACE_EVENT(ext3_alloc_new_reservation,
+       TP_PROTO(struct super_block *sb, unsigned long goal),
+
+       TP_ARGS(sb, goal),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        unsigned long,  goal            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->goal   = goal;
+       ),
+
+       TP_printk("dev %d,%d goal %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->goal)
+);
+
+TRACE_EVENT(ext3_reserved,
+       TP_PROTO(struct super_block *sb, unsigned long block,
+                struct ext3_reserve_window_node *rsv_node),
+
+       TP_ARGS(sb, block, rsv_node),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  block           )
+               __field(        unsigned long,  start           )
+               __field(        unsigned long,  end             )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->block  = block;
+               __entry->start  = rsv_node->rsv_window._rsv_start;
+               __entry->end    = rsv_node->rsv_window._rsv_end;
+               __entry->dev    = sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d block %lu, start %lu end %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->block, __entry->start, __entry->end)
+);
+
+TRACE_EVENT(ext3_forget,
+       TP_PROTO(struct inode *inode, int is_metadata, unsigned long block),
+
+       TP_ARGS(inode, is_metadata, block),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        umode_t, mode                   )
+               __field(        int,    is_metadata             )
+               __field(        unsigned long,  block           )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->mode   = inode->i_mode;
+               __entry->is_metadata = is_metadata;
+               __entry->block  = block;
+       ),
+
+       TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, __entry->is_metadata, __entry->block)
+);
+
+TRACE_EVENT(ext3_read_block_bitmap,
+       TP_PROTO(struct super_block *sb, unsigned int group),
+
+       TP_ARGS(sb, group),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        __u32,  group                   )
+
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->group  = group;
+       ),
+
+       TP_printk("dev %d,%d group %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->group)
+);
+
+TRACE_EVENT(ext3_direct_IO_enter,
+       TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+
+       TP_ARGS(inode, offset, len, rw),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned long,  len             )
+               __field(        int,    rw                      )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pos    = offset;
+               __entry->len    = len;
+               __entry->rw     = rw;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->rw)
+);
+
+TRACE_EVENT(ext3_direct_IO_exit,
+       TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
+                int rw, int ret),
+       /* Logs dev, ino, offset, len, rw and ret; call site is not shown. */
+       TP_ARGS(inode, offset, len, rw, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned long,  len             )
+               __field(        int,    rw                      )
+               __field(        int,    ret                     )
+       ),
+       /* Copy the inode identity and the caller's arguments into the record. */
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pos    = offset;
+               __entry->len    = len;
+               __entry->rw     = rw;
+               __entry->ret    = ret;
+       ),
+       /* ino and pos are cast to match the %lu and %llu conversions. */
+       TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->rw, __entry->ret)
+);
+
+TRACE_EVENT(ext3_unlink_enter,
+       TP_PROTO(struct inode *parent, struct dentry *dentry),
+       /* Logs the inode behind @dentry (dev, ino, size) and @parent's ino. */
+       TP_ARGS(parent, dentry),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  parent                  )
+               __field(        ino_t,  ino                     )
+               __field(        loff_t, size                    )
+               __field(        dev_t,  dev                     )
+       ),
+       /* dentry->d_inode is dereferenced here without a NULL check. */
+       TP_fast_assign(
+               __entry->parent         = parent->i_ino;
+               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->size           = dentry->d_inode->i_size;
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+       ),
+       /* Casts match the conversions: signed %lld size, unsigned %lu inos. */
+       TP_printk("dev %d,%d ino %lu size %lld parent %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (long long) __entry->size,
+                 (unsigned long) __entry->parent)
+);
+
+TRACE_EVENT(ext3_unlink_exit,
+       TP_PROTO(struct dentry *dentry, int ret),
+       /* Logs dev and ino of the inode behind @dentry together with @ret. */
+       TP_ARGS(dentry, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        int,    ret                     )
+       ),
+       /* dentry->d_inode is dereferenced here without a NULL check. */
+       TP_fast_assign(
+               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ret            = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %lu ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->ret)
+);
+
+DECLARE_EVENT_CLASS(ext3__truncate,
+       TP_PROTO(struct inode *inode),
+       TP_ARGS(inode),
+       /* Shared record for truncate events: dev, ino and i_blocks of @inode. */
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        blkcnt_t,       blocks          )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->blocks = inode->i_blocks;
+       ),
+       /* blkcnt_t may be 64-bit where long is 32-bit, so print it as %llu. */
+       TP_printk("dev %d,%d ino %lu blocks %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->blocks)
+);
+
+DEFINE_EVENT(ext3__truncate, ext3_truncate_enter,
+       /* Instance of the ext3__truncate class: same prototype and record. */
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+DEFINE_EVENT(ext3__truncate, ext3_truncate_exit,
+       /* Instance of the ext3__truncate class: same prototype and record. */
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+TRACE_EVENT(ext3_get_blocks_enter,
+       TP_PROTO(struct inode *inode, unsigned long lblk,
+                unsigned long len, int create),
+       /* Logs dev and ino of @inode plus the lblk, len and create arguments. */
+       TP_ARGS(inode, lblk, len, create),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        unsigned long,  lblk            )
+               __field(        unsigned long,  len             )
+               __field(        int,            create          )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->len    = len;
+               __entry->create = create;
+       ),
+       /* create is declared int, so it is printed with %d rather than %u. */
+       TP_printk("dev %d,%d ino %lu lblk %lu len %lu create %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->lblk, __entry->len, __entry->create)
+);
+
+TRACE_EVENT(ext3_get_blocks_exit,
+       TP_PROTO(struct inode *inode, unsigned long lblk,
+                unsigned long pblk, unsigned long len, int ret),
+       /* Logs dev and ino of @inode plus the lblk, pblk, len and ret values. */
+       TP_ARGS(inode, lblk, pblk, len, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        unsigned long,  lblk            )
+               __field(        unsigned long,  pblk            )
+               __field(        unsigned long,  len             )
+               __field(        int,            ret             )
+       ),
+       /* Copy the inode identity and the caller's arguments into the record. */
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->pblk   = pblk;
+               __entry->len    = len;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %lu pblk %lu len %lu ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                  __entry->lblk, __entry->pblk,
+                 __entry->len, __entry->ret)
+);
+
+TRACE_EVENT(ext3_load_inode,
+       TP_PROTO(struct inode *inode),
+       /* Logs only the identity of @inode: its device and inode number. */
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino             )
+               __field(        dev_t,  dev             )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+       ),
+       /* ino is cast to unsigned long to match the %lu conversion. */
+       TP_printk("dev %d,%d ino %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino)
+);
+
+#endif /* _TRACE_EXT3_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h
new file mode 100644 (file)
index 0000000..aff64d8
--- /dev/null
@@ -0,0 +1,203 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM jbd
+
+#if !defined(_TRACE_JBD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_JBD_H
+
+#include <linux/jbd.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(jbd_checkpoint,
+       /* Logs the journal's filesystem device number and an integer result. */
+       TP_PROTO(journal_t *journal, int result),
+
+       TP_ARGS(journal, result),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        int,    result                  )
+       ),
+       /* dev is taken from journal->j_fs_dev, the only journal field read. */
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->result         = result;
+       ),
+
+       TP_printk("dev %d,%d result %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->result)
+);
+
+DECLARE_EVENT_CLASS(jbd_commit,
+       /* Shared record for commit events: device, sync flag, transaction id. */
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        char,   sync_commit             )
+               __field(        tid_t,  transaction             )
+       ),
+       /* dev comes from @journal; the other fields from @commit_transaction. */
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->sync_commit = commit_transaction->t_synchronous_commit;
+               __entry->transaction    = commit_transaction->t_tid;
+       ),
+       /* t_tid is an unsigned tid_t; %d would show large ids as negative. */
+       TP_printk("dev %d,%d transaction %u sync %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->transaction, __entry->sync_commit)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_start_commit,
+       /* Instance of the jbd_commit class: same prototype and record. */
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_locking,
+       /* Instance of the jbd_commit class: same prototype and record. */
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_flushing,
+       /* Instance of the jbd_commit class: same prototype and record. */
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_logging,
+       /* Instance of the jbd_commit class: same prototype and record. */
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction)
+);
+
+TRACE_EVENT(jbd_drop_transaction,
+       /* Logs the journal device plus the transaction's id and sync flag. */
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        char,   sync_commit             )
+               __field(        tid_t,  transaction             )
+       ),
+       /* dev comes from @journal; the other fields from @commit_transaction. */
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->sync_commit = commit_transaction->t_synchronous_commit;
+               __entry->transaction    = commit_transaction->t_tid;
+       ),
+       /* t_tid is an unsigned tid_t; %d would show large ids as negative. */
+       TP_printk("dev %d,%d transaction %u sync %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->transaction, __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd_end_commit,
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+       /* Logs device, sync flag, transaction id and the journal's tail tid. */
+       TP_ARGS(journal, commit_transaction),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        char,   sync_commit             )
+               __field(        tid_t,  transaction             )
+               __field(        tid_t,  head                    )
+       ),
+       /* NOTE(review): "head" stores j_tail_sequence; field name kept as-is. */
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->sync_commit = commit_transaction->t_synchronous_commit;
+               __entry->transaction    = commit_transaction->t_tid;
+               __entry->head           = journal->j_tail_sequence;
+       ),
+       /* Both ids are unsigned tid_t values, hence %u instead of %d. */
+       TP_printk("dev %d,%d transaction %u sync %d head %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->transaction, __entry->sync_commit, __entry->head)
+);
+
+TRACE_EVENT(jbd_do_submit_data,
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+       /* Logs the journal device plus the transaction's id and sync flag. */
+       TP_ARGS(journal, commit_transaction),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        char,   sync_commit             )
+               __field(        tid_t,  transaction             )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->sync_commit = commit_transaction->t_synchronous_commit;
+               __entry->transaction    = commit_transaction->t_tid;
+       ),
+       /* t_tid is an unsigned tid_t; %d would show large ids as negative. */
+       TP_printk("dev %d,%d transaction %u sync %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->transaction, __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd_cleanup_journal_tail,
+       /* Logs the journal's current tail tid next to the supplied values. */
+       TP_PROTO(journal_t *journal, tid_t first_tid,
+                unsigned long block_nr, unsigned long freed),
+
+       TP_ARGS(journal, first_tid, block_nr, freed),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        tid_t,  tail_sequence           )
+               __field(        tid_t,  first_tid               )
+               __field(unsigned long,  block_nr                )
+               __field(unsigned long,  freed                   )
+       ),
+       /* tail_sequence is read from @journal; the rest are the arguments. */
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->tail_sequence  = journal->j_tail_sequence;
+               __entry->first_tid      = first_tid;
+               __entry->block_nr       = block_nr;
+               __entry->freed          = freed;
+       ),
+       /* Labels: from = tail_sequence, to = first_tid, offset = block_nr. */
+       TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tail_sequence, __entry->first_tid,
+                 __entry->block_nr, __entry->freed)
+);
+
+TRACE_EVENT(jbd_update_superblock_end,
+       TP_PROTO(journal_t *journal, int wait),
+       /* Logs the journal's filesystem device number and the @wait argument. */
+       TP_ARGS(journal, wait),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        int,    wait                    )
+       ),
+       /* dev is taken from journal->j_fs_dev, the only journal field read. */
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->wait           = wait;
+       ),
+
+       TP_printk("dev %d,%d wait %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->wait)
+);
+
+#endif /* _TRACE_JBD_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>