Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...

author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 29 Aug 2012 18:36:22 +0000 (11:36 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 29 Aug 2012 18:36:22 +0000 (11:36 -0700)
diff --combined fs/btrfs/ctree.h

index 4bab807227ad938c87039ea2aa1ef8f94fe46146,c38734a07a651627c802838be17ef2a0db73d3dd..0d195b5076604b4350f88de9a58e9f4836fd785a
--- 1/fs/btrfs/ctree.h
--- 2/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@@ -1252,7 -1252,6 +1252,6 @@@ struct btrfs_fs_info 
         atomic_t tree_mod_seq;
         struct list_head tree_mod_seq_list;
         struct seq_list tree_mod_seq_elem;
-       wait_queue_head_t tree_mod_seq_wait;
   
         /* this protects tree_mod_log */
         rwlock_t tree_mod_log_lock;
@@@ -3192,7 -3191,7 +3191,7 @@@ int btrfs_del_csums(struct btrfs_trans_
   int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                           struct bio *bio, u32 *dst);
   int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                             struct bio *bio, u64 logical_offset, u32 *dst);
+                             struct bio *bio, u64 logical_offset);
   int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 objectid, u64 pos,
@@@ -3342,22 -3341,10 +3341,22 @@@ ssize_t btrfs_listxattr(struct dentry *
   /* super.c */
   int btrfs_parse_options(struct btrfs_root *root, char *options);
   int btrfs_sync_fs(struct super_block *sb, int wait);
+ +
+ +#ifdef CONFIG_PRINTK
+ +__printf(2, 3)
   void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
+ +#else
+ +static inline __printf(2, 3)
+ +void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
+ +{
+ +}
+ +#endif
+ +
+ +__printf(5, 6)
   void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                      unsigned int line, int errno, const char *fmt, ...);
   
+ +
   void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root, const char *function,
                                unsigned int line, int errno);
@@@ -3398,7 -3385,6 +3397,7 @@@ do {                                                            
                           (errno), fmt, ##args);                \
   } while (0)
   
+ +__printf(5, 6)
   void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
                    unsigned int line, int errno, const char *fmt, ...);
   
diff --combined fs/btrfs/disk-io.c

index 62e0cafd6e250d5d1717656d3d5821e2d1bfec42,29c69e60d3b000de3e373c9b33742800771f0109..22e98e04c2eabbc0b4baeb2618033657a9344fb9
--- 1/fs/btrfs/disk-io.c
--- 2/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -377,9 -377,13 +377,13 @@@ static int btree_read_extent_buffer_pag
                 ret = read_extent_buffer_pages(io_tree, eb, start,
                                                WAIT_COMPLETE,
                                                btree_get_extent, mirror_num);
-               if (!ret && !verify_parent_transid(io_tree, eb,
+               if (!ret) {
+                       if (!verify_parent_transid(io_tree, eb,
                                                    parent_transid, 0))
-                       break;
+                               break;
+                       else
+                               ret = -EIO;
+               }
   
                 /*
                  * This buffer's crc is fine, but its contents are corrupted, so
@@@ -754,9 -758,7 +758,7 @@@ static void run_one_async_done(struct b
         limit = btrfs_async_submit_limit(fs_info);
         limit = limit * 2 / 3;
   
-       atomic_dec(&fs_info->nr_async_submits);
- 
-       if (atomic_read(&fs_info->nr_async_submits) < limit &&
+       if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
             waitqueue_active(&fs_info->async_submit_wait))
                 wake_up(&fs_info->async_submit_wait);
   
@@@ -1114,7 -1116,7 +1116,7 @@@ void clean_tree_block(struct btrfs_tran
                                 spin_unlock(&root->fs_info->delalloc_lock);
                                 btrfs_panic(root->fs_info, -EOVERFLOW,
                                           "Can't clear %lu bytes from "
- -                                        " dirty_mdatadata_bytes (%lu)",
+ +                                        " dirty_mdatadata_bytes (%llu)",
                                           buf->len,
                                           root->fs_info->dirty_metadata_bytes);
                         }
@@@ -1614,6 -1616,8 +1616,6 @@@ static int cleaner_kthread(void *arg
         struct btrfs_root *root = arg;
   
         do {
- -              vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
- -
                 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
                     mutex_trylock(&root->fs_info->cleaner_mutex)) {
                         btrfs_run_delayed_iputs(root);
@@@ -1645,6 -1649,7 +1647,6 @@@ static int transaction_kthread(void *ar
         do {
                 cannot_commit = false;
                 delay = HZ * 30;
- -              vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
                 mutex_lock(&root->fs_info->transaction_kthread_mutex);
   
                 spin_lock(&root->fs_info->trans_lock);
@@@ -2032,8 -2037,6 +2034,6 @@@ int open_ctree(struct super_block *sb
         fs_info->free_chunk_space = 0;
         fs_info->tree_mod_log = RB_ROOT;
   
-       init_waitqueue_head(&fs_info->tree_mod_seq_wait);
- 
         /* readahead state */
         INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
         spin_lock_init(&fs_info->reada_lock);
@@@ -2528,8 -2531,7 +2528,7 @@@ retry_root_backup
                 goto fail_trans_kthread;
   
         /* do not make disk changes in broken FS */
-       if (btrfs_super_log_root(disk_super) != 0 &&
-           !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
+       if (btrfs_super_log_root(disk_super) != 0) {
                 u64 bytenr = btrfs_super_log_root(disk_super);
   
                 if (fs_devices->rw_devices == 0) {
@@@ -3189,30 -3191,14 +3188,14 @@@ int close_ctree(struct btrfs_root *root
         /* clear out the rbtree of defraggable inodes */
         btrfs_run_defrag_inodes(fs_info);
   
-       /*
-        * Here come 2 situations when btrfs is broken to flip readonly:
-        *
-        * 1. when btrfs flips readonly somewhere else before
-        * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
-        * and btrfs will skip to write sb directly to keep
-        * ERROR state on disk.
-        *
-        * 2. when btrfs flips readonly just in btrfs_commit_super,
-        * and in such case, btrfs cannot write sb via btrfs_commit_super,
-        * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
-        * btrfs will cleanup all FS resources first and write sb then.
-        */
         if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                 ret = btrfs_commit_super(root);
                 if (ret)
                         printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
         }
   
-       if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-               ret = btrfs_error_commit_super(root);
-               if (ret)
-                       printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
-       }
+       if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+               btrfs_error_commit_super(root);
   
         btrfs_put_block_group_cache(fs_info);
   
@@@ -3434,18 -3420,11 +3417,11 @@@ static int btrfs_check_super_valid(stru
         if (read_only)
                 return 0;
   
-       if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-               printk(KERN_WARNING "warning: mount fs with errors, "
-                      "running btrfsck is recommended\n");
-       }
- 
         return 0;
   }
   
- int btrfs_error_commit_super(struct btrfs_root *root)
+ void btrfs_error_commit_super(struct btrfs_root *root)
   {
-       int ret;
- 
         mutex_lock(&root->fs_info->cleaner_mutex);
         btrfs_run_delayed_iputs(root);
         mutex_unlock(&root->fs_info->cleaner_mutex);
@@@ -3455,10 -3434,6 +3431,6 @@@
   
         /* cleanup FS via transaction */
         btrfs_cleanup_transaction(root);
- 
-       ret = write_ctree_super(NULL, root, 0);
- 
-       return ret;
   }
   
   static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@@ -3782,14 -3757,17 +3754,17 @@@ int btrfs_cleanup_transaction(struct bt
                 /* FIXME: cleanup wait for commit */
                 t->in_commit = 1;
                 t->blocked = 1;
+               smp_mb();
                 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
                         wake_up(&root->fs_info->transaction_blocked_wait);
   
                 t->blocked = 0;
+               smp_mb();
                 if (waitqueue_active(&root->fs_info->transaction_wait))
                         wake_up(&root->fs_info->transaction_wait);
   
                 t->commit_done = 1;
+               smp_mb();
                 if (waitqueue_active(&t->commit_wait))
                         wake_up(&t->commit_wait);
   
diff --combined fs/btrfs/extent_io.c

index 45c81bb4ac820323c7fd4282a0cf71f7cc2d190d,49085f2336d2669c2fbfcbdb4ec1b532ab67868e..4c878476bb91ce0985dabc25464622442aaca54a
--- 1/fs/btrfs/extent_io.c
--- 2/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@@ -929,8 -929,7 +929,8 @@@ int set_extent_bit(struct extent_io_tre
   
   
   /**
- - * convert_extent - convert all bits in a given range from one bit to another
+ + * convert_extent_bit - convert all bits in a given range from one bit to
+ + *                    another
    * @tree:     the io tree to search
    * @start:    the start offset in bytes
    * @end:      the end offset in bytes (inclusive)
@@@ -2330,23 -2329,10 +2330,10 @@@ static void end_bio_extent_readpage(str
                 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                         ret = tree->ops->readpage_end_io_hook(page, start, end,
                                                               state, mirror);
-                       if (ret) {
-                               /* no IO indicated but software detected errors
-                                * in the block, either checksum errors or
-                                * issues with the contents */
-                               struct btrfs_root *root =
-                                       BTRFS_I(page->mapping->host)->root;
-                               struct btrfs_device *device;
- 
+                       if (ret)
                                 uptodate = 0;
-                               device = btrfs_find_device_for_logical(
-                                               root, start, mirror);
-                               if (device)
-                                       btrfs_dev_stat_inc_and_print(device,
-                                               BTRFS_DEV_STAT_CORRUPTION_ERRS);
-                       } else {
+                       else
                                 clean_io_failure(start, page);
-                       }
                 }
   
                 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --combined fs/btrfs/inode.c

index 6e8f416773d4b221713c30079eb1de1f732e5ded,6ba80b9028771495e66e3c18a861d9e96e1bb001..ec154f95464696cfa7df3cd6ab87ac4a0185ed03
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -324,8 -324,7 +324,8 @@@ static noinline int add_async_extent(st
    * If this code finds it can't get good compression, it puts an
    * entry onto the work queue to write the uncompressed bytes.  This
    * makes sure that both compressed inodes and uncompressed inodes
- - * are written in the same order that pdflush sent them down.
+ + * are written in the same order that the flusher thread sent them
+ + * down.
    */
   static noinline int compress_file_range(struct inode *inode,
                                         struct page *locked_page,
@@@ -1008,9 -1007,7 +1008,7 @@@ static noinline void async_cow_submit(s
         nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
                 PAGE_CACHE_SHIFT;
   
-       atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
- 
-       if (atomic_read(&root->fs_info->async_delalloc_pages) <
+       if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
             5 * 1024 * 1024 &&
             waitqueue_active(&root->fs_info->async_submit_wait))
                 wake_up(&root->fs_info->async_submit_wait);
@@@ -1885,8 -1882,11 +1883,11 @@@ static int btrfs_finish_ordered_io(stru
                                 trans = btrfs_join_transaction_nolock(root);
                         else
                                 trans = btrfs_join_transaction(root);
-                       if (IS_ERR(trans))
-                               return PTR_ERR(trans);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               trans = NULL;
+                               goto out;
+                       }
                         trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                         ret = btrfs_update_inode_fallback(trans, root, inode);
                         if (ret) /* -ENOMEM or corruption */
@@@ -3174,7 -3174,7 +3175,7 @@@ int btrfs_unlink_subvol(struct btrfs_tr
         btrfs_i_size_write(dir, dir->i_size - name_len * 2);
         inode_inc_iversion(dir);
         dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-       ret = btrfs_update_inode(trans, root, dir);
+       ret = btrfs_update_inode_fallback(trans, root, dir);
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
   out:
@@@ -4249,7 -4249,7 +4250,7 @@@ static void btrfs_dentry_release(struc
   }
   
   static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
- -                                 struct nameidata *nd)
+ +                                 unsigned int flags)
   {
         struct dentry *ret;
   
@@@ -4903,7 -4903,7 +4904,7 @@@ out_unlock
   }
   
   static int btrfs_create(struct inode *dir, struct dentry *dentry,
- -                      umode_t mode, struct nameidata *nd)
+ +                      umode_t mode, bool excl)
   {
         struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(dir)->root;
@@@ -5774,18 -5774,112 +5775,112 @@@ out
         return ret;
   }
   
+ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+                             struct extent_state **cached_state, int writing)
+ {
+       struct btrfs_ordered_extent *ordered;
+       int ret = 0;
+ 
+       while (1) {
+               lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                0, cached_state);
+               /*
+                * We're concerned with the entire range that we're going to be
+                * doing DIO to, so we need to make sure theres no ordered
+                * extents in this range.
+                */
+               ordered = btrfs_lookup_ordered_range(inode, lockstart,
+                                                    lockend - lockstart + 1);
+ 
+               /*
+                * We need to make sure there are no buffered pages in this
+                * range either, we could have raced between the invalidate in
+                * generic_file_direct_write and locking the extent.  The
+                * invalidate needs to happen so that reads after a write do not
+                * get stale data.
+                */
+               if (!ordered && (!writing ||
+                   !test_range_bit(&BTRFS_I(inode)->io_tree,
+                                   lockstart, lockend, EXTENT_UPTODATE, 0,
+                                   *cached_state)))
+                       break;
+ 
+               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                    cached_state, GFP_NOFS);
+ 
+               if (ordered) {
+                       btrfs_start_ordered_extent(inode, ordered, 1);
+                       btrfs_put_ordered_extent(ordered);
+               } else {
+                       /* Screw you mmap */
+                       ret = filemap_write_and_wait_range(inode->i_mapping,
+                                                          lockstart,
+                                                          lockend);
+                       if (ret)
+                               break;
+ 
+                       /*
+                        * If we found a page that couldn't be invalidated just
+                        * fall back to buffered.
+                        */
+                       ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                       lockstart >> PAGE_CACHE_SHIFT,
+                                       lockend >> PAGE_CACHE_SHIFT);
+                       if (ret)
+                               break;
+               }
+ 
+               cond_resched();
+       }
+ 
+       return ret;
+ }
+ 
   static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
   {
         struct extent_map *em;
         struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct extent_state *cached_state = NULL;
         u64 start = iblock << inode->i_blkbits;
+       u64 lockstart, lockend;
         u64 len = bh_result->b_size;
         struct btrfs_trans_handle *trans;
+       int unlock_bits = EXTENT_LOCKED;
+       int ret;
+ 
+       if (create) {
+               ret = btrfs_delalloc_reserve_space(inode, len);
+               if (ret)
+                       return ret;
+               unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+       } else {
+               len = min_t(u64, len, root->sectorsize);
+       }
+ 
+       lockstart = start;
+       lockend = start + len - 1;
+ 
+       /*
+        * If this errors out it's because we couldn't invalidate pagecache for
+        * this range and we need to fallback to buffered.
+        */
+       if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+               return -ENOTBLK;
+ 
+       if (create) {
+               ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                    lockend, EXTENT_DELALLOC, NULL,
+                                    &cached_state, GFP_NOFS);
+               if (ret)
+                       goto unlock_err;
+       }
   
         em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-       if (IS_ERR(em))
-               return PTR_ERR(em);
+       if (IS_ERR(em)) {
+               ret = PTR_ERR(em);
+               goto unlock_err;
+       }
   
         /*
          * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@@ -5804,17 -5898,16 +5899,16 @@@
         if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
             em->block_start == EXTENT_MAP_INLINE) {
                 free_extent_map(em);
-               return -ENOTBLK;
+               ret = -ENOTBLK;
+               goto unlock_err;
         }
   
         /* Just a good old fashioned hole, return */
         if (!create && (em->block_start == EXTENT_MAP_HOLE ||
                         test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
                 free_extent_map(em);
-               /* DIO will do one hole at a time, so just unlock a sector */
-               unlock_extent(&BTRFS_I(inode)->io_tree, start,
-                             start + root->sectorsize - 1);
-               return 0;
+               ret = 0;
+               goto unlock_err;
         }
   
         /*
@@@ -5827,8 -5920,9 +5921,9 @@@
          *
          */
         if (!create) {
-               len = em->len - (start - em->start);
-               goto map;
+               len = min(len, em->len - (start - em->start));
+               lockstart = start + len;
+               goto unlock;
         }
   
         if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@@ -5860,7 -5954,7 +5955,7 @@@
                         btrfs_end_transaction(trans, root);
                         if (ret) {
                                 free_extent_map(em);
-                               return ret;
+                               goto unlock_err;
                         }
                         goto unlock;
                 }
@@@ -5873,14 -5967,12 +5968,12 @@@ must_cow
          */
         len = bh_result->b_size;
         em = btrfs_new_extent_direct(inode, em, start, len);
-       if (IS_ERR(em))
-               return PTR_ERR(em);
+       if (IS_ERR(em)) {
+               ret = PTR_ERR(em);
+               goto unlock_err;
+       }
         len = min(len, em->len - (start - em->start));
   unlock:
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-                         EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-                         0, NULL, GFP_NOFS);
- map:
         bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
                 inode->i_blkbits;
         bh_result->b_size = len;
@@@ -5898,9 -5990,44 +5991,44 @@@
                         i_size_write(inode, start + len);
         }
   
+       /*
+        * In the case of write we need to clear and unlock the entire range,
+        * in the case of read we need to unlock only the end area that we
+        * aren't using if there is any left over space.
+        */
+       if (lockstart < lockend) {
+               if (create && len < lockend - lockstart) {
+                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                        lockstart + len - 1, unlock_bits, 1, 0,
+                                        &cached_state, GFP_NOFS);
+                       /*
+                        * Beside unlock, we also need to cleanup reserved space
+                        * for the left range by attaching EXTENT_DO_ACCOUNTING.
+                        */
+                       clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                                        lockstart + len, lockend,
+                                        unlock_bits | EXTENT_DO_ACCOUNTING,
+                                        1, 0, NULL, GFP_NOFS);
+               } else {
+                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                        lockend, unlock_bits, 1, 0,
+                                        &cached_state, GFP_NOFS);
+               }
+       } else {
+               free_extent_state(cached_state);
+       }
+ 
         free_extent_map(em);
   
         return 0;
+ 
+ unlock_err:
+       if (create)
+               unlock_bits |= EXTENT_DO_ACCOUNTING;
+ 
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                        unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+       return ret;
   }
   
   struct btrfs_dio_private {
@@@ -5908,7 -6035,6 +6036,6 @@@
         u64 logical_offset;
         u64 disk_bytenr;
         u64 bytes;
-       u32 *csums;
         void *private;
   
         /* number of bios pending for this dio */
@@@ -5928,7 -6054,6 +6055,6 @@@ static void btrfs_endio_direct_read(str
         struct inode *inode = dip->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         u64 start;
-       u32 *private = dip->csums;
   
         start = dip->logical_offset;
         do {
@@@ -5936,8 -6061,12 +6062,12 @@@
                         struct page *page = bvec->bv_page;
                         char *kaddr;
                         u32 csum = ~(u32)0;
+                       u64 private = ~(u32)0;
                         unsigned long flags;
   
+                       if (get_state_private(&BTRFS_I(inode)->io_tree,
+                                             start, &private))
+                               goto failed;
                         local_irq_save(flags);
                         kaddr = kmap_atomic(page);
                         csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@@ -5947,18 -6076,18 +6077,18 @@@
                         local_irq_restore(flags);
   
                         flush_dcache_page(bvec->bv_page);
-                       if (csum != *private) {
+                       if (csum != private) {
+ failed:
                                 printk(KERN_ERR "btrfs csum failed ino %llu off"
                                       " %llu csum %u private %u\n",
                                       (unsigned long long)btrfs_ino(inode),
                                       (unsigned long long)start,
-                                     csum, *private);
+                                     csum, (unsigned)private);
                                 err = -EIO;
                         }
                 }
   
                 start += bvec->bv_len;
-               private++;
                 bvec++;
         } while (bvec <= bvec_end);
   
@@@ -5966,7 -6095,6 +6096,6 @@@
                       dip->logical_offset + dip->bytes - 1);
         bio->bi_private = dip->private;
   
-       kfree(dip->csums);
         kfree(dip);
   
         /* If we had a csum failure make sure to clear the uptodate flag */
@@@ -6072,7 -6200,7 +6201,7 @@@ static struct bio *btrfs_dio_bio_alloc(
   
   static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                          int rw, u64 file_offset, int skip_sum,
-                                        u32 *csums, int async_submit)
+                                        int async_submit)
   {
         int write = rw & REQ_WRITE;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@@ -6105,8 -6233,7 +6234,7 @@@
                 if (ret)
                         goto err;
         } else if (!skip_sum) {
-               ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-                                         file_offset, csums);
+               ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
                 if (ret)
                         goto err;
         }
@@@ -6132,10 -6259,8 +6260,8 @@@ static int btrfs_submit_direct_hook(in
         u64 submit_len = 0;
         u64 map_length;
         int nr_pages = 0;
-       u32 *csums = dip->csums;
         int ret = 0;
         int async_submit = 0;
-       int write = rw & REQ_WRITE;
   
         map_length = orig_bio->bi_size;
         ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@@ -6171,16 -6296,13 +6297,13 @@@
                         atomic_inc(&dip->pending_bios);
                         ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                      file_offset, skip_sum,
-                                                    csums, async_submit);
+                                                    async_submit);
                         if (ret) {
                                 bio_put(bio);
                                 atomic_dec(&dip->pending_bios);
                                 goto out_err;
                         }
   
-                       /* Write's use the ordered csums */
-                       if (!write && !skip_sum)
-                               csums = csums + nr_pages;
                         start_sector += submit_len >> 9;
                         file_offset += submit_len;
   
@@@ -6210,7 -6332,7 +6333,7 @@@
   
   submit:
         ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                    csums, async_submit);
+                                    async_submit);
         if (!ret)
                 return 0;
   
@@@ -6246,17 -6368,6 +6369,6 @@@ static void btrfs_submit_direct(int rw
                 ret = -ENOMEM;
                 goto free_ordered;
         }
-       dip->csums = NULL;
- 
-       /* Write's use the ordered csum stuff, so we don't need dip->csums */
-       if (!write && !skip_sum) {
-               dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-               if (!dip->csums) {
-                       kfree(dip);
-                       ret = -ENOMEM;
-                       goto free_ordered;
-               }
-       }
   
         dip->private = bio->bi_private;
         dip->inode = inode;
@@@ -6341,132 -6452,22 +6453,22 @@@ static ssize_t check_direct_IO(struct b
   out:
         return retval;
   }
+ 
   static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                         const struct iovec *iov, loff_t offset,
                         unsigned long nr_segs)
   {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
-       struct btrfs_ordered_extent *ordered;
-       struct extent_state *cached_state = NULL;
-       u64 lockstart, lockend;
-       ssize_t ret;
-       int writing = rw & WRITE;
-       int write_bits = 0;
-       size_t count = iov_length(iov, nr_segs);
   
         if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-                           offset, nr_segs)) {
+                           offset, nr_segs))
                 return 0;
-       }
- 
-       lockstart = offset;
-       lockend = offset + count - 1;
   
-       if (writing) {
-               ret = btrfs_delalloc_reserve_space(inode, count);
-               if (ret)
-                       goto out;
-       }
- 
-       while (1) {
-               lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                0, &cached_state);
-               /*
-                * We're concerned with the entire range that we're going to be
-                * doing DIO to, so we need to make sure theres no ordered
-                * extents in this range.
-                */
-               ordered = btrfs_lookup_ordered_range(inode, lockstart,
-                                                    lockend - lockstart + 1);
- 
-               /*
-                * We need to make sure there are no buffered pages in this
-                * range either, we could have raced between the invalidate in
-                * generic_file_direct_write and locking the extent.  The
-                * invalidate needs to happen so that reads after a write do not
-                * get stale data.
-                */
-               if (!ordered && (!writing ||
-                   !test_range_bit(&BTRFS_I(inode)->io_tree,
-                                   lockstart, lockend, EXTENT_UPTODATE, 0,
-                                   cached_state)))
-                       break;
- 
-               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                    &cached_state, GFP_NOFS);
- 
-               if (ordered) {
-                       btrfs_start_ordered_extent(inode, ordered, 1);
-                       btrfs_put_ordered_extent(ordered);
-               } else {
-                       /* Screw you mmap */
-                       ret = filemap_write_and_wait_range(file->f_mapping,
-                                                          lockstart,
-                                                          lockend);
-                       if (ret)
-                               goto out;
- 
-                       /*
-                        * If we found a page that couldn't be invalidated just
-                        * fall back to buffered.
-                        */
-                       ret = invalidate_inode_pages2_range(file->f_mapping,
-                                       lockstart >> PAGE_CACHE_SHIFT,
-                                       lockend >> PAGE_CACHE_SHIFT);
-                       if (ret) {
-                               if (ret == -EBUSY)
-                                       ret = 0;
-                               goto out;
-                       }
-               }
- 
-               cond_resched();
-       }
- 
-       /*
-        * we don't use btrfs_set_extent_delalloc because we don't want
-        * the dirty or uptodate bits
-        */
-       if (writing) {
-               write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-               ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                    EXTENT_DELALLOC, NULL, &cached_state,
-                                    GFP_NOFS);
-               if (ret) {
-                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                        lockend, EXTENT_LOCKED | write_bits,
-                                        1, 0, &cached_state, GFP_NOFS);
-                       goto out;
-               }
-       }
- 
-       free_extent_state(cached_state);
-       cached_state = NULL;
- 
-       ret = __blockdev_direct_IO(rw, iocb, inode,
+       return __blockdev_direct_IO(rw, iocb, inode,
                    BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
                    iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
                    btrfs_submit_direct, 0);
- 
-       if (ret < 0 && ret != -EIOCBQUEUED) {
-               clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-                             offset + iov_length(iov, nr_segs) - 1,
-                             EXTENT_LOCKED | write_bits, 1, 0,
-                             &cached_state, GFP_NOFS);
-       } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-               /*
-                * We're falling back to buffered, unlock the section we didn't
-                * do IO on.
-                */
-               clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-                             offset + iov_length(iov, nr_segs) - 1,
-                             EXTENT_LOCKED | write_bits, 1, 0,
-                             &cached_state, GFP_NOFS);
-       }
- out:
-       free_extent_state(cached_state);
-       return ret;
   }
   
   static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@@ -6630,7 -6631,6 +6632,7 @@@ int btrfs_page_mkwrite(struct vm_area_s
         u64 page_start;
         u64 page_end;
   
+ +      sb_start_pagefault(inode->i_sb);
         ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
         if (!ret) {
                 ret = file_update_time(vma->vm_file);
@@@ -6720,15 -6720,12 +6722,15 @@@ again
         unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
   
   out_unlock:
- -      if (!ret)
+ +      if (!ret) {
+ +              sb_end_pagefault(inode->i_sb);
                 return VM_FAULT_LOCKED;
+ +      }
         unlock_page(page);
   out:
         btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
   out_noreserve:
+ +      sb_end_pagefault(inode->i_sb);
         return ret;
   }
   
@@@ -7000,7 -6997,7 +7002,7 @@@ void btrfs_destroy_inode(struct inode *
         struct btrfs_ordered_extent *ordered;
         struct btrfs_root *root = BTRFS_I(inode)->root;
   
- -      WARN_ON(!list_empty(&inode->i_dentry));
+ +      WARN_ON(!hlist_empty(&inode->i_dentry));
         WARN_ON(inode->i_data.nrpages);
         WARN_ON(BTRFS_I(inode)->outstanding_extents);
         WARN_ON(BTRFS_I(inode)->reserved_extents);
diff --combined fs/btrfs/ioctl.c

index 7bb755677a220f71fd0540932c0c19565aec6a23,a1fbca0a10030b201d9bf8bcd1fdf661a02edffa..9df50fa8a0781ba387553297fbe442ed964e671e
--- 1/fs/btrfs/ioctl.c
--- 2/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@@ -195,10 -195,6 +195,10 @@@ static int btrfs_ioctl_setflags(struct 
         if (!inode_owner_or_capable(inode))
                 return -EACCES;
   
+ +      ret = mnt_want_write_file(file);
+ +      if (ret)
+ +              return ret;
+ +
         mutex_lock(&inode->i_mutex);
   
         ip_oldflags = ip->flags;
@@@ -213,6 -209,10 +213,6 @@@
                 }
         }
   
- -      ret = mnt_want_write_file(file);
- -      if (ret)
- -              goto out_unlock;
- -
         if (flags & FS_SYNC_FL)
                 ip->flags |= BTRFS_INODE_SYNC;
         else
@@@ -275,9 -275,9 +275,9 @@@
                 inode->i_flags = i_oldflags;
         }
   
- -      mnt_drop_write_file(file);
    out_unlock:
         mutex_unlock(&inode->i_mutex);
+ +      mnt_drop_write_file(file);
         return ret;
   }
   
@@@ -424,7 -424,7 +424,7 @@@ static noinline int create_subvol(struc
         uuid_le_gen(&new_uuid);
         memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
         root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
-       root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+       root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
         root_item.ctime = root_item.otime;
         btrfs_set_root_ctransid(&root_item, trans->transid);
         btrfs_set_root_otransid(&root_item, trans->transid);
diff --combined fs/btrfs/super.c

index f2eb24c477a3ca1c60ee95b51040354dd5a869ba,073c2368f45974651b98a78888028ba6078317ef..83d6f9f9c2209861efdec86dec9ad54d629deeb9
--- 1/fs/btrfs/super.c
--- 2/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@@ -100,6 -100,10 +100,6 @@@ static void __save_error_info(struct bt
         fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
   }
   
- -/* NOTE:
- - *    We move write_super stuff at umount in order to avoid deadlock
- - *    for umount hold all lock.
- - */
   static void save_error_info(struct btrfs_fs_info *fs_info)
   {
         __save_error_info(fs_info);
@@@ -121,7 -125,6 +121,7 @@@ static void btrfs_handle_error(struct b
         }
   }
   
+ +#ifdef CONFIG_PRINTK
   /*
    * __btrfs_std_error decodes expected errors from the caller and
    * invokes the approciate error response.
@@@ -164,7 -167,7 +164,7 @@@ void __btrfs_std_error(struct btrfs_fs_
         va_end(args);
   }
   
- -const char *logtypes[] = {
+ +static const char * const logtypes[] = {
         "emergency",
         "alert",
         "critical",
@@@ -182,49 -185,21 +182,49 @@@ void btrfs_printk(struct btrfs_fs_info 
         struct va_format vaf;
         va_list args;
         const char *type = logtypes[4];
+ +      int kern_level;
   
         va_start(args, fmt);
   
- -      if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
- -              memcpy(lvl, fmt, 3);
- -              lvl[3] = '\0';
- -              fmt += 3;
- -              type = logtypes[fmt[1] - '0'];
+ +      kern_level = printk_get_level(fmt);
+ +      if (kern_level) {
+ +              size_t size = printk_skip_level(fmt) - fmt;
+ +              memcpy(lvl, fmt,  size);
+ +              lvl[size] = '\0';
+ +              fmt += size;
+ +              type = logtypes[kern_level - '0'];
         } else
                 *lvl = '\0';
   
         vaf.fmt = fmt;
         vaf.va = &args;
+ +
         printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf);
+ +
+ +      va_end(args);
+ +}
+ +
+ +#else
+ +
+ +void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+ +                     unsigned int line, int errno, const char *fmt, ...)
+ +{
+ +      struct super_block *sb = fs_info->sb;
+ +
+ +      /*
+ +       * Special case: if the error is EROFS, and we're already
+ +       * under MS_RDONLY, then it is safe here.
+ +       */
+ +      if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
+ +              return;
+ +
+ +      /* Don't go through full error handling during mount */
+ +      if (sb->s_flags & MS_BORN) {
+ +              save_error_info(fs_info);
+ +              btrfs_handle_error(fs_info);
+ +      }
   }
+ +#endif
   
   /*
    * We only mark the transaction aborted and then set the file system read-only.
@@@ -838,7 -813,6 +838,6 @@@ int btrfs_sync_fs(struct super_block *s
         struct btrfs_trans_handle *trans;
         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
         struct btrfs_root *root = fs_info->tree_root;
-       int ret;
   
         trace_btrfs_sync_fs(wait);
   
@@@ -849,11 -823,17 +848,17 @@@
   
         btrfs_wait_ordered_extents(root, 0, 0);
   
-       trans = btrfs_start_transaction(root, 0);
+       spin_lock(&fs_info->trans_lock);
+       if (!fs_info->running_transaction) {
+               spin_unlock(&fs_info->trans_lock);
+               return 0;
+       }
+       spin_unlock(&fs_info->trans_lock);
+ 
+       trans = btrfs_join_transaction(root);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
-       ret = btrfs_commit_transaction(trans, root);
-       return ret;
+       return btrfs_commit_transaction(trans, root);
   }
   
   static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
@@@ -1101,8 -1081,7 +1106,8 @@@ static struct dentry *btrfs_mount(struc
         }
   
         bdev = fs_devices->latest_bdev;
- -      s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info);
+ +      s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
+ +               fs_info);
         if (IS_ERR(s)) {
                 error = PTR_ERR(s);
                 goto error_close_devices;
@@@ -1116,6 -1095,7 +1121,6 @@@
         } else {
                 char b[BDEVNAME_SIZE];
   
- -              s->s_flags = flags | MS_NOSEC;
                 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
                 btrfs_sb(s)->bdev_holder = fs_type;
                 error = btrfs_fill_super(s, fs_devices, data,
@@@ -1530,6 -1510,8 +1535,8 @@@ static int btrfs_show_devname(struct se
         while (cur_devices) {
                 head = &cur_devices->devices;
                 list_for_each_entry(dev, head, dev_list) {
+                       if (dev->missing)
+                               continue;
                         if (!first_dev || dev->devid < first_dev->devid)
                                 first_dev = dev;
                 }
diff --combined fs/btrfs/transaction.c

index 17be3dedacbab1c47084270153ed059719984470,3ee8d58e97ad788d34d12ee455ef3348362525a6..27c26004e050a33211674363cfd80c02c98d1063
--- 1/fs/btrfs/transaction.c
--- 2/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@@ -335,8 -335,6 +335,8 @@@ again
         if (!h)
                 return ERR_PTR(-ENOMEM);
   
+ +      sb_start_intwrite(root->fs_info->sb);
+ +
         if (may_wait_transaction(root, type))
                 wait_current_trans(root);
   
@@@ -347,7 -345,6 +347,7 @@@
         } while (ret == -EBUSY);
   
         if (ret < 0) {
+ +              sb_end_intwrite(root->fs_info->sb);
                 kmem_cache_free(btrfs_trans_handle_cachep, h);
                 return ERR_PTR(ret);
         }
@@@ -551,8 -548,6 +551,8 @@@ static int __btrfs_end_transaction(stru
         btrfs_trans_release_metadata(trans, root);
         trans->block_rsv = NULL;
   
+ +      sb_end_intwrite(root->fs_info->sb);
+ +
         if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
             should_end_transaction(trans, root)) {
                 trans->transaction->blocked = 1;
@@@ -1031,6 -1026,7 +1031,7 @@@ static noinline int create_pending_snap
   
         btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                          dentry->d_name.len * 2);
+       parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
         ret = btrfs_update_inode(trans, parent_root, parent_inode);
         if (ret)
                 goto abort_trans_dput;
@@@ -1066,7 -1062,7 +1067,7 @@@
         memcpy(new_root_item->parent_uuid, root->root_item.uuid,
                         BTRFS_UUID_SIZE);
         new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
-       new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+       new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
         btrfs_set_root_otransid(new_root_item, trans->transid);
         memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
         memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
@@@ -1583,8 -1579,6 +1584,8 @@@ int btrfs_commit_transaction(struct btr
         put_transaction(cur_trans);
         put_transaction(cur_trans);
   
+ +      sb_end_intwrite(root->fs_info->sb);
+ +
         trace_btrfs_transaction_commit(root);
   
         btrfs_scrub_continue(root);
diff --combined fs/btrfs/volumes.c

index e86ae04abe6a78e72dd86b3a813bce3107a8802e,3f4e70e171ed440513a58204492d90074175c3d8..88b969aeeb71a53128ae941e569ad19f7b1038c3
--- 1/fs/btrfs/volumes.c
--- 2/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@@ -227,9 -227,8 +227,8 @@@ loop_lock
                 cur = pending;
                 pending = pending->bi_next;
                 cur->bi_next = NULL;
-               atomic_dec(&fs_info->nr_async_bios);
   
-               if (atomic_read(&fs_info->nr_async_bios) < limit &&
+               if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
                     waitqueue_active(&fs_info->async_submit_wait))
                         wake_up(&fs_info->async_submit_wait);
   
@@@ -569,9 -568,11 +568,11 @@@ static int __btrfs_close_devices(struc
                 memcpy(new_device, device, sizeof(*new_device));
   
                 /* Safe because we are under uuid_mutex */
-               name = rcu_string_strdup(device->name->str, GFP_NOFS);
-               BUG_ON(device->name && !name); /* -ENOMEM */
-               rcu_assign_pointer(new_device->name, name);
+               if (device->name) {
+                       name = rcu_string_strdup(device->name->str, GFP_NOFS);
+                       BUG_ON(device->name && !name); /* -ENOMEM */
+                       rcu_assign_pointer(new_device->name, name);
+               }
                 new_device->bdev = NULL;
                 new_device->writeable = 0;
                 new_device->in_fs_metadata = 0;
@@@ -1744,6 -1745,10 +1745,6 @@@ int btrfs_init_new_device(struct btrfs_
   
         device->fs_devices = root->fs_info->fs_devices;
   
- -      /*
- -       * we don't want write_supers to jump in here with our device
- -       * half setup
- -       */
         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
         list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
         list_add(&device->dev_alloc_list,
@@@ -4605,28 -4610,6 +4606,6 @@@ int btrfs_read_sys_array(struct btrfs_r
         return ret;
   }
   
- struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-                                                  u64 logical, int mirror_num)
- {
-       struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-       int ret;
-       u64 map_length = 0;
-       struct btrfs_bio *bbio = NULL;
-       struct btrfs_device *device;
- 
-       BUG_ON(mirror_num == 0);
-       ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
-                             mirror_num);
-       if (ret) {
-               BUG_ON(bbio != NULL);
-               return NULL;
-       }
-       BUG_ON(mirror_num != bbio->mirror_num);
-       device = bbio->stripes[mirror_num - 1].dev;
-       kfree(bbio);
-       return device;
- }
- 
   int btrfs_read_chunk_tree(struct btrfs_root *root)
   {
         struct btrfs_path *path;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 29 Aug 2012 18:36:22 +0000 (11:36 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 29 Aug 2012 18:36:22 +0000 (11:36 -0700)
		1	2
fs/btrfs/ctree.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/disk-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/extent_io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/transaction.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/volumes.c	patch \|	diff1 \|	diff2 \|	blob \| history