Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
author     Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 5 Jul 2012 20:06:25 +0000 (13:06 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 5 Jul 2012 20:06:25 +0000 (13:06 -0700)
Pull btrfs updates from Chris Mason:
 "I held off on my rc5 pull because I hit an oops during log recovery
  after a crash.  I wanted to make sure it wasn't a regression because
  we have some logging fixes in here.

  It turns out that a commit during the merge window just made it much
  more likely to trigger directory logging instead of full commits,
  which exposed an old bug.

  The new backref walking code got some additional fixes.  This should
  be the final set of them.

 Josef fixed up a corner case where our O_DIRECT writes and buffered
  reads could expose old file contents (not stale, just not the most
  recent).  He and Liu Bo fixed crashes during tree log recovery as
  well.

  Ilya fixed errors while we resume disk balancing operations on
  readonly mounts."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: run delayed directory updates during log replay
  Btrfs: hold a ref on the inode during writepages
  Btrfs: fix tree log remove space corner case
  Btrfs: fix wrong check during log recovery
  Btrfs: use _IOR for BTRFS_IOC_SUBVOL_GETFLAGS
  Btrfs: resume balance on rw (re)mounts properly
  Btrfs: restore restriper state on all mounts
  Btrfs: fix dio write vs buffered read race
  Btrfs: don't count I/O statistic read errors for missing devices
  Btrfs: resolve tree mod log locking issue in btrfs_next_leaf
  Btrfs: fix tree mod log rewind of ADD operations
  Btrfs: leave critical region in btrfs_find_all_roots as soon as possible
  Btrfs: always put insert_ptr modifications into the tree mod log
  Btrfs: fix tree mod log for root replacements at leaf level
  Btrfs: support root level changes in __resolve_indirect_ref
  Btrfs: avoid waiting for delayed refs when we must not

13 files changed:
fs/btrfs/backref.c
fs/btrfs/ctree.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.h
fs/btrfs/super.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7301cdb4b2cb3cf6c5d8626b2275a85f88d4f55f..a383c18e74e86eebaa847d756e3493e7ca3c9bfd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
                goto out;
 
        eb = path->nodes[level];
-       if (!eb) {
-               WARN_ON(1);
-               ret = 1;
-               goto out;
+       while (!eb) {
+               if (!level) {
+                       WARN_ON(1);
+                       ret = 1;
+                       goto out;
+               }
+               level--;
+               eb = path->nodes[level];
        }
 
        ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
@@ -835,6 +839,7 @@ again:
                        }
                        ret = __add_delayed_refs(head, delayed_ref_seq,
                                                 &prefs_delayed);
+                       mutex_unlock(&head->mutex);
                        if (ret) {
                                spin_unlock(&delayed_refs->lock);
                                goto out;
@@ -928,8 +933,6 @@ again:
        }
 
 out:
-       if (head)
-               mutex_unlock(&head->mutex);
        btrfs_free_path(path);
        while (!list_empty(&prefs)) {
                ref = list_first_entry(&prefs, struct __prelim_ref, list);
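
The two backref.c hunks above are one change: head->mutex is now dropped as
soon as __add_delayed_refs has copied the delayed refs it needs, rather than
being held all the way to the out: label.  A minimal userspace sketch of that
copy-then-unlock pattern (hypothetical names, not btrfs code):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t head_mutex = PTHREAD_MUTEX_INITIALIZER;
static int shared_refs[16];

static void resolve_refs(void)
{
        int snapshot[16];

        pthread_mutex_lock(&head_mutex);
        memcpy(snapshot, shared_refs, sizeof(snapshot)); /* copy what we need */
        pthread_mutex_unlock(&head_mutex);               /* drop the lock early */

        /* the expensive walk runs on the private copy, without the lock */
        for (int i = 0; i < 16; i++)
                if (snapshot[i])
                        printf("resolving ref %d\n", snapshot[i]);
}

int main(void)
{
        shared_refs[0] = 42;
        resolve_refs();
        return 0;
}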
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 15cbc2bf4ff0ed11f8bb2e163bc1960c65b27fa7..8206b3900587efa23570b5b8f7f8071ab6e73156 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
                if (!looped && !tm)
                        return 0;
                /*
-                * we must have key remove operations in the log before the
-                * replace operation.
+                * if there are no tree operation for the oldest root, we simply
+                * return it. this should only happen if that (old) root is at
+                * level 0.
                 */
-               BUG_ON(!tm);
+               if (!tm)
+                       break;
 
+               /*
+                * if there's an operation that's not a root replacement, we
+                * found the oldest version of our root. normally, we'll find a
+                * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+                */
                if (tm->op != MOD_LOG_ROOT_REPLACE)
                        break;
 
@@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
                                                      tm->generation);
                        break;
                case MOD_LOG_KEY_ADD:
-                       if (tm->slot != n - 1) {
-                               o_dst = btrfs_node_key_ptr_offset(tm->slot);
-                               o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
-                               memmove_extent_buffer(eb, o_dst, o_src, p_size);
-                       }
+                       /* if a move operation is needed it's in the log */
                        n--;
                        break;
                case MOD_LOG_MOVE_KEYS:
@@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
        }
 
        tm = tree_mod_log_search(root->fs_info, logical, time_seq);
-       /*
-        * there was an item in the log when __tree_mod_log_oldest_root
-        * returned. this one must not go away, because the time_seq passed to
-        * us must be blocking its removal.
-        */
-       BUG_ON(!tm);
-
        if (old_root)
-               eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
-                                              root->nodesize);
+               eb = alloc_dummy_extent_buffer(logical, root->nodesize);
        else
                eb = btrfs_clone_extent_buffer(root->node);
        btrfs_tree_read_unlock(root->node);
@@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                btrfs_set_header_level(eb, old_root->level);
                btrfs_set_header_generation(eb, old_generation);
        }
-       __tree_mod_log_rewind(eb, time_seq, tm);
+       if (tm)
+               __tree_mod_log_rewind(eb, time_seq, tm);
+       else
+               WARN_ON(btrfs_header_level(eb) != 0);
        extent_buffer_get(eb);
 
        return eb;
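
get_old_root can now legitimately find no tree mod log entry: a root that has
always been a single leaf has no logged operations, so the freshly cloned
buffer already is the old version (hence the WARN_ON on a nonzero level).  A
toy model of the rewind itself, with a flat int array standing in for an
extent buffer (hypothetical, heavily simplified):

#include <stdio.h>
#include <string.h>

struct mod {                    /* one logged change, log is ordered oldest first */
        unsigned long seq;      /* when it happened */
        int slot;
        int old_val;            /* value before the change */
};

/* undo, newest first, every change more recent than time_seq */
static void rewind_to(int *copy, const struct mod *log, int nmods,
                      unsigned long time_seq)
{
        for (int i = nmods - 1; i >= 0; i--)
                if (log[i].seq > time_seq)
                        copy[log[i].slot] = log[i].old_val;
}

int main(void)
{
        int cur[4] = { 10, 21, 32, 43 };        /* the live "block" */
        struct mod log[] = {
                { .seq = 5, .slot = 1, .old_val = 20 },
                { .seq = 7, .slot = 3, .old_val = 40 },
        };
        int old[4];

        memcpy(old, cur, sizeof(cur));          /* clone, like the dummy eb */
        rewind_to(old, log, 2, 6);              /* view as of sequence 6 */
        printf("slot 3 at seq 6: %d\n", old[3]); /* 40: the seq-7 change is undone */
        return 0;
}

An empty or fully filtered log leaves the copy untouched, which is exactly
the tm == NULL case above.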
@@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 static void insert_ptr(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct btrfs_path *path,
                       struct btrfs_disk_key *key, u64 bytenr,
-                      int slot, int level, int tree_mod_log)
+                      int slot, int level)
 {
        struct extent_buffer *lower;
        int nritems;
@@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
        BUG_ON(slot > nritems);
        BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
        if (slot != nritems) {
-               if (tree_mod_log && level)
+               if (level)
                        tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
                                             slot, nritems - slot);
                memmove_extent_buffer(lower,
@@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
                              btrfs_node_key_ptr_offset(slot),
                              (nritems - slot) * sizeof(struct btrfs_key_ptr));
        }
-       if (tree_mod_log && level) {
+       if (level) {
                ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
                                              MOD_LOG_KEY_ADD);
                BUG_ON(ret < 0);
@@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(split);
 
        insert_ptr(trans, root, path, &disk_key, split->start,
-                  path->slots[level + 1] + 1, level + 1, 1);
+                  path->slots[level + 1] + 1, level + 1);
 
        if (path->slots[level] >= mid) {
                path->slots[level] -= mid;
@@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
        btrfs_set_header_nritems(l, mid);
        btrfs_item_key(right, &disk_key, 0);
        insert_ptr(trans, root, path, &disk_key, right->start,
-                  path->slots[1] + 1, 1, 0);
+                  path->slots[1] + 1, 1);
 
        btrfs_mark_buffer_dirty(right);
        btrfs_mark_buffer_dirty(l);
@@ -3848,7 +3846,7 @@ again:
                if (mid <= slot) {
                        btrfs_set_header_nritems(right, 0);
                        insert_ptr(trans, root, path, &disk_key, right->start,
-                                  path->slots[1] + 1, 1, 0);
+                                  path->slots[1] + 1, 1);
                        btrfs_tree_unlock(path->nodes[0]);
                        free_extent_buffer(path->nodes[0]);
                        path->nodes[0] = right;
@@ -3857,7 +3855,7 @@ again:
                } else {
                        btrfs_set_header_nritems(right, 0);
                        insert_ptr(trans, root, path, &disk_key, right->start,
-                                         path->slots[1], 1, 0);
+                                         path->slots[1], 1);
                        btrfs_tree_unlock(path->nodes[0]);
                        free_extent_buffer(path->nodes[0]);
                        path->nodes[0] = right;
@@ -5121,6 +5119,18 @@ again:
 
                if (!path->skip_locking) {
                        ret = btrfs_try_tree_read_lock(next);
+                       if (!ret && time_seq) {
+                               /*
+                                * If we don't get the lock, we may be racing
+                                * with push_leaf_left, holding that lock while
+                                * itself waiting for the leaf we've currently
+                                * locked. To solve this situation, we give up
+                                * on our lock and cycle.
+                                */
+                               btrfs_release_path(path);
+                               cond_resched();
+                               goto again;
+                       }
                        if (!ret) {
                                btrfs_set_path_blocking(path);
                                btrfs_tree_read_lock(next);
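
The cycle above breaks a potential AB-BA deadlock: btrfs_next_leaf takes leaf
locks left to right, while push_leaf_left holds the right leaf and waits for
the left one.  Backing off the trylock and restarting is the standard escape;
a self-contained analogue with POSIX mutexes (hypothetical names):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t cur_leaf = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t next_leaf = PTHREAD_MUTEX_INITIALIZER;

static void walk_forward(void)
{
again:
        pthread_mutex_lock(&cur_leaf);
        if (pthread_mutex_trylock(&next_leaf)) {
                /*
                 * The holder of next_leaf may itself be waiting for
                 * cur_leaf (push_leaf_left's lock order).  Drop what we
                 * hold and cycle instead of blocking.
                 */
                pthread_mutex_unlock(&cur_leaf);
                sched_yield();
                goto again;
        }
        puts("holding both leaves");
        pthread_mutex_unlock(&next_leaf);
        pthread_mutex_unlock(&cur_leaf);
}

int main(void)
{
        walk_forward();
        return 0;
}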
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7b845ff4af99fb643bd959e52dd6f202b7dee7ef..2936ca49b3b4af3a799f7585b74038d289ab8278 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2354,12 +2354,17 @@ retry_root_backup:
                                  BTRFS_CSUM_TREE_OBJECTID, csum_root);
        if (ret)
                goto recovery_tree_root;
-
        csum_root->track_dirty = 1;
 
        fs_info->generation = generation;
        fs_info->last_trans_committed = generation;
 
+       ret = btrfs_recover_balance(fs_info);
+       if (ret) {
+               printk(KERN_WARNING "btrfs: failed to recover balance\n");
+               goto fail_block_groups;
+       }
+
        ret = btrfs_init_dev_stats(fs_info);
        if (ret) {
                printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2485,20 +2490,23 @@ retry_root_backup:
                goto fail_trans_kthread;
        }
 
-       if (!(sb->s_flags & MS_RDONLY)) {
-               down_read(&fs_info->cleanup_work_sem);
-               err = btrfs_orphan_cleanup(fs_info->fs_root);
-               if (!err)
-                       err = btrfs_orphan_cleanup(fs_info->tree_root);
-               up_read(&fs_info->cleanup_work_sem);
+       if (sb->s_flags & MS_RDONLY)
+               return 0;
 
-               if (!err)
-                       err = btrfs_recover_balance(fs_info->tree_root);
+       down_read(&fs_info->cleanup_work_sem);
+       if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+           (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
+               up_read(&fs_info->cleanup_work_sem);
+               close_ctree(tree_root);
+               return ret;
+       }
+       up_read(&fs_info->cleanup_work_sem);
 
-               if (err) {
-                       close_ctree(tree_root);
-                       return err;
-               }
+       ret = btrfs_resume_balance_async(fs_info);
+       if (ret) {
+               printk(KERN_WARNING "btrfs: failed to resume balance\n");
+               close_ctree(tree_root);
+               return ret;
        }
 
        return 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdefbe095c239b464e55c9699e865175b..6e1d36702ff71c4fc5bde2733cbf166376b726d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
        return count;
 }
 
-
 static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
-                       unsigned long num_refs)
+                              unsigned long num_refs,
+                              struct list_head *first_seq)
 {
-       struct list_head *first_seq = delayed_refs->seq_head.next;
-
        spin_unlock(&delayed_refs->lock);
        pr_debug("waiting for more refs (num %ld, first %p)\n",
                 num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_node *ref;
        struct list_head cluster;
+       struct list_head *first_seq = NULL;
        int ret;
        u64 delayed_start;
        int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
                                 */
                                consider_waiting = 1;
                                num_refs = delayed_refs->num_entries;
+                               first_seq = root->fs_info->tree_mod_seq_list.next;
                        } else {
-                               wait_for_more_refs(delayed_refs, num_refs);
+                               wait_for_more_refs(delayed_refs,
+                                                  num_refs, first_seq);
                                /*
                                 * after waiting, things have changed. we
                                 * dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaa12c1eb3483a8371df48e5765b986c436451c4..01c21b6c6d43e44a28a4c862ca6532d6f8b02654 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                             writepage_t writepage, void *data,
                             void (*flush_fn)(void *))
 {
+       struct inode *inode = mapping->host;
        int ret = 0;
        int done = 0;
        int nr_to_write_done = 0;
@@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        int scanned = 0;
        int tag;
 
+       /*
+        * We have to hold onto the inode so that ordered extents can do their
+        * work when the IO finishes.  The alternative to this is failing to add
+        * an ordered extent if the igrab() fails there and that is a huge pain
+        * to deal with, so instead just hold onto the inode throughout the
+        * writepages operation.  If it fails here we are freeing up the inode
+        * anyway and we'd rather not waste our time writing out stuff that is
+        * going to be truncated anyway.
+        */
+       if (!igrab(inode))
+               return 0;
+
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
@@ -3428,6 +3441,7 @@ retry:
                index = 0;
                goto retry;
        }
+       btrfs_add_delayed_iput(inode);
        return ret;
 }
 
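The igrab() pins the inode for the whole writepages pass so ordered-extent
completion can still reach it; the release at the end goes through
btrfs_add_delayed_iput because dropping the last reference directly from this
context would be unsafe.  A minimal refcount analogue in C11 atomics (a
hypothetical object, not the kernel's actual inode lifetime rules):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct object {
        atomic_int refs;
        int data;
};

static struct object *obj_grab(struct object *o)        /* igrab() analogue */
{
        int old = atomic_load(&o->refs);

        /* fails once the object is already on its way out */
        while (old > 0)
                if (atomic_compare_exchange_weak(&o->refs, &old, old + 1))
                        return o;
        return NULL;
}

static void obj_put(struct object *o)                   /* iput() analogue */
{
        if (atomic_fetch_sub(&o->refs, 1) == 1)
                free(o);        /* last reference: tear the object down */
}

static void write_pages(struct object *o)
{
        if (!obj_grab(o))
                return;         /* it is being freed anyway; skip the work */
        printf("writing back %d\n", o->data);
        obj_put(o);             /* the kernel defers this via a delayed iput */
}

int main(void)
{
        struct object *o = malloc(sizeof(*o));

        if (!o)
                return 1;
        atomic_init(&o->refs, 1);
        o->data = 7;
        write_pages(o);
        obj_put(o);
        return 0;
}
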
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e257bc3a1e7a96ea48009bff093af9a..9aa01ec2138d466f3d1726ccd6291d054c5e5c98 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
                                    loff_t *ppos, size_t count, size_t ocount)
 {
        struct file *file = iocb->ki_filp;
-       struct inode *inode = fdentry(file)->d_inode;
        struct iov_iter i;
        ssize_t written;
        ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
        written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
                                            count, ocount);
 
-       /*
-        * the generic O_DIRECT will update in-memory i_size after the
-        * DIOs are done.  But our endio handlers that update the on
-        * disk i_size never update past the in memory i_size.  So we
-        * need one more update here to catch any additions to the
-        * file
-        */
-       if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
-               btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-               mark_inode_dirty(inode);
-       }
-
        if (written < 0 || written == count)
                return written;
 
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405a5d53a27dba626a4d6201829bd578..6c4e2baa9290e16d06a4a8dfc5c3b05880cfeb27 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
        end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
 
        /*
-        * XXX - this can go away after a few releases.
-        *
-        * since the only user of btrfs_remove_free_space is the tree logging
-        * stuff, and the only way to test that is under crash conditions, we
-        * want to have this debug stuff here just in case somethings not
-        * working.  Search the bitmap for the space we are trying to use to
-        * make sure its actually there.  If its not there then we need to stop
-        * because something has gone wrong.
+        * We need to search for bits in this bitmap.  We could only cover some
+        * of the extent in this bitmap thanks to how we add space, so we need
+        * to search for as much as it as we can and clear that amount, and then
+        * go searching for the next bit.
         */
        search_start = *offset;
-       search_bytes = *bytes;
+       search_bytes = ctl->unit;
        search_bytes = min(search_bytes, end - search_start + 1);
        ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
        BUG_ON(ret < 0 || search_start != *offset);
 
-       if (*offset > bitmap_info->offset && *offset + *bytes > end) {
-               bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
-               *bytes -= end - *offset + 1;
-               *offset = end + 1;
-       } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
-               bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
-               *bytes = 0;
-       }
+       /* We may have found more bits than what we need */
+       search_bytes = min(search_bytes, *bytes);
+
+       /* Cannot clear past the end of the bitmap */
+       search_bytes = min(search_bytes, end - search_start + 1);
+
+       bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
+       *offset += search_bytes;
+       *bytes -= search_bytes;
 
        if (*bytes) {
                struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
                 * everything over again.
                 */
                search_start = *offset;
-               search_bytes = *bytes;
+               search_bytes = ctl->unit;
                ret = search_bitmap(ctl, bitmap_info, &search_start,
                                    &search_bytes);
                if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 {
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *info;
-       struct btrfs_free_space *next_info = NULL;
        int ret = 0;
 
        spin_lock(&ctl->tree_lock);
 
 again:
+       if (!bytes)
+               goto out_lock;
+
        info = tree_search_offset(ctl, offset, 0, 0);
        if (!info) {
                /*
@@ -1905,88 +1904,48 @@ again:
                }
        }
 
-       if (info->bytes < bytes && rb_next(&info->offset_index)) {
-               u64 end;
-               next_info = rb_entry(rb_next(&info->offset_index),
-                                            struct btrfs_free_space,
-                                            offset_index);
-
-               if (next_info->bitmap)
-                       end = next_info->offset +
-                             BITS_PER_BITMAP * ctl->unit - 1;
-               else
-                       end = next_info->offset + next_info->bytes;
-
-               if (next_info->bytes < bytes ||
-                   next_info->offset > offset || offset > end) {
-                       printk(KERN_CRIT "Found free space at %llu, size %llu,"
-                             " trying to use %llu\n",
-                             (unsigned long long)info->offset,
-                             (unsigned long long)info->bytes,
-                             (unsigned long long)bytes);
-                       WARN_ON(1);
-                       ret = -EINVAL;
-                       goto out_lock;
-               }
-
-               info = next_info;
-       }
-
-       if (info->bytes == bytes) {
+       if (!info->bitmap) {
                unlink_free_space(ctl, info);
-               if (info->bitmap) {
-                       kfree(info->bitmap);
-                       ctl->total_bitmaps--;
-               }
-               kmem_cache_free(btrfs_free_space_cachep, info);
-               ret = 0;
-               goto out_lock;
-       }
-
-       if (!info->bitmap && info->offset == offset) {
-               unlink_free_space(ctl, info);
-               info->offset += bytes;
-               info->bytes -= bytes;
-               ret = link_free_space(ctl, info);
-               WARN_ON(ret);
-               goto out_lock;
-       }
+               if (offset == info->offset) {
+                       u64 to_free = min(bytes, info->bytes);
+
+                       info->bytes -= to_free;
+                       info->offset += to_free;
+                       if (info->bytes) {
+                               ret = link_free_space(ctl, info);
+                               WARN_ON(ret);
+                       } else {
+                               kmem_cache_free(btrfs_free_space_cachep, info);
+                       }
 
-       if (!info->bitmap && info->offset <= offset &&
-           info->offset + info->bytes >= offset + bytes) {
-               u64 old_start = info->offset;
-               /*
-                * we're freeing space in the middle of the info,
-                * this can happen during tree log replay
-                *
-                * first unlink the old info and then
-                * insert it again after the hole we're creating
-                */
-               unlink_free_space(ctl, info);
-               if (offset + bytes < info->offset + info->bytes) {
-                       u64 old_end = info->offset + info->bytes;
+                       offset += to_free;
+                       bytes -= to_free;
+                       goto again;
+               } else {
+                       u64 old_end = info->bytes + info->offset;
 
-                       info->offset = offset + bytes;
-                       info->bytes = old_end - info->offset;
+                       info->bytes = offset - info->offset;
                        ret = link_free_space(ctl, info);
                        WARN_ON(ret);
                        if (ret)
                                goto out_lock;
-               } else {
-                       /* the hole we're creating ends at the end
-                        * of the info struct, just free the info
-                        */
-                       kmem_cache_free(btrfs_free_space_cachep, info);
-               }
-               spin_unlock(&ctl->tree_lock);
 
-               /* step two, insert a new info struct to cover
-                * anything before the hole
-                */
-               ret = btrfs_add_free_space(block_group, old_start,
-                                          offset - old_start);
-               WARN_ON(ret); /* -ENOMEM */
-               goto out;
+                       /* Not enough bytes in this entry to satisfy us */
+                       if (old_end < offset + bytes) {
+                               bytes -= old_end - offset;
+                               offset = old_end;
+                               goto again;
+                       } else if (old_end == offset + bytes) {
+                               /* all done */
+                               goto out_lock;
+                       }
+                       spin_unlock(&ctl->tree_lock);
+
+                       ret = btrfs_add_free_space(block_group, offset + bytes,
+                                                  old_end - (offset + bytes));
+                       WARN_ON(ret);
+                       goto out;
+               }
        }
 
        ret = remove_from_bitmap(ctl, info, &offset, &bytes);
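
The rewritten btrfs_remove_free_space stops insisting that one entry cover
the whole request and instead carves the range out piece by piece, looping
via goto again until bytes reaches zero: trim the front of an extent, split
it around the hole, or fall through to the bitmap path.  A simplified model
over an array of extents (hypothetical; the real code also handles bitmaps
and keeps entries in an rbtree):

#include <stdio.h>

struct fspace { unsigned long long offset, bytes; };

/* toy free-space "tree": an unsorted array of extent entries */
static struct fspace free_ext[8] = { { 0, 4096 }, { 8192, 8192 } };
static int nr = 2;

static struct fspace *find(unsigned long long offset)
{
        for (int i = 0; i < nr; i++)
                if (offset >= free_ext[i].offset &&
                    offset < free_ext[i].offset + free_ext[i].bytes)
                        return &free_ext[i];
        return NULL;    /* the real code would try the bitmaps next */
}

static void remove_range(unsigned long long offset, unsigned long long bytes)
{
again:
        if (!bytes)
                return;
        struct fspace *info = find(offset);
        if (!info)
                return;
        unsigned long long end = info->offset + info->bytes;

        if (offset == info->offset) {
                /* trim the front of the entry (a real implementation would
                 * free entries that end up empty) */
                unsigned long long to_free =
                        bytes < info->bytes ? bytes : info->bytes;

                info->offset += to_free;
                info->bytes -= to_free;
                offset += to_free;
                bytes -= to_free;
                goto again;             /* the range may span entries */
        }

        /* range starts inside the entry: keep the head ... */
        info->bytes = offset - info->offset;
        if (end > offset + bytes) {
                /* ... and re-add the tail beyond the removed range */
                free_ext[nr].offset = offset + bytes;
                free_ext[nr].bytes = end - (offset + bytes);
                nr++;
                return;
        }
        /* the entry didn't cover the whole range: keep going after it */
        bytes -= end - offset;
        offset = end;
        goto again;
}

int main(void)
{
        remove_range(1024, 2048);       /* punches a hole: the entry is split */
        remove_range(8192, 4096);       /* trims the front of the second entry */
        for (int i = 0; i < nr; i++)
                printf("free: %llu+%llu\n", free_ext[i].offset,
                       free_ext[i].bytes);
        return 0;
}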
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d8bb0dbc4941ca3f288119134df64eeb1eb679da..a7d1921ac76b8ee5d85d563c1ceed80f4a65a2dd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
        btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
        if (root->fs_info->log_root_recovering) {
-               BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+               BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                                 &BTRFS_I(inode)->runtime_flags));
                goto no_delete;
        }
@@ -5876,8 +5876,17 @@ map:
        bh_result->b_size = len;
        bh_result->b_bdev = em->bdev;
        set_buffer_mapped(bh_result);
-       if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-               set_buffer_new(bh_result);
+       if (create) {
+               if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+                       set_buffer_new(bh_result);
+
+               /*
+                * Need to update the i_size under the extent lock so buffered
+                * readers will get the updated i_size when we unlock.
+                */
+               if (start + len > i_size_read(inode))
+                       i_size_write(inode, start + len);
+       }
 
        free_extent_map(em);
 
@@ -6360,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                 */
                ordered = btrfs_lookup_ordered_range(inode, lockstart,
                                                     lockend - lockstart + 1);
-               if (!ordered)
+
+               /*
+                * We need to make sure there are no buffered pages in this
+                * range either, we could have raced between the invalidate in
+                * generic_file_direct_write and locking the extent.  The
+                * invalidate needs to happen so that reads after a write do not
+                * get stale data.
+                */
+               if (!ordered && (!writing ||
+                   !test_range_bit(&BTRFS_I(inode)->io_tree,
+                                   lockstart, lockend, EXTENT_UPTODATE, 0,
+                                   cached_state)))
                        break;
+
                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                                     &cached_state, GFP_NOFS);
-               btrfs_start_ordered_extent(inode, ordered, 1);
-               btrfs_put_ordered_extent(ordered);
+
+               if (ordered) {
+                       btrfs_start_ordered_extent(inode, ordered, 1);
+                       btrfs_put_ordered_extent(ordered);
+               } else {
+                       /* Screw you mmap */
+                       ret = filemap_write_and_wait_range(file->f_mapping,
+                                                          lockstart,
+                                                          lockend);
+                       if (ret)
+                               goto out;
+
+                       /*
+                        * If we found a page that couldn't be invalidated just
+                        * fall back to buffered.
+                        */
+                       ret = invalidate_inode_pages2_range(file->f_mapping,
+                                       lockstart >> PAGE_CACHE_SHIFT,
+                                       lockend >> PAGE_CACHE_SHIFT);
+                       if (ret) {
+                               if (ret == -EBUSY)
+                                       ret = 0;
+                               goto out;
+                       }
+               }
+
                cond_resched();
        }
 
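The loop above refuses to start direct I/O while the locked range still has
an ordered extent or uptodate cached pages: it unlocks, writes the range
back, invalidates the pages, and retries.  A loose userspace analogue of that
flush-then-drop step, assuming an ordinary file (illustrative only, not
equivalent to the in-kernel invalidation):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("data.bin", O_RDWR | O_CREAT, 0644);

        if (fd < 0)
                return 1;
        /* write back anything dirty for the range ... */
        if (fsync(fd))
                return 1;
        /* ... then ask the kernel to drop the now-clean cached pages */
        if (posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED))
                return 1;
        puts("cache flushed and dropped; a direct read now hits the disk");
        close(fd);
        return 0;
}
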
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724cf6b7a50296d2c6660fef7f4066cb9..e440aa653c30d6f6c8ad1e7437bfa6e2b4d50799 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
 #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
                                   struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
 #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
 #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
                              struct btrfs_ioctl_scrub_args)
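
The ioctl fix is purely about the direction encoded in the command number:
_IOW means userspace writes the argument to the kernel, _IOR means userspace
reads it back, and SUBVOL_GETFLAGS copies a __u64 out.  Because the direction
bits are baked into the value, the GETFLAGS command number changes with this
patch, so kernel and userspace headers must agree.  A quick userspace check
(unsigned long long stands in for __u64):

#include <linux/ioctl.h>
#include <stdio.h>

#define BTRFS_IOCTL_MAGIC 0x94

int main(void)
{
        /* the direction bits are part of the command number itself */
        printf("_IOW variant: %#lx\n",
               (unsigned long)_IOW(BTRFS_IOCTL_MAGIC, 25, unsigned long long));
        printf("_IOR variant: %#lx\n",
               (unsigned long)_IOR(BTRFS_IOCTL_MAGIC, 25, unsigned long long));
        return 0;
}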
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0eb9a4da069e6cb1e3c4294d4ad02e2ab92da666..e23991574fdf309e6ed95e23985fc5dd0af28e0f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                if (ret)
                        goto restore;
 
+               ret = btrfs_resume_balance_async(fs_info);
+               if (ret)
+                       goto restore;
+
                sb->s_flags &= ~MS_RDONLY;
        }
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511ca3304dad46e85045ad2ab28d4e75..8abeae4224f92dbd870877b76224515c304e979d 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
        kfree(name);
 
        iput(inode);
+
+       btrfs_run_delayed_items(trans, root);
        return ret;
 }
 
@@ -895,6 +897,7 @@ again:
                                ret = btrfs_unlink_inode(trans, root, dir,
                                                         inode, victim_name,
                                                         victim_name_len);
+                               btrfs_run_delayed_items(trans, root);
                        }
                        kfree(victim_name);
                        ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
                        ret = btrfs_unlink_inode(trans, root, dir, inode,
                                                 name, name_len);
                        BUG_ON(ret);
+
+                       btrfs_run_delayed_items(trans, root);
+
                        kfree(name);
                        iput(inode);
 
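All three tree-log.c hunks add the same call: during replay,
btrfs_unlink_inode queues its directory-index deletions as delayed items, and
running them immediately keeps later lookups in the same replay pass from
tripping over entries that are already logically gone.  The shape of the
problem as a toy sketch (hypothetical; a single queued deletion stands in for
the delayed-items list):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static char dir[4][16] = { "a", "b" }; /* toy directory */
static int pending_del = -1;           /* one queued deletion, for brevity */

static void unlink_entry(int slot)     /* queues the delete, like a delayed item */
{
        pending_del = slot;
}

static void run_delayed_items(void)    /* applies whatever was queued */
{
        if (pending_del >= 0)
                dir[pending_del][0] = '\0';
        pending_del = -1;
}

static bool lookup(const char *name)
{
        for (int i = 0; i < 4; i++)
                if (!strcmp(dir[i], name))
                        return true;
        return false;
}

int main(void)
{
        unlink_entry(0);
        run_delayed_items();   /* skip this and the next lookup sees stale "a" */
        printf("\"a\" still present: %d\n", lookup("a"));
        return 0;
}
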
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8a3d2594b80726b3a4da08c5924cb51b8e28a0ba..ecaad40e7ef499fedb7667659d9efd955a661eaa 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2845,31 +2845,48 @@ out:
 
 static int balance_kthread(void *data)
 {
-       struct btrfs_balance_control *bctl =
-                       (struct btrfs_balance_control *)data;
-       struct btrfs_fs_info *fs_info = bctl->fs_info;
+       struct btrfs_fs_info *fs_info = data;
        int ret = 0;
 
        mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&fs_info->balance_mutex);
 
-       set_balance_control(bctl);
-
-       if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
-               printk(KERN_INFO "btrfs: force skipping balance\n");
-       } else {
+       if (fs_info->balance_ctl) {
                printk(KERN_INFO "btrfs: continuing balance\n");
-               ret = btrfs_balance(bctl, NULL);
+               ret = btrfs_balance(fs_info->balance_ctl, NULL);
        }
 
        mutex_unlock(&fs_info->balance_mutex);
        mutex_unlock(&fs_info->volume_mutex);
+
        return ret;
 }
 
-int btrfs_recover_balance(struct btrfs_root *tree_root)
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
 {
        struct task_struct *tsk;
+
+       spin_lock(&fs_info->balance_lock);
+       if (!fs_info->balance_ctl) {
+               spin_unlock(&fs_info->balance_lock);
+               return 0;
+       }
+       spin_unlock(&fs_info->balance_lock);
+
+       if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+               printk(KERN_INFO "btrfs: force skipping balance\n");
+               return 0;
+       }
+
+       tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+       if (IS_ERR(tsk))
+               return PTR_ERR(tsk);
+
+       return 0;
+}
+
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
+{
        struct btrfs_balance_control *bctl;
        struct btrfs_balance_item *item;
        struct btrfs_disk_balance_args disk_bargs;
@@ -2882,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
        if (!path)
                return -ENOMEM;
 
-       bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
-       if (!bctl) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;
 
-       ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
        if (ret < 0)
-               goto out_bctl;
+               goto out;
        if (ret > 0) { /* ret = -ENOENT; */
                ret = 0;
-               goto out_bctl;
+               goto out;
+       }
+
+       bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+       if (!bctl) {
+               ret = -ENOMEM;
+               goto out;
        }
 
        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
 
-       bctl->fs_info = tree_root->fs_info;
-       bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+       bctl->fs_info = fs_info;
+       bctl->flags = btrfs_balance_flags(leaf, item);
+       bctl->flags |= BTRFS_BALANCE_RESUME;
 
        btrfs_balance_data(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2913,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
        btrfs_balance_sys(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
-       tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
-       if (IS_ERR(tsk))
-               ret = PTR_ERR(tsk);
-       else
-               goto out;
+       mutex_lock(&fs_info->volume_mutex);
+       mutex_lock(&fs_info->balance_mutex);
 
-out_bctl:
-       kfree(bctl);
+       set_balance_control(bctl);
+
+       mutex_unlock(&fs_info->balance_mutex);
+       mutex_unlock(&fs_info->volume_mutex);
 out:
        btrfs_free_path(path);
        return ret;
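
The balance rework splits restoring state from resuming work:
btrfs_recover_balance now just reads the persisted balance item and installs
the balance control (it runs on every mount, read-only included), while
btrfs_resume_balance_async decides later, once the filesystem is writable,
whether to kick off the kthread.  A userspace sketch of that split
(hypothetical names; the real code uses kthread_run and never joins):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static bool balance_ctl;        /* stands in for fs_info->balance_ctl */

static void recover_balance(void)
{
        /* every mount, rw or ro: read the persisted item, remember it */
        balance_ctl = true;
        puts("balance state restored");
}

static void *balance_thread(void *arg)
{
        (void)arg;
        puts("btrfs: continuing balance");
        return NULL;
}

static int resume_balance_async(void)
{
        pthread_t t;

        if (!balance_ctl)
                return 0;       /* no interrupted balance: nothing to do */
        /* only once the fs is writable: kick off the actual work */
        if (pthread_create(&t, NULL, balance_thread, NULL))
                return -1;
        return pthread_join(t, NULL);
}

int main(void)
{
        recover_balance();              /* mount time */
        return resume_balance_async();  /* later, on rw mount or remount */
}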
@@ -4061,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
 
                        BUG_ON(stripe_index >= bbio->num_stripes);
                        dev = bbio->stripes[stripe_index].dev;
-                       if (bio->bi_rw & WRITE)
-                               btrfs_dev_stat_inc(dev,
-                                                  BTRFS_DEV_STAT_WRITE_ERRS);
-                       else
-                               btrfs_dev_stat_inc(dev,
-                                                  BTRFS_DEV_STAT_READ_ERRS);
-                       if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
-                               btrfs_dev_stat_inc(dev,
-                                                  BTRFS_DEV_STAT_FLUSH_ERRS);
-                       btrfs_dev_stat_print_on_error(dev);
+                       if (dev->bdev) {
+                               if (bio->bi_rw & WRITE)
+                                       btrfs_dev_stat_inc(dev,
+                                               BTRFS_DEV_STAT_WRITE_ERRS);
+                               else
+                                       btrfs_dev_stat_inc(dev,
+                                               BTRFS_DEV_STAT_READ_ERRS);
+                               if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
+                                       btrfs_dev_stat_inc(dev,
+                                               BTRFS_DEV_STAT_FLUSH_ERRS);
+                               btrfs_dev_stat_print_on_error(dev);
+                       }
                }
        }
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 74366f27a76bbc7272e4b41f2f92bf9520bca813..95f6637614db5932f8d52daba79943514a7ed8da 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_root *root, char *path);
 int btrfs_balance(struct btrfs_balance_control *bctl,
                  struct btrfs_ioctl_balance_args *bargs);
-int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
 int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);