Btrfs: fix buffer leak in btrfs_next_old_leaf
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 69ce3f7deeef5f508e609ce151f1582d71e1989e..67fe46fdee6f9c4af8ee7b9473fa00b7c6d9924b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -455,23 +455,56 @@ unlock:
        return ret;
 }
 
-int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
-                  struct tree_mod_elem **tm_ret)
+static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
+                                   struct extent_buffer *eb) {
+       smp_mb();
+       if (list_empty(&(fs_info)->tree_mod_seq_list))
+               return 1;
+       if (!eb)
+               return 0;
+       if (btrfs_header_level(eb) == 0)
+               return 1;
+       return 0;
+}
+
+/*
+ * This allocates memory and gets a tree modification sequence number when
+ * needed.
+ *
+ * Returns 0 when no sequence number is needed, < 0 on error.
+ * Returns > 0 (the added sequence number) when one was added. In this case,
+ * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
+ * after inserting into the rb tree.
+ */
+static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
+                                struct tree_mod_elem **tm_ret)
 {
        struct tree_mod_elem *tm;
-       u64 seq = 0;
+       int seq;
 
-       smp_mb();
-       if (list_empty(&fs_info->tree_mod_seq_list))
+       if (tree_mod_dont_log(fs_info, NULL))
                return 0;
 
        tm = *tm_ret = kzalloc(sizeof(*tm), flags);
        if (!tm)
                return -ENOMEM;
 
-       __get_tree_mod_seq(fs_info, &tm->elem);
-       seq = tm->elem.seq;
        tm->elem.flags = 0;
+       spin_lock(&fs_info->tree_mod_seq_lock);
+       if (list_empty(&fs_info->tree_mod_seq_list)) {
+               /*
+                * someone emptied the list while we were waiting for the lock.
+                * we must not add to the list, because no blocker exists. items
+                * are removed from the list only when the existing blocker is
+                * removed from the list.
+                */
+               kfree(tm);
+               seq = 0;
+               spin_unlock(&fs_info->tree_mod_seq_lock);
+       } else {
+               __get_tree_mod_seq(fs_info, &tm->elem);
+               seq = tm->elem.seq;
+       }
 
        return seq;
 }
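
For reference, the locking contract described in the comment above is what the
callers changed below rely on: a positive return from tree_mod_alloc() means
fs_info->tree_mod_seq_lock is held and must be dropped once the element has
been inserted into the rb tree. A minimal sketch of that pattern, using only
helpers from this patch (example_log_op itself is hypothetical and not part of
the change):

        static noinline int example_log_op(struct btrfs_fs_info *fs_info,
                                           gfp_t flags)
        {
                struct tree_mod_elem *tm;
                int ret;

                ret = tree_mod_alloc(fs_info, flags, &tm);
                if (ret <= 0)
                        return ret;     /* nothing to log, or -ENOMEM */

                /* fill in tm->index, tm->op, ... before inserting */

                ret = __tree_mod_log_insert(fs_info, tm);
                spin_unlock(&fs_info->tree_mod_seq_lock);
                return ret;
        }
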
@@ -497,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
        tm->slot = slot;
        tm->generation = btrfs_node_ptr_generation(eb, slot);
 
-       return __tree_mod_log_insert(fs_info, tm);
+       ret = __tree_mod_log_insert(fs_info, tm);
+       spin_unlock(&fs_info->tree_mod_seq_lock);
+       return ret;
 }
 
 static noinline int
@@ -516,9 +551,8 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
        int ret;
        int i;
 
-       ret = tree_mod_alloc(fs_info, flags, &tm);
-       if (ret <= 0)
-               return ret;
+       if (tree_mod_dont_log(fs_info, eb))
+               return 0;
 
        for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
                ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
@@ -526,13 +560,19 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
                BUG_ON(ret < 0);
        }
 
+       ret = tree_mod_alloc(fs_info, flags, &tm);
+       if (ret <= 0)
+               return ret;
+
        tm->index = eb->start >> PAGE_CACHE_SHIFT;
        tm->slot = src_slot;
        tm->move.dst_slot = dst_slot;
        tm->move.nr_items = nr_items;
        tm->op = MOD_LOG_MOVE_KEYS;
 
-       return __tree_mod_log_insert(fs_info, tm);
+       ret = __tree_mod_log_insert(fs_info, tm);
+       spin_unlock(&fs_info->tree_mod_seq_lock);
+       return ret;
 }
 
 static noinline int
@@ -553,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        tm->generation = btrfs_header_generation(old_root);
        tm->op = MOD_LOG_ROOT_REPLACE;
 
-       return __tree_mod_log_insert(fs_info, tm);
+       ret = __tree_mod_log_insert(fs_info, tm);
+       spin_unlock(&fs_info->tree_mod_seq_lock);
+       return ret;
 }
 
 static struct tree_mod_elem *
@@ -630,8 +672,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
        int ret;
        int i;
 
-       smp_mb();
-       if (list_empty(&fs_info->tree_mod_seq_list))
+       if (tree_mod_dont_log(fs_info, NULL))
                return;
 
        if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
@@ -678,11 +719,7 @@ static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
        int ret;
        u32 nritems;
 
-       smp_mb();
-       if (list_empty(&fs_info->tree_mod_seq_list))
-               return;
-
-       if (btrfs_header_level(eb) == 0)
+       if (tree_mod_dont_log(fs_info, eb))
                return;
 
        nritems = btrfs_header_nritems(eb);
@@ -960,6 +997,229 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+/*
+ * returns the logical address of the oldest predecessor of the given root.
+ * entries older than time_seq are ignored.
+ */
+static struct tree_mod_elem *
+__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
+                          struct btrfs_root *root, u64 time_seq)
+{
+       struct tree_mod_elem *tm;
+       struct tree_mod_elem *found = NULL;
+       u64 root_logical = root->node->start;
+       int looped = 0;
+
+       if (!time_seq)
+               return 0;
+
+       /*
+        * the very last operation that's logged for a root is the replacement
+        * operation (if it is replaced at all). this has the index of the *new*
+        * root, making it the very first operation that's logged for this root.
+        */
+       while (1) {
+               tm = tree_mod_log_search_oldest(fs_info, root_logical,
+                                               time_seq);
+               if (!looped && !tm)
+                       return 0;
+               /*
+                * if there are no tree operations for the oldest root, we simply
+                * return it. this should only happen if that (old) root is at
+                * level 0.
+                */
+               if (!tm)
+                       break;
+
+               /*
+                * if there's an operation that's not a root replacement, we
+                * found the oldest version of our root. normally, we'll find a
+                * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+                */
+               if (tm->op != MOD_LOG_ROOT_REPLACE)
+                       break;
+
+               found = tm;
+               root_logical = tm->old_root.logical;
+               BUG_ON(root_logical == root->node->start);
+               looped = 1;
+       }
+
+       /* if there's no old root to return, return what we found instead */
+       if (!found)
+               found = tm;
+
+       return found;
+}
+
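
The loop above walks the chain of MOD_LOG_ROOT_REPLACE records backwards in
time. A self-contained toy model of that walk, with simplified types (this is
illustrative plain C, not kernel code; search() stands in for
tree_mod_log_search_oldest()):

        #include <stddef.h>

        struct toy_mod {
                unsigned long long logical;     /* block this record refers to */
                unsigned long long old_logical; /* set for root replacements */
                int is_root_replace;
        };

        static struct toy_mod *
        toy_oldest_root(struct toy_mod *(*search)(unsigned long long logical),
                        unsigned long long root_logical)
        {
                struct toy_mod *tm, *found = NULL;

                while ((tm = search(root_logical)) != NULL &&
                       tm->is_root_replace) {
                        /* remember the replacement and follow it backwards */
                        found = tm;
                        root_logical = tm->old_logical;
                }
                /* prefer the last replacement record, else whatever we hit */
                return found ? found : tm;
        }
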
+/*
+ * tm is a pointer to the first operation to rewind within eb. then, all
+ * previous operations will be rewound (until we reach something older than
+ * time_seq).
+ */
+static void
+__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
+                     struct tree_mod_elem *first_tm)
+{
+       u32 n;
+       struct rb_node *next;
+       struct tree_mod_elem *tm = first_tm;
+       unsigned long o_dst;
+       unsigned long o_src;
+       unsigned long p_size = sizeof(struct btrfs_key_ptr);
+
+       n = btrfs_header_nritems(eb);
+       while (tm && tm->elem.seq >= time_seq) {
+               /*
+                * all the operations are recorded with the operator used for
+                * the modification. as we're going backwards, we do the
+                * opposite of each operation here.
+                */
+               switch (tm->op) {
+               case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
+                       BUG_ON(tm->slot < n);
+               case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
+               case MOD_LOG_KEY_REMOVE:
+                       btrfs_set_node_key(eb, &tm->key, tm->slot);
+                       btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
+                       btrfs_set_node_ptr_generation(eb, tm->slot,
+                                                     tm->generation);
+                       n++;
+                       break;
+               case MOD_LOG_KEY_REPLACE:
+                       BUG_ON(tm->slot >= n);
+                       btrfs_set_node_key(eb, &tm->key, tm->slot);
+                       btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
+                       btrfs_set_node_ptr_generation(eb, tm->slot,
+                                                     tm->generation);
+                       break;
+               case MOD_LOG_KEY_ADD:
+                       /* if a move operation is needed it's in the log */
+                       n--;
+                       break;
+               case MOD_LOG_MOVE_KEYS:
+                       o_dst = btrfs_node_key_ptr_offset(tm->slot);
+                       o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
+                       memmove_extent_buffer(eb, o_dst, o_src,
+                                             tm->move.nr_items * p_size);
+                       break;
+               case MOD_LOG_ROOT_REPLACE:
+                       /*
+                        * this operation is special. for roots, this must be
+                        * handled explicitly before rewinding.
+                        * for non-roots, this operation may exist if the node
+                        * was a root: root A -> child B; then A becomes empty
+                        * and B is promoted to the new root. in the mod log,
+                        * we'll have a root-replace operation for B, a tree
+                        * block that is not a root. we simply ignore that
+                        * operation.
+                        */
+                       break;
+               }
+               next = rb_next(&tm->node);
+               if (!next)
+                       break;
+               tm = container_of(next, struct tree_mod_elem, node);
+               if (tm->index != first_tm->index)
+                       break;
+       }
+       btrfs_set_header_nritems(eb, n);
+}
+
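
Each case above applies the inverse of the logged operation. For
MOD_LOG_MOVE_KEYS, for instance, a logged move of nr_items key pointers from
src_slot (recorded in tm->slot) to dst_slot is undone by copying them from
dst_slot back to src_slot. A toy illustration with a plain array (not kernel
code; toy_ptr and toy_rewind_move are made up for this sketch):

        #include <string.h>

        struct toy_ptr {
                unsigned long long key;
                unsigned long long blockptr;
        };

        static void toy_rewind_move(struct toy_ptr *ptrs, int src_slot,
                                    int dst_slot, int nr_items)
        {
                /* put the pointers back where they were before the move */
                memmove(&ptrs[src_slot], &ptrs[dst_slot],
                        nr_items * sizeof(*ptrs));
        }
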
+static struct extent_buffer *
+tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
+                   u64 time_seq)
+{
+       struct extent_buffer *eb_rewin;
+       struct tree_mod_elem *tm;
+
+       if (!time_seq)
+               return eb;
+
+       if (btrfs_header_level(eb) == 0)
+               return eb;
+
+       tm = tree_mod_log_search(fs_info, eb->start, time_seq);
+       if (!tm)
+               return eb;
+
+       if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
+               BUG_ON(tm->slot != 0);
+               eb_rewin = alloc_dummy_extent_buffer(eb->start,
+                                               fs_info->tree_root->nodesize);
+               BUG_ON(!eb_rewin);
+               btrfs_set_header_bytenr(eb_rewin, eb->start);
+               btrfs_set_header_backref_rev(eb_rewin,
+                                            btrfs_header_backref_rev(eb));
+               btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
+               btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
+       } else {
+               eb_rewin = btrfs_clone_extent_buffer(eb);
+               BUG_ON(!eb_rewin);
+       }
+
+       extent_buffer_get(eb_rewin);
+       free_extent_buffer(eb);
+
+       __tree_mod_log_rewind(eb_rewin, time_seq, tm);
+
+       return eb_rewin;
+}
+
+/*
+ * get_old_root() rewinds the state of @root's root node to the given @time_seq
+ * value. If there are no changes, the current root->node is returned. If
+ * anything changed in between, there's a fresh buffer allocated on which the
+ * rewind operations are done. In any case, the returned buffer is read locked.
+ * Returns NULL on error (with no locks held).
+ */
+static inline struct extent_buffer *
+get_old_root(struct btrfs_root *root, u64 time_seq)
+{
+       struct tree_mod_elem *tm;
+       struct extent_buffer *eb;
+       struct tree_mod_root *old_root = NULL;
+       u64 old_generation = 0;
+       u64 logical;
+
+       eb = btrfs_read_lock_root_node(root);
+       tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+       if (!tm)
+               return root->node;
+
+       if (tm->op == MOD_LOG_ROOT_REPLACE) {
+               old_root = &tm->old_root;
+               old_generation = tm->generation;
+               logical = old_root->logical;
+       } else {
+               logical = root->node->start;
+       }
+
+       tm = tree_mod_log_search(root->fs_info, logical, time_seq);
+       if (old_root)
+               eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+       else
+               eb = btrfs_clone_extent_buffer(root->node);
+       btrfs_tree_read_unlock(root->node);
+       free_extent_buffer(root->node);
+       if (!eb)
+               return NULL;
+       btrfs_tree_read_lock(eb);
+       if (old_root) {
+               btrfs_set_header_bytenr(eb, eb->start);
+               btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
+               btrfs_set_header_owner(eb, root->root_key.objectid);
+               btrfs_set_header_level(eb, old_root->level);
+               btrfs_set_header_generation(eb, old_generation);
+       }
+       if (tm)
+               __tree_mod_log_rewind(eb, time_seq, tm);
+       else
+               WARN_ON(btrfs_header_level(eb) != 0);
+       extent_buffer_get(eb);
+
+       return eb;
+}
+
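
As the comment above states, the buffer returned by get_old_root() is read
locked and carries an extra reference; btrfs_search_old_slot() below stores it
in the path so that unlock_up()/btrfs_release_path() drop both. Spelled out
for a hypothetical direct caller (illustrative only; example_peek_old_root is
not part of the patch, and treating NULL as -ENOMEM is an assumption):

        static int example_peek_old_root(struct btrfs_root *root, u64 time_seq)
        {
                struct extent_buffer *eb;

                eb = get_old_root(root, time_seq);
                if (!eb)
                        return -ENOMEM; /* assumption: NULL means allocation failure */

                /* ... read-only inspection of the rewound root node ... */

                btrfs_tree_read_unlock(eb);
                free_extent_buffer(eb);
                return 0;
        }
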
 static inline int should_cow_block(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct extent_buffer *buf)
@@ -1164,7 +1424,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                                if (!cur)
                                        return -EIO;
                        } else if (!uptodate) {
-                               btrfs_read_buffer(cur, gen);
+                               err = btrfs_read_buffer(cur, gen);
+                               if (err) {
+                                       free_extent_buffer(cur);
+                                       return err;
+                               }
                        }
                }
                if (search_start == 0)
@@ -1279,20 +1543,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                      int level, int *slot)
 {
-       if (level == 0) {
+       if (level == 0)
                return generic_bin_search(eb,
                                          offsetof(struct btrfs_leaf, items),
                                          sizeof(struct btrfs_item),
                                          key, btrfs_header_nritems(eb),
                                          slot);
-       } else {
+       else
                return generic_bin_search(eb,
                                          offsetof(struct btrfs_node, ptrs),
                                          sizeof(struct btrfs_key_ptr),
                                          key, btrfs_header_nritems(eb),
                                          slot);
-       }
-       return -1;
 }
 
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
@@ -1422,8 +1684,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
                return 0;
 
-       btrfs_header_nritems(mid);
-
        left = read_node_slot(root, parent, pslot - 1);
        if (left) {
                btrfs_tree_lock(left);
@@ -1453,7 +1713,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                wret = push_node_left(trans, root, left, mid, 1);
                if (wret < 0)
                        ret = wret;
-               btrfs_header_nritems(mid);
        }
 
        /*
@@ -1930,7 +2189,7 @@ static int
 read_block_for_search(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct btrfs_path *p,
                       struct extent_buffer **eb_ret, int level, int slot,
-                      struct btrfs_key *key)
+                      struct btrfs_key *key, u64 time_seq)
 {
        u64 blocknr;
        u64 gen;
@@ -2284,7 +2543,7 @@ cow_done:
                        }
 
                        err = read_block_for_search(trans, root, p,
-                                                   &b, level, slot, key);
+                                                   &b, level, slot, key, 0);
                        if (err == -EAGAIN)
                                goto again;
                        if (err) {
@@ -2355,6 +2614,113 @@ done:
        return ret;
 }
 
+/*
+ * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
+ * current state of the tree together with the operations recorded in the tree
+ * modification log to search for the key in a previous version of this tree, as
+ * denoted by the time_seq parameter.
+ *
+ * Naturally, there is no support for insert, delete or cow operations.
+ *
+ * The resulting path and return value will be set up as if we called
+ * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
+ */
+int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
+                         struct btrfs_path *p, u64 time_seq)
+{
+       struct extent_buffer *b;
+       int slot;
+       int ret;
+       int err;
+       int level;
+       int lowest_unlock = 1;
+       u8 lowest_level = 0;
+
+       lowest_level = p->lowest_level;
+       WARN_ON(p->nodes[0] != NULL);
+
+       if (p->search_commit_root) {
+               BUG_ON(time_seq);
+               return btrfs_search_slot(NULL, root, key, p, 0, 0);
+       }
+
+again:
+       b = get_old_root(root, time_seq);
+       level = btrfs_header_level(b);
+       p->locks[level] = BTRFS_READ_LOCK;
+
+       while (b) {
+               level = btrfs_header_level(b);
+               p->nodes[level] = b;
+               btrfs_clear_path_blocking(p, NULL, 0);
+
+               /*
+                * we have a lock on b and as long as we aren't changing
+                * the tree, there is no way for the items in b to change.
+                * It is safe to drop the lock on our parent before we
+                * go through the expensive btree search on b.
+                */
+               btrfs_unlock_up_safe(p, level + 1);
+
+               ret = bin_search(b, key, level, &slot);
+
+               if (level != 0) {
+                       int dec = 0;
+                       if (ret && slot > 0) {
+                               dec = 1;
+                               slot -= 1;
+                       }
+                       p->slots[level] = slot;
+                       unlock_up(p, level, lowest_unlock, 0, NULL);
+
+                       if (level == lowest_level) {
+                               if (dec)
+                                       p->slots[level]++;
+                               goto done;
+                       }
+
+                       err = read_block_for_search(NULL, root, p, &b, level,
+                                                   slot, key, time_seq);
+                       if (err == -EAGAIN)
+                               goto again;
+                       if (err) {
+                               ret = err;
+                               goto done;
+                       }
+
+                       level = btrfs_header_level(b);
+                       err = btrfs_try_tree_read_lock(b);
+                       if (!err) {
+                               btrfs_set_path_blocking(p);
+                               btrfs_tree_read_lock(b);
+                               btrfs_clear_path_blocking(p, b,
+                                                         BTRFS_READ_LOCK);
+                       }
+                       p->locks[level] = BTRFS_READ_LOCK;
+                       p->nodes[level] = b;
+                       b = tree_mod_log_rewind(root->fs_info, b, time_seq);
+                       if (b != p->nodes[level]) {
+                               btrfs_tree_unlock_rw(p->nodes[level],
+                                                    p->locks[level]);
+                               p->locks[level] = 0;
+                               p->nodes[level] = b;
+                       }
+               } else {
+                       p->slots[level] = slot;
+                       unlock_up(p, level, lowest_unlock, 0, NULL);
+                       goto done;
+               }
+       }
+       ret = 1;
+done:
+       if (!p->leave_spinning)
+               btrfs_set_path_blocking(p);
+       if (ret < 0)
+               btrfs_release_path(p);
+
+       return ret;
+}
+
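
A minimal sketch of how a reader might use the new entry point (illustrative
only; example_lookup_old is a hypothetical caller, and obtaining a valid
time_seq via the tree mod sequence counter is outside the scope of this
snippet):

        static int example_lookup_old(struct btrfs_root *root,
                                      struct btrfs_key *key, u64 time_seq)
        {
                struct btrfs_path *path;
                int ret;

                path = btrfs_alloc_path();
                if (!path)
                        return -ENOMEM;

                ret = btrfs_search_old_slot(root, key, path, time_seq);
                if (ret == 0) {
                        /* key found; path->nodes[0] / path->slots[0] point at
                         * it, read locked as described above */
                }

                btrfs_free_path(path);
                return ret;
        }
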
 /*
  * adjust the pointers going up the tree, starting at level
  * making sure the right key of each node points to 'key'.
@@ -2627,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 static void insert_ptr(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct btrfs_path *path,
                       struct btrfs_disk_key *key, u64 bytenr,
-                      int slot, int level, int tree_mod_log)
+                      int slot, int level)
 {
        struct extent_buffer *lower;
        int nritems;
@@ -2640,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
        BUG_ON(slot > nritems);
        BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
        if (slot != nritems) {
-               if (tree_mod_log && level)
+               if (level)
                        tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
                                             slot, nritems - slot);
                memmove_extent_buffer(lower,
@@ -2648,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
                              btrfs_node_key_ptr_offset(slot),
                              (nritems - slot) * sizeof(struct btrfs_key_ptr));
        }
-       if (tree_mod_log && level) {
+       if (level) {
                ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
                                              MOD_LOG_KEY_ADD);
                BUG_ON(ret < 0);
@@ -2736,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(split);
 
        insert_ptr(trans, root, path, &disk_key, split->start,
-                  path->slots[level + 1] + 1, level + 1, 1);
+                  path->slots[level + 1] + 1, level + 1);
 
        if (path->slots[level] >= mid) {
                path->slots[level] -= mid;
@@ -3273,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
        btrfs_set_header_nritems(l, mid);
        btrfs_item_key(right, &disk_key, 0);
        insert_ptr(trans, root, path, &disk_key, right->start,
-                  path->slots[1] + 1, 1, 0);
+                  path->slots[1] + 1, 1);
 
        btrfs_mark_buffer_dirty(right);
        btrfs_mark_buffer_dirty(l);
@@ -3480,7 +3846,7 @@ again:
                if (mid <= slot) {
                        btrfs_set_header_nritems(right, 0);
                        insert_ptr(trans, root, path, &disk_key, right->start,
-                                  path->slots[1] + 1, 1, 0);
+                                  path->slots[1] + 1, 1);
                        btrfs_tree_unlock(path->nodes[0]);
                        free_extent_buffer(path->nodes[0]);
                        path->nodes[0] = right;
@@ -3489,7 +3855,7 @@ again:
                } else {
                        btrfs_set_header_nritems(right, 0);
                        insert_ptr(trans, root, path, &disk_key, right->start,
-                                         path->slots[1], 1, 0);
+                                         path->slots[1], 1);
                        btrfs_tree_unlock(path->nodes[0]);
                        free_extent_buffer(path->nodes[0]);
                        path->nodes[0] = right;
@@ -4218,9 +4584,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                              btrfs_node_key_ptr_offset(slot + 1),
                              sizeof(struct btrfs_key_ptr) *
                              (nritems - slot - 1));
-       }
-
-       if (tree_mod_log && level) {
+       } else if (tree_mod_log && level) {
                ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
                                              MOD_LOG_KEY_REMOVE);
                BUG_ON(ret < 0);
@@ -4665,6 +5029,12 @@ next:
  * returns < 0 on io errors.
  */
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+       return btrfs_next_old_leaf(root, path, 0);
+}
+
+int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
+                       u64 time_seq)
 {
        int slot;
        int level;
@@ -4690,7 +5060,10 @@ again:
        path->keep_locks = 1;
        path->leave_spinning = 1;
 
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (time_seq)
+               ret = btrfs_search_old_slot(root, &key, path, time_seq);
+       else
+               ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        path->keep_locks = 0;
 
        if (ret < 0)
@@ -4735,7 +5108,7 @@ again:
                next = c;
                next_rw_lock = path->locks[level];
                ret = read_block_for_search(NULL, root, path, &next, level,
-                                           slot, &key);
+                                           slot, &key, 0);
                if (ret == -EAGAIN)
                        goto again;
 
@@ -4746,6 +5119,19 @@ again:
 
                if (!path->skip_locking) {
                        ret = btrfs_try_tree_read_lock(next);
+                       if (!ret && time_seq) {
+                               /*
+                                * If we don't get the lock, we may be racing
+                                * with push_leaf_left, holding that lock while
+                                * itself waiting for the leaf we've currently
+                                * locked. To solve this situation, we give up
+                                * on our lock and cycle.
+                                */
+                               free_extent_buffer(next);
+                               btrfs_release_path(path);
+                               cond_resched();
+                               goto again;
+                       }
                        if (!ret) {
                                btrfs_set_path_blocking(path);
                                btrfs_tree_read_lock(next);
@@ -4772,7 +5158,7 @@ again:
                        break;
 
                ret = read_block_for_search(NULL, root, path, &next, level,
-                                           0, &key);
+                                           0, &key, 0);
                if (ret == -EAGAIN)
                        goto again;
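
Taken together, btrfs_search_old_slot() and btrfs_next_old_leaf() let a reader
walk the leaves of a past version of a tree. A hedged sketch of that loop
(illustrative only; item processing and error handling are reduced to the bare
minimum):

        /* after a successful btrfs_search_old_slot(root, &key, path, time_seq) */
        while (1) {
                /* inspect the items of the current leaf, path->nodes[0] */

                ret = btrfs_next_old_leaf(root, path, time_seq);
                if (ret)
                        break;  /* > 0: no more leaves, < 0: I/O error */
        }

As for the subject line: the free_extent_buffer(next) call in the time_seq
retry branch further up is presumably the buffer leak fix being referenced.
read_block_for_search() hands back 'next' with an extra reference, which has
to be dropped before the path is released and the search is restarted.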