Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / extent-tree.c
index 6526f1faf6c2a9e6fc402b186f4528d1be440054..df472ab1b5acca7b411b05bcc414913bcebcc244 100644 (file)
@@ -105,6 +105,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
                                       u64 num_bytes, int reserve);
 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
                               u64 num_bytes);
+int btrfs_pin_extent(struct btrfs_root *root,
+                    u64 bytenr, u64 num_bytes, int reserved);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -684,55 +686,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
        rcu_read_unlock();
 }
 
-u64 btrfs_find_block_group(struct btrfs_root *root,
-                          u64 search_start, u64 search_hint, int owner)
-{
-       struct btrfs_block_group_cache *cache;
-       u64 used;
-       u64 last = max(search_hint, search_start);
-       u64 group_start = 0;
-       int full_search = 0;
-       int factor = 9;
-       int wrapped = 0;
-again:
-       while (1) {
-               cache = btrfs_lookup_first_block_group(root->fs_info, last);
-               if (!cache)
-                       break;
-
-               spin_lock(&cache->lock);
-               last = cache->key.objectid + cache->key.offset;
-               used = btrfs_block_group_used(&cache->item);
-
-               if ((full_search || !cache->ro) &&
-                   block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
-                       if (used + cache->pinned + cache->reserved <
-                           div_factor(cache->key.offset, factor)) {
-                               group_start = cache->key.objectid;
-                               spin_unlock(&cache->lock);
-                               btrfs_put_block_group(cache);
-                               goto found;
-                       }
-               }
-               spin_unlock(&cache->lock);
-               btrfs_put_block_group(cache);
-               cond_resched();
-       }
-       if (!wrapped) {
-               last = search_start;
-               wrapped = 1;
-               goto again;
-       }
-       if (!full_search && factor < 10) {
-               last = search_start;
-               full_search = 1;
-               factor = 10;
-               goto again;
-       }
-found:
-       return group_start;
-}
-
 /* simple helper to search for an existing extent at a given offset */
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
 {
@@ -2117,8 +2070,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
        u32 item_size;
        int ret;
        int err = 0;
-       int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                       node->type == BTRFS_SHARED_BLOCK_REF_KEY);
+       int metadata = !extent_op->is_data;
 
        if (trans->aborted)
                return 0;
@@ -2133,11 +2085,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
        key.objectid = node->bytenr;
 
        if (metadata) {
-               struct btrfs_delayed_tree_ref *tree_ref;
-
-               tree_ref = btrfs_delayed_node_to_tree_ref(node);
                key.type = BTRFS_METADATA_ITEM_KEY;
-               key.offset = tree_ref->level;
+               key.offset = extent_op->level;
        } else {
                key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = node->num_bytes;
@@ -2541,9 +2490,10 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
            !trans->delayed_ref_elem.seq) {
                /* list without seq or seq without list */
                btrfs_err(fs_info,
-                       "qgroup accounting update error, list is%s empty, seq is %llu",
+                       "qgroup accounting update error, list is%s empty, seq is %#x.%x",
                        list_empty(&trans->qgroup_ref_list) ? "" : " not",
-                       trans->delayed_ref_elem.seq);
+                       (u32)(trans->delayed_ref_elem.seq >> 32),
+                       (u32)trans->delayed_ref_elem.seq);
                BUG();
        }
 
@@ -2765,7 +2715,7 @@ out:
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 flags,
-                               int is_data)
+                               int level, int is_data)
 {
        struct btrfs_delayed_extent_op *extent_op;
        int ret;
@@ -2778,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
        extent_op->update_flags = 1;
        extent_op->update_key = 0;
        extent_op->is_data = is_data ? 1 : 0;
+       extent_op->level = level;
 
        ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
                                          num_bytes, extent_op);
@@ -3155,6 +3106,11 @@ again:
        WARN_ON(ret);
 
        if (i_size_read(inode) > 0) {
+               ret = btrfs_check_trunc_cache_free_space(root,
+                                       &root->fs_info->global_block_rsv);
+               if (ret)
+                       goto out_put;
+
                ret = btrfs_truncate_free_space_cache(root, trans, path,
                                                      inode);
                if (ret)
@@ -3452,7 +3408,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
  * progress (either running or paused) picks the target profile (if it's
  * already available), otherwise falls back to plain reducing.
  */
-u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
+static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
        /*
         * we add in the count of missing devices because we want
@@ -3672,6 +3628,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
        rcu_read_unlock();
 }
 
+static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
+{
+       return (global->size << 1);
+}
+
 static int should_alloc_chunk(struct btrfs_root *root,
                              struct btrfs_space_info *sinfo, int force)
 {
@@ -3689,7 +3650,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
         * global_rsv, it doesn't change except when the transaction commits.
         */
        if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
-               num_allocated += global_rsv->size;
+               num_allocated += calc_global_rsv_need_space(global_rsv);
 
        /*
         * in limited mode, we want to have some free space up to
@@ -3861,7 +3822,7 @@ static int can_overcommit(struct btrfs_root *root,
 {
        struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
        u64 profile = btrfs_get_alloc_profile(root, 0);
-       u64 rsv_size = 0;
+       u64 space_size;
        u64 avail;
        u64 used;
        u64 to_add;
@@ -3869,18 +3830,16 @@ static int can_overcommit(struct btrfs_root *root,
        used = space_info->bytes_used + space_info->bytes_reserved +
                space_info->bytes_pinned + space_info->bytes_readonly;
 
-       spin_lock(&global_rsv->lock);
-       rsv_size = global_rsv->size;
-       spin_unlock(&global_rsv->lock);
-
        /*
         * We only want to allow over committing if we have lots of actual space
         * free, but if we don't have enough space to handle the global reserve
         * space then we could end up having a real enospc problem when trying
         * to allocate a chunk or some other such important allocation.
         */
-       rsv_size <<= 1;
-       if (used + rsv_size >= space_info->total_bytes)
+       spin_lock(&global_rsv->lock);
+       space_size = calc_global_rsv_need_space(global_rsv);
+       spin_unlock(&global_rsv->lock);
+       if (used + space_size >= space_info->total_bytes)
                return 0;
 
        used += space_info->bytes_may_use;
@@ -3923,8 +3882,8 @@ static int can_overcommit(struct btrfs_root *root,
        return 0;
 }
 
-void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
-                                 unsigned long nr_pages)
+static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+                                        unsigned long nr_pages)
 {
        struct super_block *sb = root->fs_info->sb;
        int started;
@@ -4605,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
        fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
+       if (fs_info->quota_root)
+               fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
 
        update_global_block_rsv(fs_info);
@@ -5206,9 +5167,11 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
                                    u64 bytenr, u64 num_bytes)
 {
        struct btrfs_block_group_cache *cache;
+       int ret;
 
        cache = btrfs_lookup_block_group(root->fs_info, bytenr);
-       BUG_ON(!cache); /* Logic error */
+       if (!cache)
+               return -EINVAL;
 
        /*
         * pull in the free space cache (if any) so that our pin
@@ -5221,9 +5184,9 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
        pin_down_extent(root, cache, bytenr, num_bytes, 0);
 
        /* remove us from the free space cache (if we're there at all) */
-       btrfs_remove_free_space(cache, bytenr, num_bytes);
+       ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
        btrfs_put_block_group(cache);
-       return 0;
+       return ret;
 }
 
 /**
@@ -5596,7 +5559,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        }
 
        refs = btrfs_extent_refs(leaf, ei);
-       BUG_ON(refs < refs_to_drop);
+       if (refs < refs_to_drop) {
+               btrfs_err(info, "trying to drop %d refs but we only have %Lu "
+                         "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
+               ret = -EINVAL;
+               btrfs_abort_transaction(trans, extent_root, ret);
+               goto out;
+       }
        refs -= refs_to_drop;
 
        if (refs > 0) {
@@ -5911,7 +5880,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *orig_root,
                                     u64 num_bytes, u64 empty_size,
                                     u64 hint_byte, struct btrfs_key *ins,
-                                    u64 data)
+                                    u64 flags)
 {
        int ret = 0;
        struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -5922,8 +5891,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        int empty_cluster = 2 * 1024 * 1024;
        struct btrfs_space_info *space_info;
        int loop = 0;
-       int index = __get_raid_index(data);
-       int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
+       int index = __get_raid_index(flags);
+       int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
                RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
@@ -5936,11 +5905,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        ins->objectid = 0;
        ins->offset = 0;
 
-       trace_find_free_extent(orig_root, num_bytes, empty_size, data);
+       trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
 
-       space_info = __find_space_info(root->fs_info, data);
+       space_info = __find_space_info(root->fs_info, flags);
        if (!space_info) {
-               btrfs_err(root->fs_info, "No space info for %llu", data);
+               btrfs_err(root->fs_info, "No space info for %llu", flags);
                return -ENOSPC;
        }
 
@@ -5951,13 +5920,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        if (btrfs_mixed_space_info(space_info))
                use_cluster = false;
 
-       if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
+       if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
                last_ptr = &root->fs_info->meta_alloc_cluster;
                if (!btrfs_test_opt(root, SSD))
                        empty_cluster = 64 * 1024;
        }
 
-       if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+       if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
            btrfs_test_opt(root, SSD)) {
                last_ptr = &root->fs_info->data_alloc_cluster;
        }
@@ -5986,7 +5955,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                 * However if we are re-searching with an ideal block group
                 * picked out then we don't care that the block group is cached.
                 */
-               if (block_group && block_group_bits(block_group, data) &&
+               if (block_group && block_group_bits(block_group, flags) &&
                    block_group->cached != BTRFS_CACHE_NO) {
                        down_read(&space_info->groups_sem);
                        if (list_empty(&block_group->list) ||
@@ -6024,7 +5993,7 @@ search:
                 * raid types, but we want to make sure we only allocate
                 * for the proper type.
                 */
-               if (!block_group_bits(block_group, data)) {
+               if (!block_group_bits(block_group, flags)) {
                    u64 extra = BTRFS_BLOCK_GROUP_DUP |
                                BTRFS_BLOCK_GROUP_RAID1 |
                                BTRFS_BLOCK_GROUP_RAID5 |
@@ -6036,7 +6005,7 @@ search:
                         * doesn't provide them, bail.  This does allow us to
                         * fill raid0 from raid1.
                         */
-                       if ((data & extra) && !(block_group->flags & extra))
+                       if ((flags & extra) && !(block_group->flags & extra))
                                goto loop;
                }
 
@@ -6067,7 +6036,7 @@ have_block_group:
                        if (used_block_group != block_group &&
                            (!used_block_group ||
                             used_block_group->ro ||
-                            !block_group_bits(used_block_group, data))) {
+                            !block_group_bits(used_block_group, flags))) {
                                used_block_group = block_group;
                                goto refill_cluster;
                        }
@@ -6263,7 +6232,7 @@ loop:
                index = 0;
                loop++;
                if (loop == LOOP_ALLOC_CHUNK) {
-                       ret = do_chunk_alloc(trans, root, data,
+                       ret = do_chunk_alloc(trans, root, flags,
                                             CHUNK_ALLOC_FORCE);
                        /*
                         * Do not bail out on ENOSPC since we
@@ -6341,16 +6310,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 num_bytes, u64 min_alloc_size,
                         u64 empty_size, u64 hint_byte,
-                        struct btrfs_key *ins, u64 data)
+                        struct btrfs_key *ins, int is_data)
 {
        bool final_tried = false;
+       u64 flags;
        int ret;
 
-       data = btrfs_get_alloc_profile(root, data);
+       flags = btrfs_get_alloc_profile(root, is_data);
 again:
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
-                              hint_byte, ins, data);
+                              hint_byte, ins, flags);
 
        if (ret == -ENOSPC) {
                if (!final_tried) {
@@ -6363,9 +6333,9 @@ again:
                } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
                        struct btrfs_space_info *sinfo;
 
-                       sinfo = __find_space_info(root->fs_info, data);
+                       sinfo = __find_space_info(root->fs_info, flags);
                        btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
-                               (unsigned long long)data,
+                               (unsigned long long)flags,
                                (unsigned long long)num_bytes);
                        if (sinfo)
                                dump_space_info(sinfo, num_bytes, 1);
@@ -6601,47 +6571,48 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        if (!caching_ctl) {
                BUG_ON(!block_group_cache_done(block_group));
                ret = btrfs_remove_free_space(block_group, start, num_bytes);
-               BUG_ON(ret); /* -ENOMEM */
+               if (ret)
+                       goto out;
        } else {
                mutex_lock(&caching_ctl->mutex);
 
                if (start >= caching_ctl->progress) {
                        ret = add_excluded_extent(root, start, num_bytes);
-                       BUG_ON(ret); /* -ENOMEM */
                } else if (start + num_bytes <= caching_ctl->progress) {
                        ret = btrfs_remove_free_space(block_group,
                                                      start, num_bytes);
-                       BUG_ON(ret); /* -ENOMEM */
                } else {
                        num_bytes = caching_ctl->progress - start;
                        ret = btrfs_remove_free_space(block_group,
                                                      start, num_bytes);
-                       BUG_ON(ret); /* -ENOMEM */
+                       if (ret)
+                               goto out_lock;
 
                        start = caching_ctl->progress;
                        num_bytes = ins->objectid + ins->offset -
                                    caching_ctl->progress;
                        ret = add_excluded_extent(root, start, num_bytes);
-                       BUG_ON(ret); /* -ENOMEM */
                }
-
+out_lock:
                mutex_unlock(&caching_ctl->mutex);
                put_caching_control(caching_ctl);
+               if (ret)
+                       goto out;
        }
 
        ret = btrfs_update_reserved_bytes(block_group, ins->offset,
                                          RESERVE_ALLOC_NO_ACCOUNT);
        BUG_ON(ret); /* logic error */
-       btrfs_put_block_group(block_group);
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
                                         0, owner, offset, ins, 1);
+out:
+       btrfs_put_block_group(block_group);
        return ret;
 }
 
-struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
-                                           struct btrfs_root *root,
-                                           u64 bytenr, u32 blocksize,
-                                           int level)
+static struct extent_buffer *
+btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     u64 bytenr, u32 blocksize, int level)
 {
        struct extent_buffer *buf;
 
@@ -6684,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
        int ret;
+       bool global_updated = false;
 
        block_rsv = get_block_rsv(trans, root);
 
-       if (block_rsv->size == 0) {
-               ret = reserve_metadata_bytes(root, block_rsv, blocksize,
-                                            BTRFS_RESERVE_NO_FLUSH);
-               /*
-                * If we couldn't reserve metadata bytes try and use some from
-                * the global reserve.
-                */
-               if (ret && block_rsv != global_rsv) {
-                       ret = block_rsv_use_bytes(global_rsv, blocksize);
-                       if (!ret)
-                               return global_rsv;
-                       return ERR_PTR(ret);
-               } else if (ret) {
-                       return ERR_PTR(ret);
-               }
+       if (unlikely(block_rsv->size == 0))
+               goto try_reserve;
+again:
+       ret = block_rsv_use_bytes(block_rsv, blocksize);
+       if (!ret)
                return block_rsv;
+
+       if (block_rsv->failfast)
+               return ERR_PTR(ret);
+
+       if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
+               global_updated = true;
+               update_global_block_rsv(root->fs_info);
+               goto again;
        }
 
-       ret = block_rsv_use_bytes(block_rsv, blocksize);
+       if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+               static DEFINE_RATELIMIT_STATE(_rs,
+                               DEFAULT_RATELIMIT_INTERVAL * 10,
+                               /*DEFAULT_RATELIMIT_BURST*/ 1);
+               if (__ratelimit(&_rs))
+                       WARN(1, KERN_DEBUG
+                               "btrfs: block rsv returned %d\n", ret);
+       }
+try_reserve:
+       ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+                                    BTRFS_RESERVE_NO_FLUSH);
        if (!ret)
                return block_rsv;
-       if (ret && !block_rsv->failfast) {
-               if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
-                       static DEFINE_RATELIMIT_STATE(_rs,
-                                       DEFAULT_RATELIMIT_INTERVAL * 10,
-                                       /*DEFAULT_RATELIMIT_BURST*/ 1);
-                       if (__ratelimit(&_rs))
-                               WARN(1, KERN_DEBUG
-                                       "btrfs: block rsv returned %d\n", ret);
-               }
-               ret = reserve_metadata_bytes(root, block_rsv, blocksize,
-                                            BTRFS_RESERVE_NO_FLUSH);
-               if (!ret) {
-                       return block_rsv;
-               } else if (ret && block_rsv != global_rsv) {
-                       ret = block_rsv_use_bytes(global_rsv, blocksize);
-                       if (!ret)
-                               return global_rsv;
-               }
+       /*
+        * If we couldn't reserve metadata bytes try and use some from
+        * the global reserve if its space type is the same as the global
+        * reservation.
+        */
+       if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
+           block_rsv->space_info == global_rsv->space_info) {
+               ret = block_rsv_use_bytes(global_rsv, blocksize);
+               if (!ret)
+                       return global_rsv;
        }
-
-       return ERR_PTR(-ENOSPC);
+       return ERR_PTR(ret);
 }
 
 static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
@@ -6796,6 +6767,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                        extent_op->update_key = 1;
                extent_op->update_flags = 1;
                extent_op->is_data = 0;
+               extent_op->level = level;
 
                ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
                                        ins.objectid,
@@ -6967,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
                BUG_ON(ret); /* -ENOMEM */
                ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
-                                                 eb->len, flag, 0);
+                                                 eb->len, flag,
+                                                 btrfs_header_level(eb), 0);
                BUG_ON(ret); /* -ENOMEM */
                wc->flags[level] |= flag;
        }