Btrfs: fix the missing error information in create_pending_snapshot()

[firefly-linux-kernel-4.4.55.git] / fs / btrfs / transaction.c
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 27c26004e050a33211674363cfd80c02c98d1063..910ff8051ba93c99b6f655baf354e2e091a2913c 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -272,6 +272,7 @@ enum btrfs_trans_type {
         TRANS_JOIN,
         TRANS_USERSPACE,
         TRANS_JOIN_NOLOCK,
+       TRANS_JOIN_FREEZE,
  };
  
  static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -290,7 +291,8 @@ static int may_wait_transaction(struct btrfs_root *root, int type)
  }
  
  static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
-                                                   u64 num_items, int type)
+                                                   u64 num_items, int type,
+                                                   int noflush)
  {
         struct btrfs_trans_handle *h;
         struct btrfs_transaction *cur_trans;
@@ -324,9 +326,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
                 }
  
                 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
-               ret = btrfs_block_rsv_add(root,
-                                         &root->fs_info->trans_block_rsv,
-                                         num_bytes);
+               if (noflush)
+                       ret = btrfs_block_rsv_add_noflush(root,
+                                               &root->fs_info->trans_block_rsv,
+                                               num_bytes);
+               else
+                       ret = btrfs_block_rsv_add(root,
+                                               &root->fs_info->trans_block_rsv,
+                                               num_bytes);
                 if (ret)
                         return ERR_PTR(ret);
         }
@@ -335,7 +342,19 @@ again:
         if (!h)
                 return ERR_PTR(-ENOMEM);
  
-       sb_start_intwrite(root->fs_info->sb);
+       /*
+        * If we are JOIN_NOLOCK we're already committing a transaction and
+        * waiting on this guy, so we don't need to do the sb_start_intwrite
+        * because we're already holding a ref.  We need this because we could
+        * have raced in and done an fsync() on a file which can kick a commit
+        * and then we deadlock with somebody doing a freeze.
+        */
+       if (type != TRANS_JOIN_NOLOCK &&
+           !__sb_start_write(root->fs_info->sb, SB_FREEZE_FS, false)) {
+               if (type == TRANS_JOIN_FREEZE)
+                       return ERR_PTR(-EPERM);
+               sb_start_intwrite(root->fs_info->sb);
+       }
  
         if (may_wait_transaction(root, type))
                 wait_current_trans(root);
@@ -368,6 +387,7 @@ again:
         h->qgroup_reserved = qgroup_reserved;
         h->delayed_ref_elem.seq = 0;
         INIT_LIST_HEAD(&h->qgroup_ref_list);
+       INIT_LIST_HEAD(&h->new_bgs);
  
         smp_mb();
         if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -393,21 +413,33 @@ got_it:
  struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                    int num_items)
  {
-       return start_transaction(root, num_items, TRANS_START);
+       return start_transaction(root, num_items, TRANS_START, 0);
  }
+
+struct btrfs_trans_handle *btrfs_start_transaction_noflush(
+                                       struct btrfs_root *root, int num_items)
+{
+       return start_transaction(root, num_items, TRANS_START, 1);
+}
+
  struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
  {
-       return start_transaction(root, 0, TRANS_JOIN);
+       return start_transaction(root, 0, TRANS_JOIN, 0);
  }
  
  struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
  {
-       return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
+       return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0);
  }
  
  struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
  {
-       return start_transaction(root, 0, TRANS_USERSPACE);
+       return start_transaction(root, 0, TRANS_USERSPACE, 0);
+}
+
+struct btrfs_trans_handle *btrfs_join_transaction_freeze(struct btrfs_root *root)
+{
+       return start_transaction(root, 0, TRANS_JOIN_FREEZE, 0);
  }
  
  /* wait for a transaction commit to be fully complete */
@@ -536,6 +568,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                 trans->qgroup_reserved = 0;
         }
  
+       if (!list_empty(&trans->new_bgs))
+               btrfs_create_pending_block_groups(trans, root);
+
         while (count < 2) {
                 unsigned long cur = trans->delayed_ref_updates;
                 trans->delayed_ref_updates = 0;
@@ -551,7 +586,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
         btrfs_trans_release_metadata(trans, root);
         trans->block_rsv = NULL;
  
-       sb_end_intwrite(root->fs_info->sb);
+       if (!list_empty(&trans->new_bgs))
+               btrfs_create_pending_block_groups(trans, root);
  
         if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
             should_end_transaction(trans, root)) {
@@ -573,6 +609,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                 }
         }
  
+       if (lock)
+               sb_end_intwrite(root->fs_info->sb);
+
         WARN_ON(cur_trans != info->running_transaction);
         WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
         atomic_dec(&cur_trans->num_writers);
@@ -955,6 +994,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         struct btrfs_root *parent_root;
         struct btrfs_block_rsv *rsv;
         struct inode *parent_inode;
+       struct btrfs_path *path;
+       struct btrfs_dir_item *dir_item;
         struct dentry *parent;
         struct dentry *dentry;
         struct extent_buffer *tmp;
@@ -967,18 +1008,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         u64 root_flags;
         uuid_le new_uuid;
  
-       rsv = trans->block_rsv;
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = pending->error = -ENOMEM;
+               goto path_alloc_fail;
+       }
  
         new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
         if (!new_root_item) {
                 ret = pending->error = -ENOMEM;
-               goto fail;
+               goto root_item_alloc_fail;
         }
  
         ret = btrfs_find_free_objectid(tree_root, &objectid);
         if (ret) {
                 pending->error = ret;
-               goto fail;
+               goto no_free_objectid;
         }
  
         btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
@@ -988,22 +1033,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                                   to_reserve);
                 if (ret) {
                         pending->error = ret;
-                       goto fail;
+                       goto no_free_objectid;
                 }
         }
  
         ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
                                    objectid, pending->inherit);
-       kfree(pending->inherit);
         if (ret) {
                 pending->error = ret;
-               goto fail;
+               goto no_free_objectid;
         }
  
         key.objectid = objectid;
         key.offset = (u64)-1;
         key.type = BTRFS_ROOT_ITEM_KEY;
  
+       rsv = trans->block_rsv;
         trans->block_rsv = &pending->block_rsv;
  
         dentry = pending->dentry;
@@ -1017,24 +1062,21 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
          */
         ret = btrfs_set_inode_index(parent_inode, &index);
         BUG_ON(ret); /* -ENOMEM */
-       ret = btrfs_insert_dir_item(trans, parent_root,
-                               dentry->d_name.name, dentry->d_name.len,
-                               parent_inode, &key,
-                               BTRFS_FT_DIR, index);
-       if (ret == -EEXIST) {
+
+       /* check if there is a file/dir which has the same name. */
+       dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
+                                        btrfs_ino(parent_inode),
+                                        dentry->d_name.name,
+                                        dentry->d_name.len, 0);
+       if (dir_item != NULL && !IS_ERR(dir_item)) {
                 pending->error = -EEXIST;
-               dput(parent);
                 goto fail;
-       } else if (ret) {
-               goto abort_trans_dput;
+       } else if (IS_ERR(dir_item)) {
+               ret = PTR_ERR(dir_item);
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
         }
-
-       btrfs_i_size_write(parent_inode, parent_inode->i_size +
-                                        dentry->d_name.len * 2);
-       parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
-       ret = btrfs_update_inode(trans, parent_root, parent_inode);
-       if (ret)
-               goto abort_trans_dput;
+       btrfs_release_path(path);
  
         /*
          * pull in the delayed directory update
@@ -1043,8 +1085,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
          * snapshot
          */
         ret = btrfs_run_delayed_items(trans, root);
-       if (ret) { /* Transaction aborted */
-               dput(parent);
+       if (ret) {      /* Transaction aborted */
+               btrfs_abort_transaction(trans, root, ret);
                 goto fail;
         }
  
@@ -1079,7 +1121,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         if (ret) {
                 btrfs_tree_unlock(old);
                 free_extent_buffer(old);
-               goto abort_trans_dput;
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
         }
  
         btrfs_set_lock_blocking(old);
@@ -1088,8 +1131,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         /* clean up in any case */
         btrfs_tree_unlock(old);
         free_extent_buffer(old);
-       if (ret)
-               goto abort_trans_dput;
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
  
         /* see comments in should_cow_block() */
         root->force_cow = 1;
@@ -1101,8 +1146,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
         btrfs_tree_unlock(tmp);
         free_extent_buffer(tmp);
-       if (ret)
-               goto abort_trans_dput;
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
  
         /*
          * insert root back/forward references
@@ -1111,32 +1158,58 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                  parent_root->root_key.objectid,
                                  btrfs_ino(parent_inode), index,
                                  dentry->d_name.name, dentry->d_name.len);
-       dput(parent);
-       if (ret)
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
                 goto fail;
+       }
  
         key.offset = (u64)-1;
         pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
         if (IS_ERR(pending->snap)) {
                 ret = PTR_ERR(pending->snap);
-               goto abort_trans;
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
         }
  
         ret = btrfs_reloc_post_snapshot(trans, pending);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+
+       ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+
+       ret = btrfs_insert_dir_item(trans, parent_root,
+                                   dentry->d_name.name, dentry->d_name.len,
+                                   parent_inode, &key,
+                                   BTRFS_FT_DIR, index);
+       /* We checked the name at the beginning, so -EEXIST is impossible. */
+       BUG_ON(ret == -EEXIST);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+
+       btrfs_i_size_write(parent_inode, parent_inode->i_size +
+                                        dentry->d_name.len * 2);
+       parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
+       ret = btrfs_update_inode(trans, parent_root, parent_inode);
         if (ret)
-               goto abort_trans;
-       ret = 0;
+               btrfs_abort_transaction(trans, root, ret);
  fail:
-       kfree(new_root_item);
+       dput(parent);
         trans->block_rsv = rsv;
+no_free_objectid:
+       kfree(new_root_item);
+root_item_alloc_fail:
+       btrfs_free_path(path);
+path_alloc_fail:
         btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
         return ret;
-
-abort_trans_dput:
-       dput(parent);
-abort_trans:
-       btrfs_abort_transaction(trans, root, ret);
-       goto fail;
  }
  
  /*
@@ -1229,6 +1302,16 @@ static void do_async_commit(struct work_struct *work)
         struct btrfs_async_commit *ac =
                 container_of(work, struct btrfs_async_commit, work.work);
  
+       /*
+        * We've got freeze protection passed with the transaction.
+        * Tell lockdep about it.
+        */
+       rwsem_acquire_read(
+               &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
+               0, 1, _THIS_IP_);
+
+       current->journal_info = ac->newtrans;
+
         btrfs_commit_transaction(ac->newtrans, ac->root);
         kfree(ac);
  }
@@ -1258,6 +1341,14 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
         atomic_inc(&cur_trans->use_count);
  
         btrfs_end_transaction(trans, root);
+
+       /*
+        * Tell lockdep we've released the freeze rwsem, since the
+        * async commit thread will be the one to unlock it.
+        */
+       rwsem_release(&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
+                     1, _THIS_IP_);
+
         schedule_delayed_work(&ac->work, 0);
  
         /* wait for transaction to start and unblock */
@@ -1348,6 +1439,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
          */
         cur_trans->delayed_refs.flushing = 1;
  
+       if (!list_empty(&trans->new_bgs))
+               btrfs_create_pending_block_groups(trans, root);
+
         ret = btrfs_run_delayed_refs(trans, root, 0);
         if (ret)
                 goto cleanup_transaction;
@@ -1403,7 +1497,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
  
                 if (flush_on_commit || snap_pending) {
                         btrfs_start_delalloc_inodes(root, 1);
-                       btrfs_wait_ordered_extents(root, 0, 1);
+                       btrfs_wait_ordered_extents(root, 1);
                 }
  
                 ret = btrfs_run_delayed_items(trans, root);
@@ -1456,13 +1550,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
          */
         mutex_lock(&root->fs_info->reloc_mutex);
  
-       ret = btrfs_run_delayed_items(trans, root);
+       /*
+        * We needn't worry about the delayed items because we will
+        * deal with them in create_pending_snapshot(), which is the
+        * core function of the snapshot creation.
+        */
+       ret = create_pending_snapshots(trans, root->fs_info);
         if (ret) {
                 mutex_unlock(&root->fs_info->reloc_mutex);
                 goto cleanup_transaction;
         }
  
-       ret = create_pending_snapshots(trans, root->fs_info);
+       /*
+        * We insert the dir indexes of the snapshots and update the inode
+        * of the snapshots' parents after the snapshot creation, so there
+        * are some delayed items which are not dealt with. Now deal with
+        * them.
+        *
+        * We needn't worry that this operation will corrupt the snapshots,
+        * because all the trees which are snapshotted will be forced to COW
+        * their nodes and leaves.
+        */
+       ret = btrfs_run_delayed_items(trans, root);
         if (ret) {
                 mutex_unlock(&root->fs_info->reloc_mutex);
                 goto cleanup_transaction;